diff --git a/submodules/ImageCompression/Sources/ImageCompression.swift b/submodules/ImageCompression/Sources/ImageCompression.swift index 7fe7b191d5..ee84db8bb3 100644 --- a/submodules/ImageCompression/Sources/ImageCompression.swift +++ b/submodules/ImageCompression/Sources/ImageCompression.swift @@ -44,6 +44,14 @@ public func compressImageToJPEG(_ image: UIImage, quality: Float) -> Data? { return data as Data } +public func compressImageToJPEGXL(_ image: UIImage, quality: Int) -> Data? { + return compressJPEGXLData(image, Int32(quality)) +} + +public func decompressImageFromJPEGXL(data: Data) -> UIImage? { + return decompressJPEGXLData(data) +} + @available(iOSApplicationExtension 11.0, iOS 11.0, *) public func compressImage(_ image: UIImage, quality: Float) -> Data? { let data = NSMutableData() diff --git a/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift b/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift index 73ba5c27cc..9a698f5f21 100644 --- a/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift +++ b/submodules/LegacyMediaPickerUI/Sources/LegacyMediaPickers.swift @@ -495,50 +495,128 @@ public func legacyAssetPickerEnqueueMessages(context: AccountContext, account: A } } case let .asset(asset): - var randomId: Int64 = 0 - arc4random_buf(&randomId, 8) - let size = CGSize(width: CGFloat(asset.pixelWidth), height: CGFloat(asset.pixelHeight)) - let scaledSize = size.aspectFittedOrSmaller(CGSize(width: 1280.0, height: 1280.0)) - let resource = PhotoLibraryMediaResource(localIdentifier: asset.localIdentifier, uniqueId: Int64.random(in: Int64.min ... 
Int64.max)) - representations.append(TelegramMediaImageRepresentation(dimensions: PixelDimensions(scaledSize), resource: resource, progressiveSizes: [], immediateThumbnailData: nil, hasVideo: false, isPersonal: false)) - - let media = TelegramMediaImage(imageId: MediaId(namespace: Namespaces.Media.LocalImage, id: randomId), representations: representations, immediateThumbnailData: nil, reference: nil, partialReference: nil, flags: []) - var attributes: [MessageAttribute] = [] - if let timer = item.timer, timer > 0 && timer <= 60 { - attributes.append(AutoremoveTimeoutMessageAttribute(timeout: Int32(timer), countdownBeginTime: nil)) - } - if let spoiler = item.spoiler, spoiler { - attributes.append(MediaSpoilerMessageAttribute()) - } - - let text = trimChatInputText(convertMarkdownToAttributes(caption ?? NSAttributedString())) - let entities = generateTextEntities(text.string, enabledTypes: .all, currentEntities: generateChatInputTextEntities(text)) - if !entities.isEmpty { - attributes.append(TextEntitiesMessageAttribute(entities: entities)) - } - - var bubbleUpEmojiOrStickersetsById: [Int64: ItemCollectionId] = [:] - text.enumerateAttribute(ChatTextInputAttributes.customEmoji, in: NSRange(location: 0, length: text.length), using: { value, _, _ in - if let value = value as? ChatTextInputTextCustomEmojiAttribute { - if let file = value.file { - if let packId = value.interactivelySelectedFromPackId { - bubbleUpEmojiOrStickersetsById[file.fileId.id] = packId + if context.sharedContext.immediateExperimentalUISettings.storiesJpegExperiment { + let sizes: [Int32] = [2048, 1280] + let formats: [MediaImageFormat] = [.jxl, .jpeg] + let qualities: [Int32: [Int32]] = [ + MediaImageFormat.jxl.rawValue: [ + 50, + 75 + ], + MediaImageFormat.jpeg.rawValue: [ + 75 + ] + ] + for sizeSide in sizes { + for format in formats { + for quality in qualities[format.rawValue]! 
{ + var randomId: Int64 = 0 + arc4random_buf(&randomId, 8) + let resource = PhotoLibraryMediaResource( + localIdentifier: asset.localIdentifier, + uniqueId: Int64.random(in: Int64.min ... Int64.max), + width: sizeSide, + height: sizeSide, + format: format, + quality: quality + ) + + let size = CGSize(width: CGFloat(asset.pixelWidth), height: CGFloat(asset.pixelHeight)) + let scaledSize = size.aspectFittedOrSmaller(CGSize(width: CGFloat(sizeSide), height: CGFloat(sizeSide))) + + let media: Media + media = TelegramMediaFile(fileId: MediaId(namespace: Namespaces.Media.LocalFile, id: Int64.random(in: Int64.min ... Int64.max)), partialReference: nil, resource: resource, previewRepresentations: [], videoThumbnails: [], immediateThumbnailData: nil, mimeType: format == .jxl ? "image/jxl" : "image/jpeg", size: nil, attributes: [ + .FileName(fileName: format == .jxl ? "image\(sizeSide)-q\(quality).jxl" : "image\(sizeSide)-q\(quality).jpg"), + .ImageSize(size: PixelDimensions(scaledSize)) + ]) + + var attributes: [MessageAttribute] = [] + if let timer = item.timer, timer > 0 && timer <= 60 { + attributes.append(AutoremoveTimeoutMessageAttribute(timeout: Int32(timer), countdownBeginTime: nil)) + } + if let spoiler = item.spoiler, spoiler { + attributes.append(MediaSpoilerMessageAttribute()) + } + + let text = trimChatInputText(convertMarkdownToAttributes(caption ?? NSAttributedString())) + let entities = generateTextEntities(text.string, enabledTypes: .all, currentEntities: generateChatInputTextEntities(text)) + if !entities.isEmpty { + attributes.append(TextEntitiesMessageAttribute(entities: entities)) + } + + var bubbleUpEmojiOrStickersetsById: [Int64: ItemCollectionId] = [:] + text.enumerateAttribute(ChatTextInputAttributes.customEmoji, in: NSRange(location: 0, length: text.length), using: { value, _, _ in + if let value = value as? 
ChatTextInputTextCustomEmojiAttribute { + if let file = value.file { + if let packId = value.interactivelySelectedFromPackId { + bubbleUpEmojiOrStickersetsById[file.fileId.id] = packId + } + } + } + }) + var bubbleUpEmojiOrStickersets: [ItemCollectionId] = [] + for entity in entities { + if case let .CustomEmoji(_, fileId) = entity.type { + if let packId = bubbleUpEmojiOrStickersetsById[fileId] { + if !bubbleUpEmojiOrStickersets.contains(packId) { + bubbleUpEmojiOrStickersets.append(packId) + } + } + } + } + + messages.append(LegacyAssetPickerEnqueueMessage(message: .message(text: text.string, attributes: attributes, inlineStickers: [:], mediaReference: .standalone(media: media), replyToMessageId: nil, replyToStoryId: nil, localGroupingKey: item.groupedId, correlationId: nil, bubbleUpEmojiOrStickersets: bubbleUpEmojiOrStickersets), uniqueId: item.uniqueId, isFile: false)) } } } - }) - var bubbleUpEmojiOrStickersets: [ItemCollectionId] = [] - for entity in entities { - if case let .CustomEmoji(_, fileId) = entity.type { - if let packId = bubbleUpEmojiOrStickersetsById[fileId] { - if !bubbleUpEmojiOrStickersets.contains(packId) { - bubbleUpEmojiOrStickersets.append(packId) + } else { + var randomId: Int64 = 0 + arc4random_buf(&randomId, 8) + let size = CGSize(width: CGFloat(asset.pixelWidth), height: CGFloat(asset.pixelHeight)) + let scaledSize = size.aspectFittedOrSmaller(CGSize(width: 1280.0, height: 1280.0)) + let resource = PhotoLibraryMediaResource(localIdentifier: asset.localIdentifier, uniqueId: Int64.random(in: Int64.min ... 
Int64.max)) + + let media: Media + representations.append(TelegramMediaImageRepresentation(dimensions: PixelDimensions(scaledSize), resource: resource, progressiveSizes: [], immediateThumbnailData: nil, hasVideo: false, isPersonal: false)) + media = TelegramMediaImage(imageId: MediaId(namespace: Namespaces.Media.LocalImage, id: randomId), representations: representations, immediateThumbnailData: nil, reference: nil, partialReference: nil, flags: []) + + var attributes: [MessageAttribute] = [] + if let timer = item.timer, timer > 0 && timer <= 60 { + attributes.append(AutoremoveTimeoutMessageAttribute(timeout: Int32(timer), countdownBeginTime: nil)) + } + if let spoiler = item.spoiler, spoiler { + attributes.append(MediaSpoilerMessageAttribute()) + } + + let text = trimChatInputText(convertMarkdownToAttributes(caption ?? NSAttributedString())) + let entities = generateTextEntities(text.string, enabledTypes: .all, currentEntities: generateChatInputTextEntities(text)) + if !entities.isEmpty { + attributes.append(TextEntitiesMessageAttribute(entities: entities)) + } + + var bubbleUpEmojiOrStickersetsById: [Int64: ItemCollectionId] = [:] + text.enumerateAttribute(ChatTextInputAttributes.customEmoji, in: NSRange(location: 0, length: text.length), using: { value, _, _ in + if let value = value as? 
ChatTextInputTextCustomEmojiAttribute { + if let file = value.file { + if let packId = value.interactivelySelectedFromPackId { + bubbleUpEmojiOrStickersetsById[file.fileId.id] = packId + } + } + } + }) + var bubbleUpEmojiOrStickersets: [ItemCollectionId] = [] + for entity in entities { + if case let .CustomEmoji(_, fileId) = entity.type { + if let packId = bubbleUpEmojiOrStickersetsById[fileId] { + if !bubbleUpEmojiOrStickersets.contains(packId) { + bubbleUpEmojiOrStickersets.append(packId) + } } } } + + messages.append(LegacyAssetPickerEnqueueMessage(message: .message(text: text.string, attributes: attributes, inlineStickers: [:], mediaReference: .standalone(media: media), replyToMessageId: nil, replyToStoryId: nil, localGroupingKey: item.groupedId, correlationId: nil, bubbleUpEmojiOrStickersets: bubbleUpEmojiOrStickersets), uniqueId: item.uniqueId, isFile: false)) } - - messages.append(LegacyAssetPickerEnqueueMessage(message: .message(text: text.string, attributes: attributes, inlineStickers: [:], mediaReference: .standalone(media: media), replyToMessageId: nil, replyToStoryId: nil, localGroupingKey: item.groupedId, correlationId: nil, bubbleUpEmojiOrStickersets: bubbleUpEmojiOrStickersets), uniqueId: item.uniqueId, isFile: false)) case .tempFile: break } diff --git a/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift b/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift index f2b7aa5631..88c2bcca61 100644 --- a/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift +++ b/submodules/LocalMediaResources/Sources/FetchPhotoLibraryImageResource.swift @@ -84,7 +84,7 @@ extension UIImage.Orientation { private let fetchPhotoWorkers = ThreadPool(threadCount: 3, threadPriority: 0.2) -public func fetchPhotoLibraryResource(localIdentifier: String) -> Signal { +public func fetchPhotoLibraryResource(localIdentifier: String, width: Int32?, height: Int32?, format: MediaImageFormat?, quality: Int32?) 
-> Signal { return Signal { subscriber in let queue = ThreadPoolQueue(threadPool: fetchPhotoWorkers) @@ -97,7 +97,12 @@ public func fetchPhotoLibraryResource(localIdentifier: String) -> Signal Signal Bool { if let to = to as? PhotoLibraryMediaResource { - return self.localIdentifier == to.localIdentifier && self.uniqueId == to.uniqueId + if self.localIdentifier != to.localIdentifier { + return false + } + if self.uniqueId != to.uniqueId { + return false + } + if self.width != to.width { + return false + } + if self.height != to.height { + return false + } + if self.format != to.format { + return false + } + if self.quality != to.quality { + return false + } + return true } else { return false } diff --git a/submodules/MozjpegBinding/BUILD b/submodules/MozjpegBinding/BUILD index 55ef3065ee..15083f04d7 100644 --- a/submodules/MozjpegBinding/BUILD +++ b/submodules/MozjpegBinding/BUILD @@ -5,6 +5,7 @@ objc_library( enable_modules = True, srcs = glob([ "Sources/**/*.m", + "Sources/**/*.mm", "Sources/**/*.h", ]), hdrs = glob([ @@ -15,6 +16,7 @@ objc_library( ], deps = [ "//third-party/mozjpeg:mozjpeg", + "//third-party/libjxl:jxl", ], visibility = [ "//visibility:public", diff --git a/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h b/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h index 894c5238e5..27ec64c926 100644 --- a/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h +++ b/submodules/MozjpegBinding/Public/MozjpegBinding/MozjpegBinding.h @@ -1,6 +1,17 @@ #import +#ifdef __cplusplus +extern "C" { +#endif + NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage); NSArray * _Nonnull extractJPEGDataScans(NSData * _Nonnull data); NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size); UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData); + +NSData * _Nullable compressJPEGXLData(UIImage * _Nonnull sourceImage, int quality); +UIImage * _Nullable decompressJPEGXLData(NSData * 
_Nonnull data); + +#ifdef __cplusplus +} +#endif diff --git a/submodules/MozjpegBinding/Sources/MozjpegBinding.m b/submodules/MozjpegBinding/Sources/MozjpegBinding.m deleted file mode 100644 index a71a5f5c87..0000000000 --- a/submodules/MozjpegBinding/Sources/MozjpegBinding.m +++ /dev/null @@ -1,335 +0,0 @@ -#import - -#import -#import -#import - -static NSData *getHeaderPattern() { - static NSData *value = nil; - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - value = [[NSData alloc] initWithBase64EncodedString:@"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDACgcHiMeGSgjISMtKygwPGRBPDc3PHtYXUlkkYCZlo+AjIqgtObDoKrarYqMyP/L2u71////m8H////6/+b9//j/2wBDASstLTw1PHZBQXb4pYyl+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj/wAARCAAAAAADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwA=" options:0]; - }); - return value; -} - -static NSData *getFooterPattern() { - static NSData *value = nil; - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - value = [[NSData alloc] initWithBase64EncodedString:@"/9k=" options:0]; - }); - return value; -} - -NSArray * _Nonnull extractJPEGDataScans(NSData * _Nonnull data) { - NSMutableArray *result = [[NSMutableArray alloc] init]; - - const uint8_t *dataBytes = data.bytes; - int offset = 0; - while (offset < data.length) { - bool found = false; - for (int i = offset + 2; i < data.length - 1; i++) { - if (dataBytes[i] == 0xffU && dataBytes[i + 1] == 
0xdaU) { - if (offset != 0) { - [result addObject:@(i)]; - } - offset = i; - found = true; - } - } - if (!found) { - break; - } - } - -#if DEBUG - static NSString *sessionPrefix = nil; - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - sessionPrefix = [NSString stringWithFormat:@"%u", arc4random()]; - }); - - NSString *randomId = [NSString stringWithFormat:@"%u", arc4random()]; - NSString *dirPath = [[NSTemporaryDirectory() stringByAppendingPathComponent:sessionPrefix] stringByAppendingPathComponent:randomId]; - [[NSFileManager defaultManager] createDirectoryAtPath:dirPath withIntermediateDirectories:true attributes:nil error:nil]; - for (int i = 0; i < result.count + 1; i++) { - NSString *filePath = [dirPath stringByAppendingPathComponent:[NSString stringWithFormat:@"%d.jpg", i]]; - if (i == result.count) { - [data writeToFile:filePath atomically:true]; - } else { - [[data subdataWithRange:NSMakeRange(0, [result[i] intValue])] writeToFile:filePath atomically:true]; - } - } - NSLog(@"Path: %@", dirPath); -#endif - - return result; -} - -NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage) { - int width = (int)(sourceImage.size.width * sourceImage.scale); - int height = (int)(sourceImage.size.height * sourceImage.scale); - - int targetBytesPerRow = ((4 * (int)width) + 31) & (~31); - uint8_t *targetMemory = malloc((int)(targetBytesPerRow * height)); - - CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); - CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host; - - CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo); - - UIGraphicsPushContext(targetContext); - - CGColorSpaceRelease(colorSpace); - - CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage); - - UIGraphicsPopContext(); - - int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31); - uint8_t *buffer = malloc(bufferBytesPerRow * height); - 
- for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]); - - uint32_t r = ((*color >> 16) & 0xff); - uint32_t g = ((*color >> 8) & 0xff); - uint32_t b = (*color & 0xff); - - buffer[y * bufferBytesPerRow + x * 3 + 0] = r; - buffer[y * bufferBytesPerRow + x * 3 + 1] = g; - buffer[y * bufferBytesPerRow + x * 3 + 2] = b; - } - } - - CGContextRelease(targetContext); - - free(targetMemory); - - struct jpeg_compress_struct cinfo; - struct jpeg_error_mgr jerr; - cinfo.err = jpeg_std_error(&jerr); - jpeg_create_compress(&cinfo); - - uint8_t *outBuffer = NULL; - unsigned long outSize = 0; - jpeg_mem_dest(&cinfo, &outBuffer, &outSize); - - cinfo.image_width = (uint32_t)width; - cinfo.image_height = (uint32_t)height; - cinfo.input_components = 3; - cinfo.in_color_space = JCS_RGB; - jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST); - jpeg_set_defaults(&cinfo); - cinfo.arith_code = FALSE; - cinfo.dct_method = JDCT_ISLOW; - cinfo.optimize_coding = TRUE; - jpeg_set_quality(&cinfo, 72, 1); - jpeg_simple_progression(&cinfo); - jpeg_start_compress(&cinfo, 1); - - JSAMPROW rowPointer[1]; - while (cinfo.next_scanline < cinfo.image_height) { - rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow); - jpeg_write_scanlines(&cinfo, rowPointer, 1); - } - - jpeg_finish_compress(&cinfo); - - NSData *result = [[NSData alloc] initWithBytes:outBuffer length:outSize]; - - jpeg_destroy_compress(&cinfo); - - free(buffer); - - return result; -} - -NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size) { - CGSize fittedSize = image.size; - if (fittedSize.width > size.width) { - fittedSize = CGSizeMake(size.width, (int)((fittedSize.height * size.width / MAX(fittedSize.width, 1.0f)))); - } - if (fittedSize.height > size.height) { - fittedSize = CGSizeMake((int)((fittedSize.width * size.height / MAX(fittedSize.height, 1.0f))), size.height); - } - 
- int width = (int)fittedSize.width; - int height = (int)fittedSize.height; - - int targetBytesPerRow = ((4 * (int)width) + 31) & (~31); - uint8_t *targetMemory = malloc((int)(targetBytesPerRow * height)); - - CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); - CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host; - - CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo); - - UIGraphicsPushContext(targetContext); - - CGColorSpaceRelease(colorSpace); - - CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), image.CGImage); - - UIGraphicsPopContext(); - - int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31); - uint8_t *buffer = malloc(bufferBytesPerRow * height); - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]); - - uint32_t r = ((*color >> 16) & 0xff); - uint32_t g = ((*color >> 8) & 0xff); - uint32_t b = (*color & 0xff); - - buffer[y * bufferBytesPerRow + x * 3 + 0] = r; - buffer[y * bufferBytesPerRow + x * 3 + 1] = g; - buffer[y * bufferBytesPerRow + x * 3 + 2] = b; - } - } - - CGContextRelease(targetContext); - - free(targetMemory); - - struct jpeg_compress_struct cinfo; - struct jpeg_error_mgr jerr; - cinfo.err = jpeg_std_error(&jerr); - jpeg_create_compress(&cinfo); - - uint8_t *outBuffer = NULL; - unsigned long outSize = 0; - jpeg_mem_dest(&cinfo, &outBuffer, &outSize); - - cinfo.image_width = (uint32_t)width; - cinfo.image_height = (uint32_t)height; - cinfo.input_components = 3; - cinfo.in_color_space = JCS_RGB; - jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST); - jpeg_set_defaults(&cinfo); - cinfo.arith_code = FALSE; - cinfo.dct_method = JDCT_ISLOW; - cinfo.optimize_coding = FALSE; - jpeg_set_quality(&cinfo, 20, 1); - jpeg_start_compress(&cinfo, 1); - - JSAMPROW rowPointer[1]; - while (cinfo.next_scanline < 
cinfo.image_height) { - rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow); - jpeg_write_scanlines(&cinfo, rowPointer, 1); - } - - jpeg_finish_compress(&cinfo); - - NSMutableData *serializedData = nil; - - NSData *headerPattern = getHeaderPattern(); - NSData *footerPattern = getFooterPattern(); - if (outBuffer[164] == height && outBuffer[166] == width && headerPattern != nil && footerPattern != nil) { - outBuffer[164] = 0; - outBuffer[166] = 0; - - if (memcmp(headerPattern.bytes, outBuffer, headerPattern.length) == 0) { - if (memcmp(footerPattern.bytes, outBuffer + outSize - footerPattern.length, footerPattern.length) == 0) { - serializedData = [[NSMutableData alloc] init]; - uint8_t version = 1; - [serializedData appendBytes:&version length:1]; - uint8_t outWidth = (uint8_t)width; - uint8_t outHeight = (uint8_t)height; - [serializedData appendBytes:&outHeight length:1]; - [serializedData appendBytes:&outWidth length:1]; - unsigned long contentSize = outSize - headerPattern.length - footerPattern.length; - [serializedData appendBytes:outBuffer + headerPattern.length length:contentSize]; - } - } - } - - jpeg_destroy_compress(&cinfo); - - free(buffer); - - return serializedData; -} - -UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData) { - long unsigned int jpegSize = sourceData.length; - unsigned char *_compressedImage = (unsigned char *)sourceData.bytes; - - int jpegSubsamp, width, height; - - tjhandle _jpegDecompressor = tjInitDecompress(); - - if (tjDecompressHeader2(_jpegDecompressor, _compressedImage, jpegSize, &width, &height, &jpegSubsamp) != 0) { - return nil; - } - - int sourceBytesPerRow = (3 * width + 31) & ~0x1F; - int targetBytesPerRow = (4 * width + 31) & ~0x1F; - - unsigned char *buffer = malloc(sourceBytesPerRow * height); - - tjDecompress2(_jpegDecompressor, _compressedImage, jpegSize, buffer, width, sourceBytesPerRow, height, TJPF_RGB, TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE); - - tjDestroy(_jpegDecompressor); - 
- vImage_Buffer source; - source.width = width; - source.height = height; - source.rowBytes = sourceBytesPerRow; - source.data = buffer; - - vImage_Buffer target; - target.width = width; - target.height = height; - target.rowBytes = targetBytesPerRow; - - unsigned char *targetBuffer = malloc(targetBytesPerRow * height); - target.data = targetBuffer; - - vImageConvert_RGB888toARGB8888(&source, nil, 0xff, &target, false, kvImageDoNotTile); - - free(buffer); - - vImage_Buffer permuteTarget; - permuteTarget.width = width; - permuteTarget.height = height; - permuteTarget.rowBytes = targetBytesPerRow; - - unsigned char *permuteTargetBuffer = malloc(targetBytesPerRow * height); - permuteTarget.data = permuteTargetBuffer; - - const uint8_t permuteMap[4] = {3,2,1,0}; - vImagePermuteChannels_ARGB8888(&target, &permuteTarget, permuteMap, kvImageDoNotTile); - - free(targetBuffer); - - NSData *resultData = [[NSData alloc] initWithBytesNoCopy:permuteTargetBuffer length:targetBytesPerRow * height deallocator:^(void * _Nonnull bytes, __unused NSUInteger length) { - free(bytes); - }]; - - CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData((__bridge CFDataRef)resultData); - - static CGColorSpaceRef imageColorSpace; - static CGBitmapInfo bitmapInfo; - static dispatch_once_t onceToken; - dispatch_once(&onceToken, ^{ - UIGraphicsBeginImageContextWithOptions(CGSizeMake(1, 1), YES, 0); - UIImage *refImage = UIGraphicsGetImageFromCurrentImageContext(); - imageColorSpace = CGColorSpaceRetain(CGImageGetColorSpace(refImage.CGImage)); - bitmapInfo = CGImageGetBitmapInfo(refImage.CGImage); - UIGraphicsEndImageContext(); - }); - - CGImageRef cgImg = CGImageCreate(width, height, 8, 32, targetBytesPerRow, imageColorSpace, bitmapInfo, dataProvider, NULL, true, kCGRenderingIntentDefault); - - CGDataProviderRelease(dataProvider); - - UIImage *resultImage = [[UIImage alloc] initWithCGImage:cgImg]; - CGImageRelease(cgImg); - - return resultImage; -} diff --git 
a/submodules/MozjpegBinding/Sources/MozjpegBinding.mm b/submodules/MozjpegBinding/Sources/MozjpegBinding.mm new file mode 100644 index 0000000000..8bf4263244 --- /dev/null +++ b/submodules/MozjpegBinding/Sources/MozjpegBinding.mm @@ -0,0 +1,780 @@ +#import + +#define USE_JPEGLI false + +#import +#import + +#import + +#include +#include +#include +#include +//#include +//#include + +#include +#include +#include +#include +#include + +static inline float JXLGetDistance(int32_t quality) { + if (quality == 0) { + return 1.0f; + } else if (quality >= 30) { + return 0.1f + (float)(100 - MIN(100, quality)) * 0.09f; + } else { + return 6.24f + (float)pow(2.5f, (30.0 - quality) / 5.0) / 6.25f; + } +} + +NSData * _Nullable compressJPEGXLData(UIImage * _Nonnull sourceImage, int quality) { + int width = (int)(sourceImage.size.width * sourceImage.scale); + int height = (int)(sourceImage.size.height * sourceImage.scale); + + int targetBytesPerRow = ((4 * (int)width) + 31) & (~31); + uint8_t *targetMemory = (uint8_t *)malloc((int)(targetBytesPerRow * height)); + + CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); + CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host; + + CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo); + + UIGraphicsPushContext(targetContext); + + CGColorSpaceRelease(colorSpace); + + CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage); + + UIGraphicsPopContext(); + + int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31); + int bufferSize = bufferBytesPerRow * height; + uint8_t *buffer = (uint8_t *)malloc(bufferBytesPerRow * height); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]); + + uint32_t r = ((*color >> 16) & 0xff); + uint32_t g = ((*color >> 8) & 0xff); + uint32_t b = (*color & 0xff); + + buffer[y * 
bufferBytesPerRow + x * 3 + 0] = r;
+            buffer[y * bufferBytesPerRow + x * 3 + 1] = g;
+            buffer[y * bufferBytesPerRow + x * 3 + 2] = b;
+        }
+    }
+
+    CGContextRelease(targetContext);
+
+    free(targetMemory);
+
+    auto enc = JxlEncoderMake(nullptr);
+
+    // 3 interleaved 8-bit channels (RGB). The last field is the scanline alignment in bytes:
+    // it must describe the stride that `buffer` was actually packed with. Rows were written
+    // using bufferBytesPerRow = ((3 * width) + 31) & ~31, i.e. a 32-byte-aligned stride, so the
+    // alignment has to be 32. Declaring 16 makes the encoder step through rows with a shorter
+    // stride whenever (3 * width) % 32 is in 1...16, which skews the encoded image.
+    JxlPixelFormat pixel_format = {3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 32};
+
+    JxlBasicInfo basic_info;
+    JxlEncoderInitBasicInfo(&basic_info);
+    basic_info.xsize = width;
+    basic_info.ysize = height;
+    // NOTE(review): 32 bits with 8 exponent bits describes float samples, while the frame below
+    // is supplied as 8-bit integers. Presumably carried over from a float-based sample; left
+    // unchanged here -- confirm whether 8 / 0 is what is intended.
+    basic_info.bits_per_sample = 32;
+    basic_info.exponent_bits_per_sample = 8;
+    basic_info.uses_original_profile = JXL_FALSE;
+    if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc.get(), &basic_info)) {
+        free(buffer);
+        return nil;
+    }
+
+    JxlColorEncoding color_encoding = {};
+    JxlColorEncodingSetToSRGB(&color_encoding,
+                              /*is_gray=*/pixel_format.num_channels < 3);
+    if (JXL_ENC_SUCCESS != JxlEncoderSetColorEncoding(enc.get(), &color_encoding)) {
+        free(buffer);
+        return nil;
+    }
+
+    JxlEncoderFrameSettings* frame_settings =
+    JxlEncoderFrameSettingsCreate(enc.get(), nullptr);
+
+    // Map the caller's quality value to a distance and request encoder effort 8.
+    JxlEncoderSetFrameDistance(frame_settings, JXLGetDistance(quality));
+    JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 8);
+
+    // bufferSize is bufferBytesPerRow * height, which matches the 32-byte-aligned layout above.
+    if (JXL_ENC_SUCCESS != JxlEncoderAddImageFrame(frame_settings, &pixel_format, buffer, bufferSize)) {
+        free(buffer);
+        return nil;
+    }
+    JxlEncoderCloseInput(enc.get());
+
+    // Start with a small output buffer and double it each time the encoder asks for more room.
+    NSMutableData *result = [[NSMutableData alloc] initWithLength:64];
+    uint8_t *next_out = (uint8_t *)result.mutableBytes;
+    size_t avail_out = result.length - (next_out - ((uint8_t *)result.mutableBytes));
+
+    JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT;
+    while (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+        process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out);
+        if (process_result == JXL_ENC_NEED_MORE_OUTPUT) {
+            // Growing the data may move its storage, so re-derive the write pointer from the offset.
+            size_t offset = next_out - ((uint8_t *)result.mutableBytes);
+            [result setLength:result.length * 2];
+            next_out = ((uint8_t *)result.mutableBytes) + offset;
+            avail_out = 
result.length - offset; + } + } + [result setLength:next_out - ((uint8_t *)result.mutableBytes)]; + if (JXL_ENC_SUCCESS != process_result) { + free(buffer); + return nil; + } + + free(buffer); + return result; + + /*auto runner = JxlThreadParallelRunnerMake( + nullptr, + 8); + if (JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc.get(), + JxlThreadParallelRunner, + runner.get())) { + fprintf(stderr, "JxlEncoderSetParallelRunner failed\n"); + return false; + }*/ +} + +UIImage * _Nullable decompressJPEGXLData(NSData * _Nonnull data) { + //const uint8_t* jxl, size_t size, std::vector* pixels, size_t* xsize, size_t* ysize, std::vector* icc_profile + + auto dec = JxlDecoderMake(nullptr); + if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE)) { + return nil; + } + + /*if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(), JxlResizableParallelRunner, runner.get())) { + fprintf(stderr, "JxlDecoderSetParallelRunner failed\n"); + return false; + }*/ + + JxlBasicInfo info; + JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0}; + + JxlDecoderSetInput(dec.get(), (uint8_t const *)data.bytes, data.length); + JxlDecoderCloseInput(dec.get()); + + int xsize = 0; + int ysize = 0; + std::vector icc_profile; + + std::vector pixels; + + while (true) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + + if (status == JXL_DEC_ERROR) { + return nil; + } else if (status == JXL_DEC_NEED_MORE_INPUT) { + return nil; + } else if (status == JXL_DEC_BASIC_INFO) { + if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) { + return nil; + } + xsize = info.xsize; + ysize = info.ysize; + //JxlResizableParallelRunnerSetThreads(runner.get(), JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize)); + } else if (status == JXL_DEC_COLOR_ENCODING) { + // Get the ICC color profile of the pixel data + size_t icc_size; + if (JXL_DEC_SUCCESS != 
JxlDecoderGetICCProfileSize(dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, &icc_size)) {
+                fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n");
+                return nil;
+            }
+            icc_profile.resize(icc_size);
+            if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, icc_profile.data(), icc_profile.size())) {
+                return nil;
+            }
+        } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+            size_t buffer_size;
+            if (JXL_DEC_SUCCESS != JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) {
+                return nil;
+            }
+            // `format` requests 4 channels of JXL_TYPE_UINT8 with no row alignment, so the decoder
+            // needs exactly 4 bytes per pixel. The previous comparison against 16 bytes per pixel
+            // (the float layout) could never match for a non-empty image, so every decode
+            // returned nil. The size_t casts keep the product from overflowing int.
+            if (buffer_size != (size_t)xsize * (size_t)ysize * 4) {
+                return nil;
+            }
+            pixels.resize(xsize * ysize * 4);
+            void* pixels_buffer = (void*)pixels.data();
+            // Report the real capacity of `pixels` in bytes. Multiplying by sizeof(float) claimed
+            // more room than a byte-sized element type provides; sizeof(pixels[0]) is correct
+            // whatever the element type of the vector is.
+            size_t pixels_buffer_size = pixels.size() * sizeof(pixels[0]);
+            if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec.get(), &format, pixels_buffer, pixels_buffer_size)) {
+                return nil;
+            }
+        } else if (status == JXL_DEC_FULL_IMAGE) {
+            // Nothing to do. Do not yet return. If the image is an animation, more
+            // full frames may be decoded. This example only keeps the last one.
+        } else if (status == JXL_DEC_SUCCESS) {
+            // All decoding successfully finished.
+            // It's not required to call JxlDecoderReleaseInput(dec.get()) here since
+            // the decoder will be destroyed.
+ + int targetBytesPerRow = xsize * 4; + uint8_t *permuteTargetBuffer = (uint8_t *)malloc(targetBytesPerRow * ysize); + memcpy(permuteTargetBuffer, pixels.data(), pixels.size()); + + NSData *resultData = [[NSData alloc] initWithBytesNoCopy:permuteTargetBuffer length:targetBytesPerRow * ysize deallocator:^(void * _Nonnull bytes, __unused NSUInteger length) { + free(bytes); + }]; + + CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData((__bridge CFDataRef)resultData); + + static CGColorSpaceRef imageColorSpace; + static CGBitmapInfo bitmapInfo; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + UIGraphicsBeginImageContextWithOptions(CGSizeMake(1, 1), YES, 0); + UIImage *refImage = UIGraphicsGetImageFromCurrentImageContext(); + imageColorSpace = CGColorSpaceRetain(CGImageGetColorSpace(refImage.CGImage)); + bitmapInfo = CGImageGetBitmapInfo(refImage.CGImage); + UIGraphicsEndImageContext(); + }); + + CGImageRef cgImg = CGImageCreate(xsize, ysize, 8, 32, targetBytesPerRow, imageColorSpace, bitmapInfo, dataProvider, NULL, true, kCGRenderingIntentDefault); + + CGDataProviderRelease(dataProvider); + + UIImage *resultImage = [[UIImage alloc] initWithCGImage:cgImg]; + CGImageRelease(cgImg); + + return resultImage; + } else { + return nil; + } + } + + return nil; +} + +static NSData *getHeaderPattern() { + static NSData *value = nil; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + value = [[NSData alloc] 
initWithBase64EncodedString:@"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDACgcHiMeGSgjISMtKygwPGRBPDc3PHtYXUlkkYCZlo+AjIqgtObDoKrarYqMyP/L2u71////m8H////6/+b9//j/2wBDASstLTw1PHZBQXb4pYyl+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj4+Pj/wAARCAAAAAADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwA=" options:0]; + }); + return value; +} + +static NSData *getFooterPattern() { + static NSData *value = nil; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + value = [[NSData alloc] initWithBase64EncodedString:@"/9k=" options:0]; + }); + return value; +} + +NSArray * _Nonnull extractJPEGDataScans(NSData * _Nonnull data) { + NSMutableArray *result = [[NSMutableArray alloc] init]; + + const uint8_t *dataBytes = (const uint8_t *)data.bytes; + int offset = 0; + while (offset < data.length) { + bool found = false; + for (int i = offset + 2; i < data.length - 1; i++) { + if (dataBytes[i] == 0xffU && dataBytes[i + 1] == 0xdaU) { + if (offset != 0) { + [result addObject:@(i)]; + } + offset = i; + found = true; + } + } + if (!found) { + break; + } + } + +#if DEBUG + static NSString *sessionPrefix = nil; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + sessionPrefix = [NSString stringWithFormat:@"%u", arc4random()]; + }); + + NSString *randomId = [NSString stringWithFormat:@"%u", arc4random()]; + NSString *dirPath = [[NSTemporaryDirectory() stringByAppendingPathComponent:sessionPrefix] 
stringByAppendingPathComponent:randomId]; + [[NSFileManager defaultManager] createDirectoryAtPath:dirPath withIntermediateDirectories:true attributes:nil error:nil]; + for (int i = 0; i < result.count + 1; i++) { + NSString *filePath = [dirPath stringByAppendingPathComponent:[NSString stringWithFormat:@"%d.jpg", i]]; + if (i == result.count) { + [data writeToFile:filePath atomically:true]; + } else { + [[data subdataWithRange:NSMakeRange(0, [result[i] intValue])] writeToFile:filePath atomically:true]; + } + } + NSLog(@"Path: %@", dirPath); +#endif + + return result; +} + +#if USE_JPEGLI +NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage) { + int width = (int)(sourceImage.size.width * sourceImage.scale); + int height = (int)(sourceImage.size.height * sourceImage.scale); + + int targetBytesPerRow = ((4 * (int)width) + 31) & (~31); + uint8_t *targetMemory = malloc((int)(targetBytesPerRow * height)); + + CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); + CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host; + + CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo); + + UIGraphicsPushContext(targetContext); + + CGColorSpaceRelease(colorSpace); + + CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage); + + UIGraphicsPopContext(); + + int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31); + uint8_t *buffer = malloc(bufferBytesPerRow * height); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]); + + uint32_t r = ((*color >> 16) & 0xff); + uint32_t g = ((*color >> 8) & 0xff); + uint32_t b = (*color & 0xff); + + buffer[y * bufferBytesPerRow + x * 3 + 0] = r; + buffer[y * bufferBytesPerRow + x * 3 + 1] = g; + buffer[y * bufferBytesPerRow + x * 3 + 2] = b; + } + } + + CGContextRelease(targetContext); + + 
free(targetMemory); + + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + + uint8_t *outBuffer = NULL; + unsigned long outSize = 0; + jpeg_mem_dest(&cinfo, &outBuffer, &outSize); + + cinfo.image_width = (uint32_t)width; + cinfo.image_height = (uint32_t)height; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + //jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST); + jpeg_set_defaults(&cinfo); + cinfo.arith_code = FALSE; + cinfo.dct_method = JDCT_ISLOW; + cinfo.optimize_coding = TRUE; + jpeg_set_quality(&cinfo, 72, 1); + jpeg_simple_progression(&cinfo); + jpeg_start_compress(&cinfo, 1); + + JSAMPROW rowPointer[1]; + while (cinfo.next_scanline < cinfo.image_height) { + rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow); + jpeg_write_scanlines(&cinfo, rowPointer, 1); + } + + jpeg_finish_compress(&cinfo); + + NSData *result = [[NSData alloc] initWithBytes:outBuffer length:outSize]; /* copies the encoded bytes, so outBuffer can be released below */ + + jpeg_destroy_compress(&cinfo); + + free(buffer); free(outBuffer); /* the jpeg_mem_dest() output buffer is allocated by the library and owned by the caller; it was previously leaked */ + + return result; +} +#else +NSData * _Nullable compressJPEGData(UIImage * _Nonnull sourceImage) { /* Non-JPEGLI build: draws sourceImage at its pixel size (size * scale) into a 32-bit bitmap, repacks it to 24-bit RGB rows and encodes a progressive JPEG at quality 72. Returns the encoded bytes. */ + int width = (int)(sourceImage.size.width * sourceImage.scale); + int height = (int)(sourceImage.size.height * sourceImage.scale); + + int targetBytesPerRow = ((4 * (int)width) + 31) & (~31); /* row stride rounded up to a multiple of 32 bytes */ + uint8_t *targetMemory = (uint8_t *)malloc((int)(targetBytesPerRow * height)); + + CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); + CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host; + + CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo); + + UIGraphicsPushContext(targetContext); + + CGColorSpaceRelease(colorSpace); + + CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), sourceImage.CGImage); + + UIGraphicsPopContext(); + + int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31); /* RGB row stride, also rounded up to a multiple of 32 bytes */ +
uint8_t *buffer = (uint8_t *)malloc(bufferBytesPerRow * height); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]); /* each pixel is read as one 32-bit word: R in bits 16-23, G in bits 8-15, B in bits 0-7 */ + + uint32_t r = ((*color >> 16) & 0xff); + uint32_t g = ((*color >> 8) & 0xff); + uint32_t b = (*color & 0xff); + + buffer[y * bufferBytesPerRow + x * 3 + 0] = r; + buffer[y * bufferBytesPerRow + x * 3 + 1] = g; + buffer[y * bufferBytesPerRow + x * 3 + 2] = b; + } + } + + CGContextRelease(targetContext); + + free(targetMemory); + + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + + uint8_t *outBuffer = NULL; + unsigned long outSize = 0; + jpeg_mem_dest(&cinfo, &outBuffer, &outSize); + + cinfo.image_width = (uint32_t)width; + cinfo.image_height = (uint32_t)height; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST); + jpeg_set_defaults(&cinfo); + cinfo.arith_code = FALSE; + cinfo.dct_method = JDCT_ISLOW; + cinfo.optimize_coding = TRUE; + jpeg_set_quality(&cinfo, 72, 1); + jpeg_simple_progression(&cinfo); + jpeg_start_compress(&cinfo, 1); + + JSAMPROW rowPointer[1]; + while (cinfo.next_scanline < cinfo.image_height) { + rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow); /* feed the encoder one scanline at a time */ + jpeg_write_scanlines(&cinfo, rowPointer, 1); + } + + jpeg_finish_compress(&cinfo); + + NSData *result = [[NSData alloc] initWithBytes:outBuffer length:outSize]; /* copies the encoded bytes, so outBuffer can be released below */ + + jpeg_destroy_compress(&cinfo); + + free(buffer); free(outBuffer); /* the jpeg_mem_dest() output buffer is allocated by the library and owned by the caller; it was previously leaked */ + + return result; +} +#endif + +#if USE_JPEGLI +NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size) { + CGSize fittedSize = image.size; + if (fittedSize.width > size.width) { + fittedSize = CGSizeMake(size.width, (int)((fittedSize.height * size.width / MAX(fittedSize.width, 1.0f)))); + } + if (fittedSize.height > size.height) { + fittedSize = 
CGSizeMake((int)((fittedSize.width * size.height / MAX(fittedSize.height, 1.0f))), size.height); + } + + int width = (int)fittedSize.width; + int height = (int)fittedSize.height; + + int targetBytesPerRow = ((4 * (int)width) + 31) & (~31); + uint8_t *targetMemory = malloc((int)(targetBytesPerRow * height)); + + CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); + CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host; + + CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo); + + UIGraphicsPushContext(targetContext); + + CGColorSpaceRelease(colorSpace); + + CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), image.CGImage); + + UIGraphicsPopContext(); + + int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31); + uint8_t *buffer = malloc(bufferBytesPerRow * height); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]); + + uint32_t r = ((*color >> 16) & 0xff); + uint32_t g = ((*color >> 8) & 0xff); + uint32_t b = (*color & 0xff); + + buffer[y * bufferBytesPerRow + x * 3 + 0] = r; + buffer[y * bufferBytesPerRow + x * 3 + 1] = g; + buffer[y * bufferBytesPerRow + x * 3 + 2] = b; + } + } + + CGContextRelease(targetContext); + + free(targetMemory); + + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + + uint8_t *outBuffer = NULL; + unsigned long outSize = 0; + jpeg_mem_dest(&cinfo, &outBuffer, &outSize); + + cinfo.image_width = (uint32_t)width; + cinfo.image_height = (uint32_t)height; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + //jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST); + jpeg_set_defaults(&cinfo); + cinfo.arith_code = FALSE; + cinfo.dct_method = JDCT_ISLOW; + cinfo.optimize_coding = FALSE; + jpeg_set_quality(&cinfo, 20, 
1); + jpeg_start_compress(&cinfo, 1); + + JSAMPROW rowPointer[1]; + while (cinfo.next_scanline < cinfo.image_height) { + rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow); + jpeg_write_scanlines(&cinfo, rowPointer, 1); + } + + jpeg_finish_compress(&cinfo); + + NSMutableData *serializedData = nil; /* stays nil unless the encoded stream matches the header/footer templates */ + + NSData *headerPattern = getHeaderPattern(); + NSData *footerPattern = getFooterPattern(); + if (headerPattern != nil && footerPattern != nil && outBuffer != NULL && outSize > 166 && outSize >= headerPattern.length + footerPattern.length && outBuffer[164] == height && outBuffer[166] == width) { /* validate the patterns and the output size before indexing outBuffer; the size check also keeps the pointer arithmetic and contentSize below from underflowing */ + outBuffer[164] = 0; /* bytes 164 and 166 matched height/width above; they are zeroed so the header can be compared with the template */ + outBuffer[166] = 0; + + if (memcmp(headerPattern.bytes, outBuffer, headerPattern.length) == 0) { + if (memcmp(footerPattern.bytes, outBuffer + outSize - footerPattern.length, footerPattern.length) == 0) { + serializedData = [[NSMutableData alloc] init]; + uint8_t version = 1; + [serializedData appendBytes:&version length:1]; + uint8_t outWidth = (uint8_t)width; + uint8_t outHeight = (uint8_t)height; + [serializedData appendBytes:&outHeight length:1]; + [serializedData appendBytes:&outWidth length:1]; + unsigned long contentSize = outSize - headerPattern.length - footerPattern.length; + [serializedData appendBytes:outBuffer + headerPattern.length length:contentSize]; /* only the bytes between the header and the footer are stored */ + } + } + } + + jpeg_destroy_compress(&cinfo); + + free(buffer); free(outBuffer); /* the jpeg_mem_dest() output buffer is allocated by the library and owned by the caller; it was previously leaked */ + + return serializedData; +} +#else +NSData * _Nullable compressMiniThumbnail(UIImage * _Nonnull image, CGSize size) { /* Non-JPEGLI build: fits image.size inside `size`, encodes the result as a JPEG at quality 20 and, when the stream matches the shared header/footer templates, returns version byte + height + width + the bytes between header and footer; otherwise returns nil. */ + CGSize fittedSize = image.size; + if (fittedSize.width > size.width) { + fittedSize = CGSizeMake(size.width, (int)((fittedSize.height * size.width / MAX(fittedSize.width, 1.0f)))); + } + if (fittedSize.height > size.height) { + fittedSize = CGSizeMake((int)((fittedSize.width * size.height / MAX(fittedSize.height, 1.0f))), size.height); + } + + int width = (int)fittedSize.width; + int height = (int)fittedSize.height; + + int targetBytesPerRow = ((4 * (int)width) + 31) & (~31); + uint8_t *targetMemory = (uint8_t *)malloc((int)(targetBytesPerRow * height)); + + CGColorSpaceRef 
colorSpace = CGColorSpaceCreateDeviceRGB(); + CGBitmapInfo bitmapInfo = kCGImageAlphaNoneSkipFirst | kCGBitmapByteOrder32Host; + + CGContextRef targetContext = CGBitmapContextCreate(targetMemory, width, height, 8, targetBytesPerRow, colorSpace, bitmapInfo); + + UIGraphicsPushContext(targetContext); + + CGColorSpaceRelease(colorSpace); + + CGContextDrawImage(targetContext, CGRectMake(0, 0, width, height), image.CGImage); + + UIGraphicsPopContext(); + + int bufferBytesPerRow = ((3 * (int)width) + 31) & (~31); + uint8_t *buffer = (uint8_t *)malloc(bufferBytesPerRow * height); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint32_t *color = ((uint32_t *)&targetMemory[y * targetBytesPerRow + x * 4]); + + uint32_t r = ((*color >> 16) & 0xff); + uint32_t g = ((*color >> 8) & 0xff); + uint32_t b = (*color & 0xff); + + buffer[y * bufferBytesPerRow + x * 3 + 0] = r; + buffer[y * bufferBytesPerRow + x * 3 + 1] = g; + buffer[y * bufferBytesPerRow + x * 3 + 2] = b; + } + } + + CGContextRelease(targetContext); + + free(targetMemory); + + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + + uint8_t *outBuffer = NULL; + unsigned long outSize = 0; + jpeg_mem_dest(&cinfo, &outBuffer, &outSize); + + cinfo.image_width = (uint32_t)width; + cinfo.image_height = (uint32_t)height; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + jpeg_c_set_int_param(&cinfo, JINT_COMPRESS_PROFILE, JCP_FASTEST); + jpeg_set_defaults(&cinfo); + cinfo.arith_code = FALSE; + cinfo.dct_method = JDCT_ISLOW; + cinfo.optimize_coding = FALSE; + jpeg_set_quality(&cinfo, 20, 1); + jpeg_start_compress(&cinfo, 1); + + JSAMPROW rowPointer[1]; + while (cinfo.next_scanline < cinfo.image_height) { + rowPointer[0] = (JSAMPROW)(buffer + cinfo.next_scanline * bufferBytesPerRow); + jpeg_write_scanlines(&cinfo, rowPointer, 1); + } + + jpeg_finish_compress(&cinfo); + + NSMutableData *serializedData = 
nil; + + NSData *headerPattern = getHeaderPattern(); + NSData *footerPattern = getFooterPattern(); + if (headerPattern != nil && footerPattern != nil && outBuffer != NULL && outSize > 166 && outSize >= headerPattern.length + footerPattern.length && outBuffer[164] == height && outBuffer[166] == width) { /* validate the patterns and the output size before indexing outBuffer; the size check also keeps the pointer arithmetic and contentSize below from underflowing */ + outBuffer[164] = 0; /* bytes 164 and 166 matched height/width above; they are zeroed so the header can be compared with the template */ + outBuffer[166] = 0; + + if (memcmp(headerPattern.bytes, outBuffer, headerPattern.length) == 0) { + if (memcmp(footerPattern.bytes, outBuffer + outSize - footerPattern.length, footerPattern.length) == 0) { + serializedData = [[NSMutableData alloc] init]; + uint8_t version = 1; + [serializedData appendBytes:&version length:1]; + uint8_t outWidth = (uint8_t)width; + uint8_t outHeight = (uint8_t)height; + [serializedData appendBytes:&outHeight length:1]; + [serializedData appendBytes:&outWidth length:1]; + unsigned long contentSize = outSize - headerPattern.length - footerPattern.length; + [serializedData appendBytes:outBuffer + headerPattern.length length:contentSize]; /* only the bytes between the header and the footer are stored */ + } + } + } + + jpeg_destroy_compress(&cinfo); + + free(buffer); free(outBuffer); /* the jpeg_mem_dest() output buffer is allocated by the library and owned by the caller; it was previously leaked */ + + return serializedData; +} +#endif + +#if USE_JPEGLI +UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData) { /* JPEGLI build: decoding is delegated to UIImage. */ + return [UIImage imageWithData:sourceData]; +} +#else +UIImage * _Nullable decompressImage(NSData * _Nonnull sourceData) { /* Non-JPEGLI build: decodes JPEG bytes with TurboJPEG into RGB, expands to 4 bytes per pixel with vImage and wraps the result in a UIImage. Returns nil when the JPEG header cannot be parsed. */ + long unsigned int jpegSize = sourceData.length; + unsigned char *_compressedImage = (unsigned char *)sourceData.bytes; + + int jpegSubsamp, width, height; + + tjhandle _jpegDecompressor = tjInitDecompress(); + + if (tjDecompressHeader2(_jpegDecompressor, _compressedImage, jpegSize, &width, &height, &jpegSubsamp) != 0) { + tjDestroy(_jpegDecompressor); return nil; /* release the decompressor on the early exit too; it was previously leaked for every undecodable input */ + } + + int sourceBytesPerRow = (3 * width + 31) & ~0x1F; /* both strides are rounded up to a multiple of 32 bytes */ + int targetBytesPerRow = (4 * width + 31) & ~0x1F; + + unsigned char *buffer = (uint8_t *)malloc(sourceBytesPerRow * height); + + tjDecompress2(_jpegDecompressor, _compressedImage, jpegSize, buffer, width, sourceBytesPerRow, height, TJPF_RGB, TJFLAG_FASTDCT | TJFLAG_FASTUPSAMPLE); /* NOTE(review): the return value is deliberately still ignored so that streams decoded with warnings keep producing an image - confirm this best-effort behaviour is intended */ + + tjDestroy(_jpegDecompressor); + + vImage_Buffer source; + source.width = width; + source.height 
= height; + source.rowBytes = sourceBytesPerRow; + source.data = buffer; + + vImage_Buffer target; + target.width = width; + target.height = height; + target.rowBytes = targetBytesPerRow; + + unsigned char *targetBuffer = (uint8_t *)malloc(targetBytesPerRow * height); + target.data = targetBuffer; + + vImageConvert_RGB888toARGB8888(&source, nil, 0xff, &target, false, kvImageDoNotTile); + + free(buffer); + + vImage_Buffer permuteTarget; + permuteTarget.width = width; + permuteTarget.height = height; + permuteTarget.rowBytes = targetBytesPerRow; + + unsigned char *permuteTargetBuffer = (uint8_t *)malloc(targetBytesPerRow * height); + permuteTarget.data = permuteTargetBuffer; + + const uint8_t permuteMap[4] = {3,2,1,0}; + vImagePermuteChannels_ARGB8888(&target, &permuteTarget, permuteMap, kvImageDoNotTile); + + free(targetBuffer); + + NSData *resultData = [[NSData alloc] initWithBytesNoCopy:permuteTargetBuffer length:targetBytesPerRow * height deallocator:^(void * _Nonnull bytes, __unused NSUInteger length) { + free(bytes); + }]; + + CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData((__bridge CFDataRef)resultData); + + static CGColorSpaceRef imageColorSpace; + static CGBitmapInfo bitmapInfo; + static dispatch_once_t onceToken; + dispatch_once(&onceToken, ^{ + UIGraphicsBeginImageContextWithOptions(CGSizeMake(1, 1), YES, 0); + UIImage *refImage = UIGraphicsGetImageFromCurrentImageContext(); + imageColorSpace = CGColorSpaceRetain(CGImageGetColorSpace(refImage.CGImage)); + bitmapInfo = CGImageGetBitmapInfo(refImage.CGImage); + UIGraphicsEndImageContext(); + }); + + CGImageRef cgImg = CGImageCreate(width, height, 8, 32, targetBytesPerRow, imageColorSpace, bitmapInfo, dataProvider, NULL, true, kCGRenderingIntentDefault); + + CGDataProviderRelease(dataProvider); + + UIImage *resultImage = [[UIImage alloc] initWithCGImage:cgImg]; + CGImageRelease(cgImg); + + return resultImage; +} +#endif diff --git 
a/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift b/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift index 54a2987973..306c41662c 100644 --- a/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift +++ b/submodules/TelegramUIPreferences/Sources/ExperimentalUISettings.swift @@ -51,6 +51,7 @@ public struct ExperimentalUISettings: Codable, Equatable { public var disableBackgroundAnimation: Bool public var logLanguageRecognition: Bool public var storiesExperiment: Bool + public var storiesJpegExperiment: Bool public static var defaultSettings: ExperimentalUISettings { return ExperimentalUISettings( @@ -79,7 +80,8 @@ public struct ExperimentalUISettings: Codable, Equatable { disableImageContentAnalysis: false, disableBackgroundAnimation: false, logLanguageRecognition: false, - storiesExperiment: false + storiesExperiment: false, + storiesJpegExperiment: false ) } @@ -109,7 +111,8 @@ public struct ExperimentalUISettings: Codable, Equatable { disableImageContentAnalysis: Bool, disableBackgroundAnimation: Bool, logLanguageRecognition: Bool, - storiesExperiment: Bool + storiesExperiment: Bool, + storiesJpegExperiment: Bool ) { self.keepChatNavigationStack = keepChatNavigationStack self.skipReadHistory = skipReadHistory @@ -137,6 +140,7 @@ public struct ExperimentalUISettings: Codable, Equatable { self.disableBackgroundAnimation = disableBackgroundAnimation self.logLanguageRecognition = logLanguageRecognition self.storiesExperiment = storiesExperiment + self.storiesJpegExperiment = storiesJpegExperiment } public init(from decoder: Decoder) throws { @@ -168,6 +172,7 @@ public struct ExperimentalUISettings: Codable, Equatable { self.disableBackgroundAnimation = try container.decodeIfPresent(Bool.self, forKey: "disableBackgroundAnimation") ?? false self.logLanguageRecognition = try container.decodeIfPresent(Bool.self, forKey: "logLanguageRecognition") ?? 
false self.storiesExperiment = try container.decodeIfPresent(Bool.self, forKey: "storiesExperiment") ?? false + self.storiesJpegExperiment = try container.decodeIfPresent(Bool.self, forKey: "storiesJpegExperiment") ?? false } public func encode(to encoder: Encoder) throws { @@ -199,6 +204,7 @@ public struct ExperimentalUISettings: Codable, Equatable { try container.encode(self.disableBackgroundAnimation, forKey: "disableBackgroundAnimation") try container.encode(self.logLanguageRecognition, forKey: "logLanguageRecognition") try container.encode(self.storiesExperiment, forKey: "storiesExperiment") + try container.encode(self.storiesJpegExperiment, forKey: "storiesJpegExperiment") } } diff --git a/third-party/libjxl/BUILD b/third-party/libjxl/BUILD new file mode 100644 index 0000000000..ae93a6b1a0 --- /dev/null +++ b/third-party/libjxl/BUILD @@ -0,0 +1,129 @@ + +headers = [ + "jxl/codestream_header.h", + "jxl/cms_interface.h", + "jxl/color_encoding.h", + "jxl/decode_cxx.h", + "jxl/decode.h", + "jxl/encode_cxx.h", + "jxl/encode.h", + "jxl/jxl_export.h", + "jxl/jxl_threads_export.h", + "jxl/memory_manager.h", + "jxl/parallel_runner.h", + "jxl/stats.h", + "jxl/types.h", + "jxl/version.h", +] + +libs = [ + "jxl", +] + + +brotli_libs = [ + "libbrotlicommon", + "libbrotlidec", + "libbrotlienc", +] + +highway_libs = [ + "libhwy" +] + +filegroup( + name = "libjxl_sources", + srcs = glob([ + "libjxl/**/*" + ]), +) + +genrule( + name = "libjxl_build", + srcs = [ + "build-libjxl-bazel.sh", + ":libjxl_sources", + "@cmake_tar_gz//file", + ], + cmd_bash = + """ + set -ex + + if [ "$(TARGET_CPU)" == "ios_armv7" ]; then + BUILD_ARCH="armv7" + elif [ "$(TARGET_CPU)" == "ios_arm64" ]; then + BUILD_ARCH="arm64" + elif [ "$(TARGET_CPU)" == "ios_sim_arm64" ]; then + BUILD_ARCH="sim_arm64" + elif [ "$(TARGET_CPU)" == "ios_x86_64" ]; then + BUILD_ARCH="x86_64" + else + echo "Unsupported architecture $(TARGET_CPU)" + fi + + BUILD_DIR="$(RULEDIR)/build_$${BUILD_ARCH}" + rm -rf "$$BUILD_DIR" 
+ mkdir -p "$$BUILD_DIR" + + CMAKE_DIR="$$(pwd)/$$BUILD_DIR/cmake" + rm -rf "$$CMAKE_DIR" + mkdir -p "$$CMAKE_DIR" + tar -xzf "$(location @cmake_tar_gz//file)" -C "$$CMAKE_DIR" + + cp $(location :build-libjxl-bazel.sh) "$$BUILD_DIR/" + + SOURCE_PATH="third-party/libjxl/libjxl" + + cp -R "$$SOURCE_PATH" "$$BUILD_DIR/" + + mkdir -p "$$BUILD_DIR/Public/jxl" + + PATH="$$PATH:$$CMAKE_DIR/cmake-3.23.1-macos-universal/CMake.app/Contents/bin" sh $$BUILD_DIR/build-libjxl-bazel.sh $$BUILD_ARCH "$$BUILD_DIR/libjxl" "$$BUILD_DIR" + """ + + "\n".join([ + "cp -f \"$$BUILD_DIR/build/lib/include/{}\" \"$(location Public/{})\"".format(header, header) for header in headers + ]) + + "\n" + + "\n".join([ + "cp -f \"$$BUILD_DIR/build/lib/lib{}.a\" \"$(location Public/jxl/lib/lib{}.a)\"".format(lib, lib) for lib in libs + ]) + + "\n" + + "\n".join([ + "cp -f \"$$BUILD_DIR/build/third_party/brotli/{}.a\" \"$(location Public/jxl/lib/{}.a)\"".format(lib, lib) for lib in brotli_libs + ]) + + "\n" + + "\n".join([ + "cp -f \"$$BUILD_DIR/build/third_party/highway/{}.a\" \"$(location Public/jxl/lib/{}.a)\"".format(lib, lib) for lib in highway_libs + ]), + outs = ["Public/" + x for x in headers] + + ["Public/jxl/lib/lib{}.a".format(x) for x in libs] + + ["Public/jxl/lib/{}.a".format(x) for x in brotli_libs] + + ["Public/jxl/lib/{}.a".format(x) for x in highway_libs], + visibility = [ + "//visibility:public", + ] +) + +cc_library( + name = "jxl_lib", + srcs = [":Public/jxl/lib/lib" + x + ".a" for x in libs] + + [":Public/jxl/lib/" + x + ".a" for x in brotli_libs] + + [":Public/jxl/lib/" + x + ".a" for x in highway_libs], +) + +objc_library( + name = "jxl", + module_name = "jxl", + enable_modules = True, + hdrs = [":Public/" + x for x in headers], + includes = [ + "Public", + "Public/jxl", + ], + deps = [ + ":jxl_lib", + ], + visibility = [ + "//visibility:public", + ], +) diff --git a/third-party/libjxl/build-libjxl-bazel.sh b/third-party/libjxl/build-libjxl-bazel.sh new file mode 100755 index 
0000000000..42188d8120 --- /dev/null +++ b/third-party/libjxl/build-libjxl-bazel.sh @@ -0,0 +1,65 @@ +#! /bin/sh + +set -e + +ARCH="$1" + +SOURCE_DIR="$2" +BUILD_DIR=$(echo "$(cd "$(dirname "$3")"; pwd -P)/$(basename "$3")") + +RSSS="9" + +CMAKE_OPTIONS="-DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF -DJPEGXL_ENABLE_BENCHMARK=0 -DJPEGXL_ENABLE_FUZZERS=0 -DJPEGXL_ENABLE_TOOLS=0 -DJPEGXL_ENABLE_JPEGLI=0 -DJPEGXL_ENABLE_DOXYGEN=0 -DJPEGXL_ENABLE_MANPAGES=0 -DJPEGXL_ENABLE_BENCHMARK=0 -DJPEGXL_ENABLE_EXAMPLES=0 -DJPEGXL_BUNDLE_LIBPNG=0 -DJPEGXL_ENABLE_JNI=0 -DJPEGXL_ENABLE_SJPEG=0 -DJPEGXL_ENABLE_OPENEXR=0 -DJPEGXL_ENABLE_TRANSCODE_JPEG=0 -DJPEGXL_STATIC=1 -DJPEGXL_ENABLE_BOXES=0" + +if [ "$ARCH" = "arm64" ]; then + IOS_PLATFORMDIR="$(xcode-select -p)/Platforms/iPhoneOS.platform" + IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneOS*.sdk) + export CFLAGS="-Wall -arch arm64 -miphoneos-version-min=11.0 -funwind-tables" + + cd "$BUILD_DIR" + mkdir build + cd build + + touch toolchain.cmake + echo "set(CMAKE_SYSTEM_NAME Darwin)" >> toolchain.cmake + echo "set(CMAKE_SYSTEM_PROCESSOR aarch64)" >> toolchain.cmake + echo "set(CMAKE_C_COMPILER $(xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang)" >> toolchain.cmake + + cmake -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} $CMAKE_OPTIONS ../libjxl + make +elif [ "$ARCH" = "sim_arm64" ]; then + IOS_PLATFORMDIR="$(xcode-select -p)/Platforms/iPhoneSimulator.platform" + IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneSimulator*.sdk) + export CFLAGS="-Wall -arch arm64 --target=arm64-apple-ios11.0-simulator -miphonesimulator-version-min=11.0 -funwind-tables" + + cd "$BUILD_DIR" + mkdir build + cd build + + touch toolchain.cmake + echo "set(CMAKE_SYSTEM_NAME Darwin)" >> toolchain.cmake + echo "set(CMAKE_SYSTEM_PROCESSOR aarch64)" >> toolchain.cmake + echo "set(CMAKE_C_COMPILER $(xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang)" >> toolchain.cmake + + cmake 
-DCMAKE_TOOLCHAIN_FILE=toolchain.cmake -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} $CMAKE_OPTIONS ../libjxl + make +elif [ "$ARCH" = "x86_64" ]; then + IOS_PLATFORMDIR="$(xcode-select -p)/Platforms/iPhoneSimulator.platform" + IOS_SYSROOT=($IOS_PLATFORMDIR/Developer/SDKs/iPhoneSimulator*.sdk) + export CFLAGS="-Wall -arch x86_64 -miphoneos-version-min=11.0 -funwind-tables" + + cd "$BUILD_DIR" + mkdir build + cd build + + touch toolchain.cmake + echo "set(CMAKE_SYSTEM_NAME Darwin)" >> toolchain.cmake + echo "set(CMAKE_SYSTEM_PROCESSOR AMD64)" >> toolchain.cmake + echo "set(CMAKE_C_COMPILER $(xcode-select -p)/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang)" >> toolchain.cmake + + cmake -DCMAKE_TOOLCHAIN_FILE=toolchain.cmake -DCMAKE_OSX_SYSROOT=${IOS_SYSROOT[0]} $CMAKE_OPTIONS ../libjxl + make +else + echo "Unsupported architecture $ARCH" + exit 1 +fi diff --git a/third-party/libjxl/libjxl/AUTHORS b/third-party/libjxl/libjxl/AUTHORS new file mode 100644 index 0000000000..ab9dabe3af --- /dev/null +++ b/third-party/libjxl/libjxl/AUTHORS @@ -0,0 +1,73 @@ +# List of the project authors for copyright purposes. When contributing to the +# project add your name or your organization's name to this list. See +# CONTRIBUTING.md for details. +# +# For organizations: +# Organization +# +# For individuals: +# Name +# +# Please keep each list sorted. If you wish to change your email address please +# send a pull request. + +# Organizations: +Cloudinary Ltd. <*@cloudinary.com> +Google LLC <*@google.com> + +# Individuals: +a-shvedov +Alex Xu (Hello71) +Alexander Sago +Alistair Barrow +Andrius Lukas Narbutas +Aous Naman +Artem Selishchev +Biswapriyo Nath +CanadianBaconBoi +Damiano Albani +Daniel Novomeský +David Burnett +dependabot[bot] +Diego Pino +Dirk Lemstra +Don Olmstead +Dong Xu +Even Rouault +Fred Brennan +gi-man +Gilles Devillers (GilDev) +Heiko Becker +Jim Robinson +Jon Sneyers +Jonathan Brown (Jonnyawsom3) +Joshua Root +Kai Hollberg +Kleis Auke Wolthuizen +L. E. 
Segovia +Leo Izen +Lovell Fuller +Maarten DB +Marcin Konicki +Martin Strunz +Mathieu Malaterre +Mikk Leini +Misaki Kasumi +Moonchild Straver +Nicholas Hayes <0xC0000054@users.noreply.github.com> +Nigel Tao +Petr Diblík +Pieter Wuille +roland-rollo +Samuel Leong +Sandro +Sergey Fedorov +Stephan T. Lavavej +Sylvestre Ledru +Thomas Bonfort +tmkk +Vincent Torri +xiota +Yonatan Nebenzhal +Ziemowit Zabawa +源文雨 <41315874+fumiama@users.noreply.github.com> diff --git a/third-party/libjxl/libjxl/BUILD.bazel b/third-party/libjxl/libjxl/BUILD.bazel new file mode 100644 index 0000000000..0b81fc7b8a --- /dev/null +++ b/third-party/libjxl/libjxl/BUILD.bazel @@ -0,0 +1,22 @@ +package(default_visibility = ["//:__subpackages__"]) + +filegroup( + name = "testdata", + srcs = glob([ + "testdata/**/*.icc", + "testdata/**/*.pam", + "testdata/**/*.pfm", + "testdata/**/*.pgm", + "testdata/**/*.pnm", + "testdata/**/*.ppm", + "testdata/**/*.png", + "testdata/**/*.jpg", + "testdata/**/*.jxl", + "testdata/**/*.gif", + "testdata/**/*.y4m", + "testdata/**/*.jxl", + "testdata/**/*.png", + "testdata/**/*.jpg", + "testdata/position_encoding/*.txt", + ]), +) diff --git a/third-party/libjxl/libjxl/BUILDING.md b/third-party/libjxl/libjxl/BUILDING.md new file mode 100644 index 0000000000..7e9bc2aad3 --- /dev/null +++ b/third-party/libjxl/libjxl/BUILDING.md @@ -0,0 +1,85 @@ +# Compilation + +For more details and other workflows see the "Advanced guide" below. + +## Checking out the code + +```bash +git clone https://github.com/libjxl/libjxl.git --recursive --shallow-submodules +``` + +This repository uses git submodules to handle some third party dependencies +under `third_party`, that's why it is important to pass `--recursive`. 
If you +didn't check out with `--recursive`, or any submodule has changed, run: + +```bash +git submodule update --init --recursive --depth 1 --recommend-shallow +``` + +The `--shallow-submodules` and `--depth 1 --recommend-shallow` options create +shallow clones which only download the commits requested, which is all that is +needed to build `libjxl`. Should full clones be necessary, you could always run: + +```bash +git submodule foreach git fetch --unshallow +git submodule update --init --recursive +``` + +which pulls the rest of the commits in the submodules. + +Important: If you downloaded a zip file or tarball from the web interface you +won't get the needed submodules and the code will not compile. You can download +these external dependencies from source by running `./deps.sh`. The git workflow +described above is recommended instead. + +## Installing dependencies + +To install the required dependencies for compiling the code on a Debian/Ubuntu-based +distribution, run: + +```bash +sudo apt install cmake pkg-config libbrotli-dev +``` + +To install the optional dependencies for supporting other formats in the `cjxl`/`djxl` tools +on a Debian/Ubuntu-based distribution, run: + +```bash +sudo apt install libgif-dev libjpeg-dev libopenexr-dev libpng-dev libwebp-dev +``` + +We recommend using a recent Clang compiler (version 7 or newer); to do so, +install clang and set the `CC` and `CXX` variables. + +```bash +sudo apt install clang +export CC=clang CXX=clang++ +``` + +## Building + +```bash +cd libjxl +mkdir build +cd build +cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=OFF .. +cmake --build . -- -j$(nproc) +``` + +The encoder/decoder tools will be available in the `build/tools` directory. + +## Installing + +```bash +sudo cmake --install . 
+``` + + +## Building JPEG XL for developers + +For experienced developers, we provide build instructions for several other environments: + +* [Building on Debian](doc/developing_in_debian.md) +* Building on Windows with [vcpkg](doc/developing_in_windows_vcpkg.md) (Visual Studio 2019) +* Building on Windows with [MSYS2](doc/developing_in_windows_msys.md) +* [Cross Compiling for Windows with Crossroad](doc/developing_with_crossroad.md) diff --git a/third-party/libjxl/libjxl/BUILDING_Haiku.md b/third-party/libjxl/libjxl/BUILDING_Haiku.md new file mode 100644 index 0000000000..1ffca1453c --- /dev/null +++ b/third-party/libjxl/libjxl/BUILDING_Haiku.md @@ -0,0 +1,20 @@ +## Disclaimer + +Haiku builds are not officially supported, i.e. the build might not work at all, +some tests may fail and some sub-projects are excluded from build. + +This manual outlines Haiku-specific setup. For general building and testing +instructions see "[BUILDING](BUILDING.md)" and +"[Building and Testing changes](doc/building_and_testing.md)". + +## Dependencies + +```shell +pkgman install llvm9_clang ninja cmake doxygen libjpeg_turbo_devel giflib_devel +``` + +## Building + +```shell +TEST_STACK_LIMIT=none CMAKE_FLAGS="-I/boot/system/develop/tools/lib/gcc/x86_64-unknown-haiku/8.3.0/include/c++ -I/boot/system/develop/tools/lib/gcc/x86_64-unknown-haiku/8.3.0/include/c++/x86_64-unknown-haiku" CMAKE_SHARED_LINKER_FLAGS="-shared -Xlinker -soname=libjpegxl.so -lpthread" ./ci.sh opt +``` diff --git a/third-party/libjxl/libjxl/BUILDING_OSX.md b/third-party/libjxl/libjxl/BUILDING_OSX.md new file mode 100644 index 0000000000..b5f5e34db7 --- /dev/null +++ b/third-party/libjxl/libjxl/BUILDING_OSX.md @@ -0,0 +1,41 @@ +## Disclaimer + +OSX builds have "best effort" support, i.e. build might not work at all, some +tests may fail and some sub-projects are excluded from build. + +This manual outlines OSX specific setup. 
For general building and testing +instructions see "[BUILDING](BUILDING.md)" and +"[Building and Testing changes](doc/building_and_testing.md)". + +[Homebrew](https://brew.sh/) is a popular package manager. The JPEG XL library and +binaries can be installed using it: + +```bash +brew install jpeg-xl +``` + +## Dependencies + +Make sure that `brew doctor` does not report serious problems and that an up-to-date +version of Xcode is installed. + +Installing (actually, building) `clang` might take a couple of hours. + +```bash +brew install llvm +``` + +```bash +brew install coreutils cmake giflib jpeg-turbo libpng ninja zlib +``` + +Before building the project check that `which clang` is +`/usr/local/opt/llvm/bin/clang`, not the one provided by Xcode. If not, update +the `PATH` environment variable. + +Also, setting `CMAKE_PREFIX_PATH` might be necessary for include paths to +resolve correctly, e.g.: + +```bash +export CMAKE_PREFIX_PATH=`brew --prefix giflib`:`brew --prefix jpeg-turbo`:`brew --prefix libpng`:`brew --prefix zlib` +``` \ No newline at end of file diff --git a/third-party/libjxl/libjxl/CHANGELOG.md b/third-party/libjxl/libjxl/CHANGELOG.md new file mode 100644 index 0000000000..c1235da14e --- /dev/null +++ b/third-party/libjxl/libjxl/CHANGELOG.md @@ -0,0 +1,320 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## Unreleased + +### Added + - encoder API: add `JxlEncoderSetExtraChannelDistance` to adjust the quality + of extra channels (like alpha) separately. + - encoder API: new api functions for streaming encoding: + - `JxlEncoderSetOutputCallback`, + - `JxlEncoderChunkedImageFrameStart`, + - `JxlEncoderChunkedImageFrameAddPart` and new + - `JXL_ENC_FRAME_SETTING_BUFFERING` enum value. 
+ - encoder API: new options for more fine-grained control over metadata + preservation when using `JxlEncoderAddJPEGFrame`: + - `JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF` + - `JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP` + - `JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF` + - encoder API: new function `JxlEncoderSetUpsamplingMode` to change the upsampling + method, e.g. to use nearest-neighbor upsampling for pixel art + - cjxl can now be used to explicitly add/update/strip Exif/XMP/JUMBF metadata using + the decoder-hints syntax, e.g. `cjxl input.ppm -x exif=input.exif output.jxl` + - djxl can now be used to extract Exif/XMP/JUMBF metadata + +### Removed + - API: the Butteraugli API (`jxl/butteraugli.h`) was removed. + - encoder and decoder API: all deprecated functions were removed: + `JxlDecoderDefaultPixelFormat`, `JxlEncoderOptionsSetLossless`, + `JxlEncoderOptionsSetEffort`, `JxlEncoderOptionsSetDecodingSpeed`, + `JxlEncoderOptionsSetDistance`, `JxlEncoderOptionsCreate`, as well as + the deprecated enumerator values `JXL_DEC_EXTENSIONS`, `JXL_ENC_NOT_SUPPORTED`, + `JXL_TYPE_BOOLEAN`, `JXL_TYPE_UINT32`, and deprecated type `JxlEncoderOptions`. + - decoder API: the signature of `JxlDecoderGetColorAsEncodedProfile`, + `JxlDecoderGetICCProfileSize`, and `JxlDecoderGetColorAsICCProfile` + changed: a deprecated unused argument was removed. + +### Changed + - changed the name of the cjxl flag `photon_noise` to `photon_noise_iso` + +## [0.8.0] - 2023-01-18 + +### Added + - decoder API: new function `JxlDecoderSetImageBitDepth` to set the bit depth + of the output buffer. + - decoder API proposal: add `JxlDecoderSetOutputColorProfile` and + `JxlDecoderSetCms` to enable decoding to desired colorspace; NB: not + implemented yet. + - encoder API: new function `JxlEncoderSetFrameBitDepth` to set the bit depth + of the input buffer. + - encoder API: add an effort 10 option for lossless compression; using this + setting requires calling `JxlEncoderAllowExpertOptions`. 
+ - encoder API: new `JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES` enum value to + allow explicit control of metadata compression + +### Removed + - common API: removed `JxlIntrinsicSizeHeader` + - decoder API: removed deprecated `JXL_DEC_NEED_DC_OUT_BUFFER` and + `JXL_DEC_DC_IMAGE` events, `JxlDecoderDCOutBufferSize` and + `JxlDecoderSetDCOutBuffer` functions + +### Changed / clarified + - encoder API: `JxlEncoderProcessOutput` requires at least 32 bytes of output + space to proceed and guarantees that at least one byte will be written + +## [0.7] - 2022-07-21 + +### Added + - Export version information in headers. + - decoder API: Ability to decode the content of metadata boxes: + `JXL_DEC_BOX`, `JXL_DEC_BOX_NEED_MORE_OUTPUT`, `JxlDecoderSetBoxBuffer`, + `JxlDecoderGetBoxType`, `JxlDecoderGetBoxSizeRaw` and + `JxlDecoderSetDecompressBoxes`. + - decoder API: ability to mark the input is finished: `JxlDecoderCloseInput`. + - decoder API: ability to request updates on different progressive events using + `JxlDecoderSetProgressiveDetail`; currently supported events are + `kDC`, `kLastPasses` and `kPasses`. + - decoder API: ability to specify desired intensity target using + `JxlDecoderSetDesiredIntensityTarget` + - decoder API: new function `JxlDecoderSetCoalesced` to allow decoding + non-coalesced (unblended) frames, e.g. layers of a composite still image + or the cropped frames of a recompressed GIF/APNG. + - decoder API: new function `JxlDecoderSetUnpremultiplyAlpha` to set + preference for getting an associated alpha channel with premultiplied or + unpremultiplied colors. + - decoder API: field added to `JxlFrameHeader`: a `JxlLayerInfo` struct + that contains crop dimensions and offsets and blending information for + the non-coalesced case. + - decoder API: new function `JxlDecoderGetExtraChannelBlendInfo` to get + the blending information for extra channels in the non-coalesced case. 
+ - decoder API: new function `JxlDecoderSetMultithreadedImageOutCallback`, + allowing output callbacks to receive more information about the number of + threads on which they are running. + - decoder API: new function `JxlDecoderSkipCurrentFrame` to skip processing + the current frame after a progressive detail is reached. + - decoder API: new function `JxlDecoderGetIntendedDownsamplingRatio` to get + the intended downsampling ratio of progressive steps, based on the + information in the frame header. + - decoder API: new function `JxlDecoderSetRenderSpotcolors` to allow disabling + rendering of spot colors. + - decoder/encoder API: add two fields to `JXLBasicInfo`: `intrinsic_xsize` + and `intrinsic_ysize` to signal the intrinsic size. + - encoder API: ability to add metadata boxes, added new functions + `JxlEncoderAddBox`, `JxlEncoderUseBoxes`, `JxlEncoderCloseBoxes` and + `JxlEncoderCloseFrames`. + - encoder API: added ability to set several encoder options / extra fields to + frames using `JxlEncoderSetFrameName`, `JxlEncoderFrameSettingsSetOption`, + `JxlEncoderFrameSettingsSetFloatOption`. + - encoder API: added ability to check required codestream compatibility level + and force specified using `JxlEncoderGetRequiredCodestreamLevel` and + `JxlEncoderSetCodestreamLevel`. + - encoder API: added ability to force emitting box-based container format + using `JxlEncoderUseContainer`. + - encoder API: added ability to store JPEG metadata for lossless reconstruction + using `JxlEncoderStoreJPEGMetadata` + - encoder API: new functions `JxlEncoderSetFrameHeader` and + `JxlEncoderSetExtraChannelBlendInfo` to set animation + and blending parameters of the frame, and `JxlEncoderInitFrameHeader` and + `JxlEncoderInitBlendInfo` to initialize the structs to set. + - encoder API: ability to encode arbitrary extra channels: + `JxlEncoderInitExtraChannelInfo`, `JxlEncoderSetExtraChannelInfo`, + `JxlEncoderSetExtraChannelName` and `JxlEncoderSetExtraChannelBuffer`. 
+ - encoder API: ability to plug custom CMS implementation using + `JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms)` + - encoder API: added `JxlEncoderGetError` to retrieve last encoder error. + +### Changed +- decoder API: using `JxlDecoderCloseInput` at the end of all input is required + when using JXL_DEC_BOX, and is now also encouraged in other cases, but not + required in those other cases for backwards compatibility. +- encoder API: `JxlEncoderCloseInput` now closes both frames and boxes input. +- CLI: `cjxl` and `djxl` have been reimplemented on the base of public decoder + and encoder API; dropped dependency on `gflags` for argument parsing. + +### Deprecated +- decoder API: `JXL_DEC_EXTENSIONS` event: use `JXL_DEC_BASIC_INFO` +- decoder / encoder API: pixel types `JXL_TYPE_BOOLEAN` and `JXL_TYPE_UINT32`: + consider using `JXL_TYPE_UINT8` and `JXL_TYPE_FLOAT` correspondingly. +- decoder API: pixel format parameter for `JxlDecoderGetColorAsEncodedProfile` + and `JxlDecoderGetICCProfileSize`: pass `NULL`. +- decoder API: `JxlDecoderDefaultPixelFormat` +- encoder API: `JxlEncoderOptions`: use `JxlEncoderFrameSettings` instead. +- encoder API: `JxlEncoderOptionsCreate`: use `JxlEncoderFrameSettingsCreate` + instead. +- encoder API: `JxlEncoderOptionsSetDistance`: use `JxlEncoderSetFrameDistance` + instead. +- encoder API: `JxlEncoderOptionsSetLossless`: use `JxlEncoderSetFrameLossless` + instead. +- encoder API: `JxlEncoderOptionsSetEffort`: use + `JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, effort)` + instead. +- encoder API: `JxlEncoderOptionsSetDecodingSpeed`: use + `JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_DECODING_SPEED, tier)` + instead. +- encoder API: deprecated `JXL_ENC_NOT_SUPPORTED`, the encoder returns + `JXL_ENC_ERROR` instead and there is no need to handle + `JXL_ENC_NOT_SUPPORTED`. 
+ +## [0.6.1] - 2021-10-29 +### Changed + - Security: Fix OOB read in splines rendering (#735 - + [CVE-2021-22563](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22563)) + - Security: Fix OOB copy (read/write) in out-of-order/multi-threaded decoding + (#708 - [CVE-2021-22564](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-22564)) + - Fix segfault in `djxl` tool with `--allow_partial_files` flag (#781). + - Fix border in extra channels when using upsampling (#796) + +## [0.6] - 2021-10-04 +### Added + - API: New functions to decode extra channels: + `JxlDecoderExtraChannelBufferSize` and `JxlDecoderSetExtraChannelBuffer`. + - API: New function `JxlEncoderInitBasicInfo` to initialize `JxlBasicInfo` + (only needed when encoding). NOTE: it is now required to call this function + when using the encoder. Padding was added to the struct for forward + compatibility. + - API: Support for encoding oriented images. + - API: FLOAT16 support in the encoder API. + - Rewrite of the GDK pixbuf loader plugin. Added proper color management and + animation support. + - Rewrite of GIMP plugin. Added compression parameters dialog and switched to + using the public C API. + - Debian packages for GDK pixbuf loader (`libjxl-gdk-pixbuf`) and GIMP + (`libjxl-gimp-plugin`) plugins. + - `cjxl`/`djxl` support for `stdin` and `stdout`. + +### Changed + - API: Renamed the field `alpha_associated` in `JxlExtraChannelInfo` to + `alpha_premultiplied`, to match the corresponding name in `JxlBasicInfo`. + - Improved the 2x2 downscaling method in the encoder for the optional color + channel resampling for low bit rates. + - Fixed: the combination of floating point original data, XYB color encoding, + and Modular mode was broken (in both encoder and decoder). It now works. + NOTE: this can cause the current encoder to write jxl bitstreams that do + not decode with the old decoder. 
In particular this will happen when using + cjxl with PFM, EXR, or floating point PSD input, and a combination of XYB + and modular mode is used (which caused an encoder error before), e.g. + using options like `-m -q 80` (lossy modular), `-d 4.5` or `--progressive_dc=1` + (modular DC frame), or default lossy encoding on an image where patches + end up being used. There is no problem when using cjxl with PNG, JPEG, GIF, + APNG, PPM, PGM, PGX, or integer (8-bit or 16-bit) PSD input. + - `libjxl` static library now bundles skcms, fixing static linking in + downstream projects when skcms is used. + - Spline rendering performance improvements. + - Butteraugli changes for less visual masking. + +## [0.5] - 2021-08-02 +### Added + - API: New function to decode the image using a callback outputting a part of a + row per call. + - API: 16-bit float output support. + - API: `JxlDecoderRewind` and `JxlDecoderSkipFrames` functions to skip more + efficiently to earlier animation frames. + - API: `JxlDecoderSetPreferredColorProfile` function to choose color profile in + certain circumstances. + - encoder: Adding `center_x` and `center_y` flags for more control of the tile + order. + - New encoder speeds `lightning` (1) and `thunder` (2). + +### Changed + - Re-licensed the project under a BSD 3-Clause license. See the + [LICENSE](LICENSE) and [PATENTS](PATENTS) files for details. + - Full JPEG XL part 1 specification support: Implemented all the spec required + to decode files to pixels, including cases that are not used by the encoder + yet. Part 2 of the spec (container format) is final but not fully implemented + here. + - Butteraugli metric improvements. Exact numbers are different from previous + versions. + - Memory reductions during decoding. + - Reduce the size of the jxl_dec library by removing dependencies. + - A few encoding speedups. + - Clarify the security policy. + - Significant encoding improvements (~5 %) and less ringing. 
+ - Butteraugli metric to have some less masking. + - `cjxl` flag `--speed` is deprecated and replaced by the `--effort` synonym. + +### Removed +- API for returning a downsampled DC was deprecated + (`JxlDecoderDCOutBufferSize` and `JxlDecoderSetDCOutBuffer`) and will be + removed in the next release. + +## [0.3.7] - 2021-03-29 +### Changed + - Fix a rounding issue in 8-bit decoding. + +## [0.3.6] - 2021-03-25 +### Changed + - Fix a bug that could result in the generation of invalid codestreams as + well as failure to decode valid streams. + +## [0.3.5] - 2021-03-23 +### Added + - New encode-time options for faster decoding at the cost of quality. + - Man pages for cjxl and djxl. + +### Changed + - Memory usage improvements. + - Faster decoding to 8-bit output with the C API. + - GIMP plugin: avoid the sRGB conversion dialog for sRGB images, do not show + a console window on Windows. + - Various bug fixes. + +## [0.3.4] - 2021-03-16 +### Changed + - Improved box parsing. + - Improved metadata handling. + - Performance and memory usage improvements. + +## [0.3.3] - 2021-03-05 +### Changed + - Performance improvements for small images. + - Add a (flag-protected) non-high-precision mode with better speed. + - Significantly speed up the PQ EOTF. + - Allow optional HDR tone mapping in djxl (--tone_map, --display_nits). + - Change the behavior of djxl -j to make it consistent with cjxl (#153). + - Improve image quality. + - Improve EXIF handling. + +## [0.3.2] - 2021-02-12 +### Changed + - Fix embedded ICC encoding regression + [#149](https://gitlab.com/wg1/jpeg-xl/-/issues/149). + +## [0.3.1] - 2021-02-10 +### Changed + - New experimental Butteraugli API (`jxl/butteraugli.h`). + - Encoder improvements to low quality settings. + - Bug fixes, including fuzzer-found potential security bug fixes. + - Fixed `-q 100` and `-d 0` not triggering lossless modes. 
+ +## [0.3] - 2021-01-29 +### Changed + - Minor change to the Decoder C API to accommodate future work for other ways + to provide input. + - Future decoder C API changes will be backwards compatible. + - Lots of bug fixes since the previous version. + +## [0.2] - 2020-12-24 +### Added + - JPEG XL bitstream format is frozen. Files encoded with 0.2 will be supported + by future versions. + +### Changed + - Files encoded with previous versions are not supported. + +## [0.1.1] - 2020-12-01 + +## [0.1] - 2020-11-14 +### Added + - Initial release of an encoder (`cjxl`) and decoder (`djxl`) that work + together as well as a benchmark tool for comparison with other codecs + (`benchmark_xl`). + - Note: JPEG XL format is in the final stages of standardization, minor changes + to the codestream format are still possible but we are not expecting any + changes beyond what is required by bug fixing. + - API: new decoder API in C, check the `examples/` directory for its example + usage. The C API is a work in progress and likely to change both in API and + ABI in future releases. diff --git a/third-party/libjxl/libjxl/CMakeLists.txt b/third-party/libjxl/libjxl/CMakeLists.txt new file mode 100644 index 0000000000..89c274c9ab --- /dev/null +++ b/third-party/libjxl/libjxl/CMakeLists.txt @@ -0,0 +1,527 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# Ubuntu bionic ships with cmake 3.10. +cmake_minimum_required(VERSION 3.10) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") + +# Honor VISIBILITY_INLINES_HIDDEN on all types of targets. +if(POLICY CMP0063) + cmake_policy(SET CMP0063 NEW) +endif() +# Pass CMAKE_EXE_LINKER_FLAGS to CC and CXX compilers when testing if they work. +if(POLICY CMP0065) + cmake_policy(SET CMP0065 NEW) +endif() + +# Set PIE flags for POSITION_INDEPENDENT_CODE targets, added in 3.14. 
+if(POLICY CMP0083) + cmake_policy(SET CMP0083 NEW) +endif() + +project(LIBJXL LANGUAGES C CXX) + +include(CheckCXXSourceCompiles) +check_cxx_source_compiles( + "int main() { + #if !defined(__EMSCRIPTEN__) + static_assert(false, \"__EMSCRIPTEN__ is not defined\"); + #endif + return 0; + }" + JPEGXL_EMSCRIPTEN +) + +message(STATUS "CMAKE_SYSTEM_PROCESSOR is ${CMAKE_SYSTEM_PROCESSOR}") +include(CheckCXXCompilerFlag) +check_cxx_compiler_flag("-fsanitize=fuzzer-no-link" CXX_FUZZERS_SUPPORTED) +check_cxx_compiler_flag("-Xclang -mconstructor-aliases" CXX_CONSTRUCTOR_ALIASES_SUPPORTED) +check_cxx_compiler_flag("-fmacro-prefix-map=OLD=NEW" CXX_MACRO_PREFIX_MAP) +check_cxx_compiler_flag("-fno-rtti" CXX_NO_RTTI_SUPPORTED) + +# Enabled PIE binaries by default if supported. +include(CheckPIESupported OPTIONAL RESULT_VARIABLE CHECK_PIE_SUPPORTED) +if(CHECK_PIE_SUPPORTED) + check_pie_supported(LANGUAGES CXX) + if(CMAKE_CXX_LINK_PIE_SUPPORTED) + set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) + endif() +endif() + +if(PROVISION_DEPENDENCIES) + # Run script to provision dependencies. + find_program (BASH_PROGRAM bash) + if(BASH_PROGRAM) + execute_process( + COMMAND ${BASH_PROGRAM} ${CMAKE_CURRENT_SOURCE_DIR}/deps.sh + RESULT_VARIABLE PROVISION_DEPENDENCIES_RESULT) + endif() + if(NOT PROVISION_DEPENDENCIES_RESULT EQUAL "0") + message(FATAL_ERROR "${CMAKE_CURRENT_SOURCE_DIR}/deps.sh failed with ${PROVISION_DEPENDENCIES_RESULT}") + endif() +endif() + +### Project build options: +if(CXX_FUZZERS_SUPPORTED) + # Enabled by default except on arm64, Windows and Apple builds. 
+ set(ENABLE_FUZZERS_DEFAULT true) +endif() +find_package(PkgConfig) +if(NOT APPLE AND NOT WIN32 AND NOT HAIKU AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64") + pkg_check_modules(TCMallocMinimalVersionCheck QUIET IMPORTED_TARGET + libtcmalloc_minimal) + if(TCMallocMinimalVersionCheck_FOUND AND + NOT TCMallocMinimalVersionCheck_VERSION VERSION_EQUAL 2.8.0) + # Enabled by default except on Windows and Apple builds for + # tcmalloc != 2.8.0. tcmalloc 2.8.1 already has a fix for this issue. + set(ENABLE_TCMALLOC_DEFAULT true) + else() + message(STATUS + "tcmalloc version ${TCMallocMinimalVersionCheck_VERSION} -- " + "tcmalloc 2.8.0 disabled due to " + "https://github.com/gperftools/gperftools/issues/1204") + endif() +endif() + +check_cxx_source_compiles( + "int main() { + #if !defined(HWY_DISABLED_TARGETS) + static_assert(false, \"HWY_DISABLED_TARGETS is not defined\"); + #endif + return 0; + }" + JXL_HWY_DISABLED_TARGETS_FORCED +) + +set(WARNINGS_AS_ERRORS_DEFAULT false) + +if((SANITIZER STREQUAL "msan") OR JPEGXL_EMSCRIPTEN) + set(BUNDLE_LIBPNG_DEFAULT YES) +else() + set(BUNDLE_LIBPNG_DEFAULT NO) +endif() + +# Standard cmake naming for building shared libraries. 
+get_property(SHARED_LIBS_SUPPORTED GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS) +option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" ${SHARED_LIBS_SUPPORTED}) + +set(JPEGXL_ENABLE_FUZZERS ${ENABLE_FUZZERS_DEFAULT} CACHE BOOL + "Build JPEGXL fuzzer targets.") +set(JPEGXL_ENABLE_DEVTOOLS false CACHE BOOL + "Build JPEGXL developer tools.") +set(JPEGXL_ENABLE_TOOLS true CACHE BOOL + "Build JPEGXL user tools: cjxl and djxl.") +set(JPEGXL_ENABLE_JPEGLI true CACHE BOOL + "Build jpegli library.") +set(JPEGXL_ENABLE_JPEGLI_LIBJPEG true CACHE BOOL + "Build libjpeg.so shared library based on jpegli.") +set(JPEGXL_INSTALL_JPEGLI_LIBJPEG false CACHE BOOL + "Install jpegli version of libjpeg.so system-wide.") +set(JPEGLI_LIBJPEG_LIBRARY_VERSION "62.3.0" CACHE STRING + "Library version of the libjpeg.so shared library that we build.") +set(JPEGLI_LIBJPEG_LIBRARY_SOVERSION "62" CACHE STRING + "Library so-version of the libjpeg.so shared library that we build.") +set(JPEGXL_ENABLE_DOXYGEN true CACHE BOOL + "Generate C API documentation using Doxygen.") +set(JPEGXL_ENABLE_MANPAGES true CACHE BOOL + "Build and install man pages for the command-line tools.") +set(JPEGXL_ENABLE_BENCHMARK true CACHE BOOL + "Build JPEGXL benchmark tools.") +set(JPEGXL_ENABLE_EXAMPLES true CACHE BOOL + "Build JPEGXL library usage examples.") +set(JPEGXL_BUNDLE_LIBPNG ${BUNDLE_LIBPNG_DEFAULT} CACHE BOOL + "Build libpng from source and link it statically.") +set(JPEGXL_ENABLE_JNI true CACHE BOOL + "Build JPEGXL JNI Java wrapper, if Java dependencies are installed.") +set(JPEGXL_ENABLE_SJPEG true CACHE BOOL + "Build JPEGXL with support for encoding with sjpeg.") +set(JPEGXL_ENABLE_OPENEXR true CACHE BOOL + "Build JPEGXL with support for OpenEXR if available.") +set(JPEGXL_ENABLE_SKCMS true CACHE BOOL + "Build with skcms instead of lcms2.") +set(JPEGXL_BUNDLE_SKCMS true CACHE BOOL + "When building with skcms, bundle it into libjxl.a.") +set(JPEGXL_ENABLE_VIEWERS false CACHE BOOL + "Build 
JPEGXL viewer tools for evaluation.") +set(JPEGXL_ENABLE_TCMALLOC ${ENABLE_TCMALLOC_DEFAULT} CACHE BOOL + "Build JPEGXL using gperftools (tcmalloc) allocator.") +set(JPEGXL_ENABLE_PLUGINS false CACHE BOOL + "Build third-party plugins to support JPEG XL in other applications.") +set(JPEGXL_ENABLE_COVERAGE false CACHE BOOL + "Enable code coverage tracking for libjxl. This also enables debug and disables optimizations.") +set(JPEGXL_ENABLE_SIZELESS_VECTORS false CACHE BOOL + "Builds in support for SVE/RVV vectorization") +set(JPEGXL_ENABLE_TRANSCODE_JPEG true CACHE BOOL + "Builds in support for decoding transcoded JXL files back to JPEG,\ + disabling it makes the decoder reject JXL_DEC_JPEG_RECONSTRUCTION events,\ + (default enabled)") +set(JPEGXL_ENABLE_BOXES true CACHE BOOL + "Builds in support for decoding boxes in JXL files,\ + disabling it makes the decoder reject JXL_DEC_BOX events,\ + (default enabled)") +set(JPEGXL_STATIC false CACHE BOOL + "Build tools as static binaries.") +set(JPEGXL_WARNINGS_AS_ERRORS ${WARNINGS_AS_ERRORS_DEFAULT} CACHE BOOL + "Treat warnings as errors during compilation.") +set(JPEGXL_DEP_LICENSE_DIR "" CACHE STRING + "Directory where to search for system dependencies \"copyright\" files.") +set(JPEGXL_FORCE_NEON false CACHE BOOL + "Set flags to enable NEON in arm if not enabled by your toolchain.") +set(JPEGXL_TEST_TOOLS false CACHE BOOL + "Run scripts that test the encoding / decoding tools.") +set(JPEGXL_ENABLE_AVX512 false CACHE BOOL + "Build with AVX512 support (faster on CPUs that support it, but larger binary size).") +set(JPEGXL_ENABLE_AVX512_ZEN4 false CACHE BOOL + "Build with Zen4-optimized AVX512 support (faster on CPUs that support it, but larger binary size).") + +# Force system dependencies. 
+set(JPEGXL_FORCE_SYSTEM_BROTLI false CACHE BOOL + "Force using system installed brotli instead of third_party/brotli source.") +set(JPEGXL_FORCE_SYSTEM_GTEST false CACHE BOOL + "Force using system installed googletest (gtest/gmock) instead of third_party/googletest source.") +set(JPEGXL_FORCE_SYSTEM_LCMS2 false CACHE BOOL + "Force using system installed lcms2 instead of third_party/lcms source.") +set(JPEGXL_FORCE_SYSTEM_HWY false CACHE BOOL + "Force using system installed highway (libhwy-dev) instead of third_party/highway source.") + +# Check minimum compiler versions. Older compilers are not supported and fail +# with hard to understand errors. +if (NOT CMAKE_C_COMPILER_ID STREQUAL CMAKE_CXX_COMPILER_ID) + message(FATAL_ERROR "Different C/C++ compilers set: " + "${CMAKE_C_COMPILER_ID} vs ${CMAKE_CXX_COMPILER_ID}") +endif() +if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + # Android NDK's toolchain.cmake fakes the clang version in + # CMAKE_CXX_COMPILER_VERSION with an incorrect number, so ignore this. 
+ if (NOT CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION MATCHES "clang" + AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5) + message(FATAL_ERROR + "Minimum Clang version required is Clang 5, please update.") + endif() +elseif (CMAKE_CXX_COMPILER_ID MATCHES "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7) + message(FATAL_ERROR + "Minimum GCC version required is 7, please update.") + endif() +endif() + +message(STATUS + "Compiled IDs C:${CMAKE_C_COMPILER_ID}, C++:${CMAKE_CXX_COMPILER_ID}") + +# Always disable SSSE3 since it is rare to have SSSE3 but not SSE4 +set(HWY_DISABLED_TARGETS "HWY_SSSE3") +if (NOT JPEGXL_ENABLE_AVX512) + message(STATUS "Disabled AVX512 (set JPEGXL_ENABLE_AVX512 to enable it)") + set(HWY_DISABLED_TARGETS "${HWY_DISABLED_TARGETS}|HWY_AVX3") + add_definitions(-DFJXL_ENABLE_AVX512=0) +endif() +if (NOT JPEGXL_ENABLE_AVX512_ZEN4) + message(STATUS "Disabled AVX512_ZEN4 (set JPEGXL_ENABLE_AVX512_ZEN4 to enable it)") + set(HWY_DISABLED_TARGETS "${HWY_DISABLED_TARGETS}|HWY_AVX3_ZEN4") +endif() + + + +# CMAKE_EXPORT_COMPILE_COMMANDS is used to generate the compilation database +# used by clang-tidy. +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +if(JPEGXL_STATIC) + set(BUILD_SHARED_LIBS 0) + # Clang developers say that in case to use "static" we have to build stdlib + # ourselves; for real use case we don't care about stdlib, as it is "granted", + # so just linking all other libraries is fine. 
+ if (NOT MSVC AND NOT APPLE) + set(CMAKE_FIND_LIBRARY_SUFFIXES .a) + set(CMAKE_EXE_LINKER_FLAGS + "${CMAKE_EXE_LINKER_FLAGS} -static -static-libgcc -static-libstdc++") + endif() +endif() # JPEGXL_STATIC + +# Threads +set(THREADS_PREFER_PTHREAD_FLAG YES) +find_package(Threads REQUIRED) + +# These settings are important to drive check_cxx_source_compiles +# See CMP0067 (min cmake version is 3.10 anyway) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CXX_STANDARD_REQUIRED YES) + +# Atomics +find_package(Atomics REQUIRED) + +if(JPEGXL_STATIC) + if (MINGW) + # In MINGW libstdc++ uses pthreads directly. When building statically a + # program (regardless of whether the source code uses pthread or not) the + # toolchain will add stdc++ and pthread to the linking step but stdc++ will + # be linked statically while pthread will be linked dynamically. + # To avoid this and have pthread statically linked we need to pass it in + # the command line with "-Wl,-Bstatic -lpthread -Wl,-Bdynamic" but the + # linker will discard it if not used by anything else up to that point in + # the linker command line. If the program or any dependency doesn't use + # pthread directly -lpthread is discarded and libstdc++ (added by the + # toolchain later) will then use the dynamic version. For this we also need + # to pass -lstdc++ explicitly before -lpthread. For pure C programs -lstdc++ + # will be discarded anyway. + # This adds these flags as dependencies for *all* targets. Adding this to + # CMAKE_EXE_LINKER_FLAGS instead would cause them to be included before any + # object files and therefore discarded. This should be set in the + # INTERFACE_LINK_LIBRARIES of Threads::Threads but some third_party targets + # don't depend on it. + link_libraries(-Wl,-Bstatic -lstdc++ -lpthread -Wl,-Bdynamic) + elseif(CMAKE_USE_PTHREADS_INIT) + # "whole-archive" is not supported on OSX. 
+ if (NOT APPLE) + # Set pthreads as a whole-archive, otherwise weak symbols in the static + # libraries will discard pthreads symbols leading to segmentation fault at + # runtime. + message(STATUS "Using -lpthread as --whole-archive") + set_target_properties(Threads::Threads PROPERTIES + INTERFACE_LINK_LIBRARIES + "-Wl,--whole-archive;-lpthread;-Wl,--no-whole-archive") + endif() + endif() +endif() # JPEGXL_STATIC + +if (JPEGXL_EMSCRIPTEN) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread") +endif() + +if (CXX_MACRO_PREFIX_MAP) + add_compile_options(-fmacro-prefix-map=${CMAKE_CURRENT_SOURCE_DIR}=.) +endif() + +if (CXX_NO_RTTI_SUPPORTED) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti") +endif() + +# Internal flags for coverage builds: +set(JPEGXL_COVERAGE_FLAGS) +set(JPEGXL_COVERAGE_LINK_FLAGS) + +if (MSVC) + # TODO(janwas): add flags + add_definitions(-D_CRT_SECURE_NO_WARNINGS) +else () + # Global compiler flags for all targets here and in subdirectories. + add_definitions( + # Avoid changing the binary based on the current time and date. + -D__DATE__="redacted" + -D__TIMESTAMP__="redacted" + -D__TIME__="redacted" + ) + + # TODO(eustas): JXL currently compiles, but does not pass tests... + if (NOT JXL_HWY_DISABLED_TARGETS_FORCED) + if (NOT JPEGXL_ENABLE_SIZELESS_VECTORS) + set(HWY_DISABLED_TARGETS "${HWY_DISABLED_TARGETS}|HWY_SVE|HWY_SVE2|HWY_SVE_256|HWY_SVE2_128|HWY_RVV") + endif() + add_definitions(-DHWY_DISABLED_TARGETS=\(${HWY_DISABLED_TARGETS}\)) + endif() + + # In CMake before 3.12 it is problematic to pass repeated flags like -Xclang. + # For this reason we place them in CMAKE_CXX_FLAGS instead. + # See https://gitlab.kitware.com/cmake/cmake/issues/15826 + + # Machine flags. 
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funwind-tables") + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -mrelax-all") + endif() + if (CXX_CONSTRUCTOR_ALIASES_SUPPORTED) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Xclang -mconstructor-aliases") + endif() + + if(WIN32) + # Not supported by clang-cl, but frame pointers are default on Windows + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") + endif() + + # CPU flags - remove once we have NEON dynamic dispatch + + # TODO(janwas): this also matches M1, but only ARMv7 is intended/needed. + if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm") + if(JPEGXL_FORCE_NEON) + # GCC requires these flags, otherwise __ARM_NEON is undefined. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ + -mfpu=neon-vfpv4 -mfloat-abi=hard") + endif() + endif() + + # Force build with optimizations in release mode. + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O2") + + add_compile_options( + # Ignore this to allow redefining __DATE__ and others. + -Wno-builtin-macro-redefined + + # Global warning settings. + -Wall + ) + + if (JPEGXL_WARNINGS_AS_ERRORS) + add_compile_options(-Werror) + endif () + + if(JPEGXL_ENABLE_COVERAGE) + set(JPEGXL_COVERAGE_FLAGS + -g -O0 -fprofile-arcs -ftest-coverage + -DJXL_ENABLE_ASSERT=0 -DJXL_ENABLE_CHECK=0 + ) + set(JPEGXL_COVERAGE_LINK_FLAGS + --coverage + ) + endif() # JPEGXL_ENABLE_COVERAGE +endif () # !MSVC + +include(GNUInstallDirs) + +# Separately build/configure testing frameworks and other third_party libraries +# to allow disabling tests in those libraries. +include(third_party/testing.cmake) +add_subdirectory(third_party) +# Copy the JXL license file to the output build directory. +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" + ${PROJECT_BINARY_DIR}/LICENSE.jpeg-xl COPYONLY) + +# Enable tests regardless of where they are defined. 
+enable_testing() +include(CTest) +# Specify default location of `testdata`: +if(NOT DEFINED JPEGXL_TEST_DATA_PATH) + set(JPEGXL_TEST_DATA_PATH "${PROJECT_SOURCE_DIR}/testdata") +endif() + +# Libraries. +add_subdirectory(lib) + +if(BUILD_TESTING) + # Script to run tests over the source code in bash. + find_program (BASH_PROGRAM bash) + if(BASH_PROGRAM) + add_test( + NAME bash_test + COMMAND ${BASH_PROGRAM} ${CMAKE_CURRENT_SOURCE_DIR}/bash_test.sh) + endif() +endif() # BUILD_TESTING + +# Documentation generated by Doxygen +if(JPEGXL_ENABLE_DOXYGEN) + find_package(Doxygen) + if(DOXYGEN_FOUND) + set(DOXYGEN_GENERATE_HTML "YES") + set(DOXYGEN_GENERATE_XML "YES") + set(DOXYGEN_STRIP_FROM_PATH "${CMAKE_CURRENT_SOURCE_DIR}/lib/include") + set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "README.md") + if(JPEGXL_WARNINGS_AS_ERRORS) + set(DOXYGEN_WARN_AS_ERROR "YES") + endif() + set(DOXYGEN_QUIET "YES") + doxygen_add_docs(doc + "${CMAKE_CURRENT_SOURCE_DIR}/lib/include" + "${CMAKE_CURRENT_SOURCE_DIR}/doc/api.txt" + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + COMMENT "Generating C API documentation") + + # Add sphinx doc build step for readthedocs.io (requires doxygen too). + find_program(SPHINX_BUILD_PROGRAM sphinx-build) + if(SPHINX_BUILD_PROGRAM) + add_custom_command( + OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rtd/nonexistent" + COMMENT "Generating readthedocs.io output on ${CMAKE_CURRENT_BINARY_DIR}/rtd" + COMMAND ${SPHINX_BUILD_PROGRAM} -q -W -b html -j auto + ${CMAKE_SOURCE_DIR}/doc/sphinx + ${CMAKE_CURRENT_BINARY_DIR}/rtd + DEPENDS doc + ) + # This command runs the documentation generation every time since the output + # target file doesn't exist. 
+ add_custom_target(rtd-html + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/rtd/nonexistent + ) + else() # SPHINX_BUILD_PROGRAM\ + message(WARNING "sphinx-build not found, skipping rtd documentation") + endif() # SPHINX_BUILD_PROGRAM + + else() + # Create a "doc" target for compatibility since "doc" is not otherwise added to + # the build when doxygen is not installed. + add_custom_target(doc false + COMMENT "Error: Can't generate doc since Doxygen not installed.") + endif() # DOXYGEN_FOUND +endif() # JPEGXL_ENABLE_DOXYGEN + +if(JPEGXL_ENABLE_MANPAGES) + find_program(ASCIIDOC a2x) + if(ASCIIDOC) + file(STRINGS "${ASCIIDOC}" ASCIIDOC_SHEBANG LIMIT_COUNT 1) + if(ASCIIDOC_SHEBANG MATCHES "/sh|/bash" OR MINGW) + set(ASCIIDOC_PY_FOUND ON) + # Run the program directly and set ASCIIDOC as empty. + set(ASCIIDOC_PY "${ASCIIDOC}") + set(ASCIIDOC "") + elseif(ASCIIDOC_SHEBANG MATCHES "python2") + find_package(Python2 COMPONENTS Interpreter) + set(ASCIIDOC_PY_FOUND "${Python2_Interpreter_FOUND}") + set(ASCIIDOC_PY Python2::Interpreter) + elseif(ASCIIDOC_SHEBANG MATCHES "python3") + find_package(Python3 COMPONENTS Interpreter) + set(ASCIIDOC_PY_FOUND "${Python3_Interpreter_FOUND}") + set(ASCIIDOC_PY Python3::Interpreter) + else() + find_package(Python COMPONENTS Interpreter QUIET) + if(NOT Python_Interpreter_FOUND) + find_program(ASCIIDOC_PY python) + if(ASCIIDOC_PY) + set(ASCIIDOC_PY_FOUND ON) + endif() + else() + set(ASCIIDOC_PY_FOUND "${Python_Interpreter_FOUND}") + set(ASCIIDOC_PY Python::Interpreter) + endif() + endif() + + if (ASCIIDOC_PY_FOUND) + set(MANPAGE_FILES "") + set(MANPAGES "") + foreach(PAGE IN ITEMS cjxl djxl) + # Invoking the Python interpreter ourselves instead of running the a2x binary + # directly is necessary on MSYS2, otherwise it is run through cmd.exe which + # does not recognize it. 
+ add_custom_command( + OUTPUT "${PAGE}.1" + COMMAND "${ASCIIDOC_PY}" + ARGS ${ASCIIDOC} + --format manpage --destination-dir="${CMAKE_CURRENT_BINARY_DIR}" + "${CMAKE_CURRENT_SOURCE_DIR}/doc/man/${PAGE}.txt" + MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/doc/man/${PAGE}.txt") + list(APPEND MANPAGE_FILES "${CMAKE_CURRENT_BINARY_DIR}/${PAGE}.1") + list(APPEND MANPAGES "${PAGE}.1") + endforeach() + add_custom_target(manpages ALL DEPENDS ${MANPAGES}) + install(FILES ${MANPAGE_FILES} DESTINATION ${CMAKE_INSTALL_MANDIR}/man1) + endif() # ASCIIDOC_PY_FOUND + else() + message(WARNING "asciidoc was not found, the man pages will not be installed.") + endif() # ASCIIDOC +endif() # JPEGXL_ENABLE_MANPAGES + +# Example usage code. +if (JPEGXL_ENABLE_EXAMPLES) + include(examples/examples.cmake) +endif () + +# Plugins for third-party software +if (JPEGXL_ENABLE_PLUGINS) + add_subdirectory(plugins) +endif () + +# Binary tools +add_subdirectory(tools) diff --git a/third-party/libjxl/libjxl/CODE_OF_CONDUCT.md b/third-party/libjxl/libjxl/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..b2d81a3214 --- /dev/null +++ b/third-party/libjxl/libjxl/CODE_OF_CONDUCT.md @@ -0,0 +1,93 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of +experience, education, socio-economic status, nationality, personal appearance, +race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, or to ban temporarily or permanently any +contributor for other behaviors that they deem inappropriate, threatening, +offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +This Code of Conduct also applies outside the project spaces when the Project +Steward has a reasonable belief that an individual's behavior may have a +negative impact on the project or its community. + +## Conflict Resolution + +We do not believe that all conflict is bad; healthy debate and disagreement +often yield positive results. However, it is never okay to be disrespectful or +to engage in behavior that violates the project’s code of conduct. + +If you see someone violating the code of conduct, you are encouraged to address +the behavior directly with those involved. Many issues can be resolved quickly +and easily, and this gives people more control over the outcome of their +dispute. If you are unable to resolve the matter for any reason, or if the +behavior is threatening or harassing, report it. We are dedicated to providing +an environment where participants feel welcome and safe. + +Reports should be directed to Jyrki Alakuijala , the +Project Steward(s) for JPEG XL. It is the Project Steward’s duty to +receive and address reported violations of the code of conduct. They will then +work with a committee consisting of representatives from the Open Source +Programs Office and the Google Open Source Strategy team. If for any reason you +are uncomfortable reaching out to the Project Steward, please email +opensource@google.com. + +We will investigate every complaint, but you may not receive a direct response. +We will use our discretion in determining when and how to follow up on reported +incidents, which may range from not taking action to permanent expulsion from +the project and project-sponsored spaces. We will notify the accused of the +report and provide them an opportunity to discuss it before any action is taken. +The identity of the reporter will be omitted from the details of the report +supplied to the accused. In potentially harmful situations, such as ongoing +harassment or threats to anyone's safety, we may take action without notice. 
+ +## Attribution + +This Code of Conduct is adapted from the Contributor Covenant, version 1.4, +available at +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html diff --git a/third-party/libjxl/libjxl/CONTRIBUTING.md b/third-party/libjxl/libjxl/CONTRIBUTING.md new file mode 100644 index 0000000000..cb6459797c --- /dev/null +++ b/third-party/libjxl/libjxl/CONTRIBUTING.md @@ -0,0 +1,132 @@ +# Contributing to libjxl + +## Contributing with bug reports + +For security-related issues please see [SECURITY.md](SECURITY.md). + +We welcome suggestions, feature requests and bug reports. Before opening a new +issue please take a look if there is already an existing one in the following +link: + + * https://github.com/libjxl/libjxl/issues + +## Contributing with patches and Pull Requests + +We'd love to accept your contributions to the JPEG XL Project. Please read +through this section before sending a Pull Request. + +### Contributor License Agreements + +Our project is open source under the terms outlined in the [LICENSE](LICENSE) +and [PATENTS](PATENTS) files. Before we can accept your contributions, even for +small changes, there are just a few small guidelines you need to follow: + +Please fill out either the individual or corporate Contributor License Agreement +(CLA) with Google. JPEG XL Project is an effort by multiple individuals and +companies, including the initial contributors Cloudinary and Google, but Google +is the legal entity in charge of receiving these CLA and relicensing this +software: + + * If you are an individual writing original source code and you're sure you + own the intellectual property, then you'll need to sign an [individual + CLA](https://code.google.com/legal/individual-cla-v1.0.html). + + * If you work for a company that wants to allow you to contribute your work, + then you'll need to sign a [corporate + CLA](https://code.google.com/legal/corporate-cla-v1.0.html). 
+ +Follow either of the two links above to access the appropriate CLA and +instructions for how to sign and return it. Once we receive it, we'll be able +to accept your pull requests. + +***NOTE***: Only original source code from you and other people that have signed +the CLA can be accepted into the main repository. + +### License + +Contributions are licensed under the project's [LICENSE](LICENSE). Each new +file must include the following header when possible, with comment style adapted +to the language as needed: + +``` +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +``` + +### Code Reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. + +### Contribution philosophy + + * Prefer small changes, even if they don't implement a complete feature. Small + changes are easier to review and can be submitted faster. Think about what's + the smallest unit you can send that makes sense to review and submit in + isolation. For example, new modules that are not yet used by the tools but + have their own unittests are ok. If you have unrelated changes that + you discovered while working on something else, please send them in a + different Pull Request. If you are refactoring code and changing + functionality try to send the refactor first without any change in + functionality. Reviewers may ask you to split a Pull Request and it is + easier to create a smaller change from the beginning. + + * Describe your commits. Add a meaningful description to your commit message, explain what you are changing if it is not trivially obvious, but more importantly explain *why* you are making those changes. 
For example "Fix + build" is not a good commit message, describe what build and if it makes sense + why is this fixing it or why was it failing without this. It is very likely + that people far in the future without any context you have right now will be + looking at your commit trying to figure out why was the change introduced. If + related to an issue in this or another repository include a link to it. + + * Code Style: We follow the [Google C++ Coding + Style](https://google.github.io/styleguide/cppguide.html). A + [clang-format](https://clang.llvm.org/docs/ClangFormat.html) configuration + file is available to automatically format your code, you can invoke it with + the `./ci.sh lint` helper tool. + + * Testing: Test your change and explain in the commit message *how* your + commit was tested. For example adding unittests or in some cases just testing + with the existing ones is enough. In any case, mention what testing was + performed so reviewers can evaluate whether that's enough testing. In many + cases, testing that the Continuous Integration workflow passes is enough. + + * Make one commit per Pull Request / review, unless there's a good reason not + to. If you have multiple changes send multiple Pull Requests and each one can + have its own review. + + * When addressing comments from reviewers prefer to squash or fixup your + edits and force-push your commit. When merging changes into the repository we + don't want to include the history of code review back and forth changes or + typos. Reviewers can click on the "force-pushed" automatic comment on a Pull + Request to see the changes between versions. We use "Rebase and merge" policy + to keep a linear git history which is easier to reason about. + + * Your change must pass the build and test workflows. There's a `ci.sh` script + to help building and testing these configurations. See [building and + testing](doc/building_and_testing.md) for more details. + +### Contributing checklist. 
+ + * Sign the CLA (only needed once per user, see above). + + * AUTHORS: If this is your first contribution, add your name or your + company name to the [AUTHORS](AUTHORS) file for copyright tracking purposes. + + * Style guide. Check `./ci.sh lint`. + + * Meaningful commit description: What and *why*, links to issues, testing + procedure. + + * Squashed multiple edits into a single commit. + + * Upload your changes to your fork and [create a Pull + Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request). + +# Community Guidelines + +This project follows [Google's Open Source Community +Guidelines](https://opensource.google.com/conduct/). diff --git a/third-party/libjxl/libjxl/CONTRIBUTORS b/third-party/libjxl/libjxl/CONTRIBUTORS new file mode 100644 index 0000000000..848096f921 --- /dev/null +++ b/third-party/libjxl/libjxl/CONTRIBUTORS @@ -0,0 +1,23 @@ +# This file lists individuals who made significant contributions to the JPEG XL +# code base, such as design, adding features, performing experiments, ... +# Small changes such as a small bugfix or fixing spelling errors are not +# included. If you'd like to be included in this file thanks to a significant +# contribution, feel free to send a pull request changing this file. +Alex Deymo +Alexander Rhatushnyak +Evgenii Kliuchnikov +Iulia-Maria Comșa +Jan Wassenberg +Jon Sneyers +Jyrki Alakuijala +Krzysztof Potempa +Lode Vandevenne +Luca Versari +Martin Bruse +Moritz Firsching +Renata Khasanova +Robert Obryk +Sami Boukortt +Sebastian Gomez-Gonzalez +Thomas Fischbacher +Zoltan Szabadka diff --git a/third-party/libjxl/libjxl/LICENSE b/third-party/libjxl/libjxl/LICENSE new file mode 100644 index 0000000000..c66034b105 --- /dev/null +++ b/third-party/libjxl/libjxl/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) the JPEG XL Project Authors. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third-party/libjxl/libjxl/PATENTS b/third-party/libjxl/libjxl/PATENTS new file mode 100644 index 0000000000..c95b8f4105 --- /dev/null +++ b/third-party/libjxl/libjxl/PATENTS @@ -0,0 +1,22 @@ +Additional IP Rights Grant (Patents) + +"This implementation" means the copyrightable works distributed by +Google as part of the JPEG XL project. 
+ +Google hereby grants to You a perpetual, worldwide, non-exclusive, +no-charge, royalty-free, irrevocable (except as stated in this section) +patent license to make, have made, use, offer to sell, sell, import, +transfer and otherwise run, modify and propagate the contents of this +implementation of JPEG XL, where such license applies only to those patent +claims, both currently owned or controlled by Google and acquired in +the future, licensable by Google that are necessarily infringed by this +implementation of JPEG XL. This grant does not include claims that would be +infringed only as a consequence of further modification of this +implementation. If you or your agent or exclusive licensee institute or +order or agree to the institution of patent litigation against any +entity (including a cross-claim or counterclaim in a lawsuit) alleging +that this implementation of JPEG XL or any code incorporated within this +implementation of JPEG XL constitutes direct or contributory patent +infringement, or inducement of patent infringement, then any patent +rights granted to you under this License for this implementation of JPEG XL +shall terminate as of the date such litigation is filed. 
diff --git a/third-party/libjxl/libjxl/README.md b/third-party/libjxl/libjxl/README.md new file mode 100644 index 0000000000..1e9a9adbd1 --- /dev/null +++ b/third-party/libjxl/libjxl/README.md @@ -0,0 +1,133 @@ +# JPEG XL reference implementation + +[![Build/Test](https://github.com/libjxl/libjxl/actions/workflows/build_test.yml/badge.svg)]( +https://github.com/libjxl/libjxl/actions/workflows/build_test.yml) +[![Build/Test Cross](https://github.com/libjxl/libjxl/actions/workflows/build_test_cross.yml/badge.svg)]( +https://github.com/libjxl/libjxl/actions/workflows/build_test_cross.yml) +[![Conformance](https://github.com/libjxl/libjxl/actions/workflows/conformance.yml/badge.svg)]( +https://github.com/libjxl/libjxl/actions/workflows/conformance.yml) +[![CIFuzz](https://github.com/libjxl/libjxl/actions/workflows/fuzz.yml/badge.svg)]( +https://github.com/libjxl/libjxl/actions/workflows/fuzz.yml) +[![Releases](https://github.com/libjxl/libjxl/actions/workflows/release.yaml/badge.svg)]( +https://github.com/libjxl/libjxl/actions/workflows/release.yaml) +[![Doc](https://readthedocs.org/projects/libjxl/badge/?version=latest)]( +https://libjxl.readthedocs.io/en/latest/?badge=latest) +[![codecov](https://codecov.io/gh/libjxl/libjxl/branch/main/graph/badge.svg)]( +https://codecov.io/gh/libjxl/libjxl) + +JXL logo + +This repository contains a reference implementation of JPEG XL (encoder and +decoder), called `libjxl`. This software library is +[used by many applications that support JPEG XL](doc/software_support.md). + +JPEG XL was standardized in 2022 as [ISO/IEC 18181](https://jpeg.org/jpegxl/workplan.html). +The [core codestream](doc/format_overview.md#codestream-features) is specified in 18181-1, +the [file format](doc/format_overview.md#file-format-features) in 18181-2. +[Decoder conformance](https://github.com/libjxl/conformance) is defined in 18181-3, +and 18181-4 is the [reference software](https://github.com/libjxl/libjxl). 
+ +The library API, command line options, and tools in this repository are subject +to change, however files encoded with `cjxl` conform to the JPEG XL specification +and can be decoded with current and future `djxl` decoders or the `libjxl` decoding library. + +## Installation + +In most Linux distributions, installing `libjxl` is just a matter of using the package management system. +For example in Debian-based distributions: `apt install libjxl-tools` will install `cjxl` and `djxl` +and other tools like `benchmark_xl` are available in the package `libjxl-devtools`. +On MacOS, you can use [Homebrew](https://brew.sh/): `brew install jpeg-xl`. + +[![libjxl packaging status](https://repology.org/badge/vertical-allrepos/libjxl.svg?exclude_unsupported=1&columns=3&exclude_sources=modules,site&header=libjxl%20packaging%20status)](https://repology.org/project/libjxl/versions) + +From the [releases page](https://github.com/libjxl/libjxl/releases/) the following can be downloaded: + - Windows binaries + - Debian and Ubuntu .deb packages + +Of course you can also [build libjxl from sources](BUILDING.md). + + +## Usage + +To encode a source image to JPEG XL with default settings: + +```bash +cjxl input.png output.jxl +``` + +The desired visual fidelity can be selected using the `--distance` parameter +(in units of just-noticeable difference, where 0 is lossless and the most useful lossy range is 0.5 .. 3.0), +or using `--quality` (on a scale from 0 to 100, roughly matching libjpeg). +The [encode effort](doc/encode_effort.md) can be selected using the `--effort` parameter. + +For more settings run `cjxl --help` or for a full list of options +run `cjxl -v -v --help`. + +To decode a JPEG XL file run: + +```bash +djxl input.jxl output.png +``` + +When possible `cjxl`/`djxl` are able to read/write the following +image formats: .exr, .gif, .jpeg/.jpg, .pfm, .pgm/.ppm, .pgx, .png. 
+Specifically for JPEG files, the default `cjxl` behavior is to apply lossless +recompression and the default `djxl` behavior is to reconstruct the original +JPEG file (when the extension of the output file is .jpg). + +### Benchmarking + +For speed benchmarks on single images in single or multi-threaded decoding +`djxl` can print decoding speed information. See `djxl --help` for details +on the decoding options and note that the output image is optional for +benchmarking purposes. + +For more comprehensive benchmarking options, see the +[benchmarking guide](doc/benchmarking.md). + +### Library API + +Besides the `libjxl` library [API documentation](https://libjxl.readthedocs.io/en/latest/), +there are [example applications](examples/) and [plugins](plugins/) that can be used as a reference or +starting point for developers who wish to integrate `libjxl` in their project. + + +## License + +This software is available under a 3-clause BSD license which can be found in +the [LICENSE](LICENSE) file, with an "Additional IP Rights Grant" as outlined in +the [PATENTS](PATENTS) file. + +Please note that the PATENTS file only mentions Google since Google is the legal +entity receiving the Contributor License Agreements (CLA) from all contributors +to the JPEG XL Project, including the initial main contributors to the JPEG XL +format: Cloudinary and Google. 
+ +## Additional documentation + +### Codec description + +* [JPEG XL Format Overview](doc/format_overview.md) +* [Introductory paper](https://www.spiedigitallibrary.org/proceedings/Download?fullDOI=10.1117%2F12.2529237) (open-access) +* [XL Overview](doc/xl_overview.md) - a brief introduction to the source code modules +* [JPEG XL white paper](https://ds.jpeg.org/whitepapers/jpeg-xl-whitepaper.pdf) +* [JPEG XL official website](https://jpeg.org/jpegxl) +* [JPEG XL community website](https://jpegxl.info) + +### Development process + +* [More information on testing/build options](doc/building_and_testing.md) +* [Git guide for JPEG XL](doc/developing_in_github.md) - for developers +* [Fuzzing](doc/fuzzing.md) - for developers +* [Building Web Assembly artifacts](doc/building_wasm.md) +* [Test coverage on Codecov.io](https://app.codecov.io/gh/libjxl/libjxl) - for + developers +* [libjxl documentation on readthedocs.io](https://libjxl.readthedocs.io/) + +### Contact + +If you encounter a bug or other issue with the software, please open an Issue here. + +There is a [subreddit about JPEG XL](https://www.reddit.com/r/jpegxl/), and +informal chatting with developers and early adopters of `libjxl` can be done on the +[JPEG XL Discord server](https://discord.gg/DqkQgDRTFu). diff --git a/third-party/libjxl/libjxl/SECURITY.md b/third-party/libjxl/libjxl/SECURITY.md new file mode 100644 index 0000000000..d03012a63a --- /dev/null +++ b/third-party/libjxl/libjxl/SECURITY.md @@ -0,0 +1,73 @@ +# Security and Vulnerability Policy for libjxl + +## TL;DR: + +CPE prefix: `cpe:2.3:a:libjxl_project:libjxl` + +To report a security issue, please email libjxl-security@google.com. + +Include in your email a description of the issue, the steps you took to create +the issue, affected versions, and if known, mitigations for the issue. Our +vulnerability management team will acknowledge receiving your email within 3 +working days. + +This project follows a 90 day disclosure timeline. 
+ +For all other bugs, where there are no security implications about disclosing +the unpatched bug, open a [new issue](https://github.com/libjxl/libjxl/issues) +checking first for existing similar issues. If in doubt about the security +impact of a bug you discovered, email first. + +## Policy overview + +libjxl's Security Policy is based on the [Google Open Source program +guidelines](https://github.com/google/oss-vulnerability-guide) for coordinated +vulnerability disclosure. + +Early versions of `libjxl` had a different security policy that didn't provide +security and vulnerability disclosure support. Versions up to and including +0.3.7 are not covered and won't receive any security advisory. + +Only released versions, starting from version 0.5, are covered by this policy. +Development branches, arbitrary commits from `main` branch or even releases with +backported features externally patched on top are not covered. Only those +versions with a release tag in `libjxl`'s repository are covered, starting from +version 0.5. + +## What's a "Security bug" + +A security bug is a bug that can potentially be exploited to let an attacker +gain unauthorized access or privileges such as disclosing information or +arbitrary code execution. Not all fuzzer-found bugs and not all assert() +failures are considered security bugs in libjxl. For a detailed explanation and +examples see our [Security Vulnerabilities Playbook](doc/vuln_playbook.md). + +## What to expect + +To report a security issue, please email libjxl-security@google.com with all the +details about the bug you encountered. + + * Include a description of the issue, steps to reproduce, etc. Compiler + versions, flags, exact version used and even CPU are often relevant given our + usage of SIMD and run-time dispatch of SIMD instructions. + + * A member of our security team will reply to you within 3 business days. Note + that business days are different in different countries. 
+ + * We will evaluate the issue and we may require more input from your side to + reproduce it. + + * If the issue fits in the description of a security bug, we will issue a + CVE, publish a fix and make a new minor or patch release with it. There is + a maximum of 90 day disclosure timeline, we ask you to not publish the + details before the 90 day deadline or the release date (whichever comes + first). + + * In the case that we publish a CVE we will credit the external researcher who + reported the issue. When reporting security issues please let us know if you + need to include specific information while doing so, like for example a + company affiliation. + +Our security team follows the [Security Vulnerabilities +Playbook](doc/vuln_playbook.md). For more details about the process and policies +please take a look at it. diff --git a/third-party/libjxl/libjxl/WORKSPACE b/third-party/libjxl/libjxl/WORKSPACE new file mode 100644 index 0000000000..ba493442ae --- /dev/null +++ b/third-party/libjxl/libjxl/WORKSPACE @@ -0,0 +1,768 @@ +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository", "new_git_repository") + +http_archive( + name = "bazel_skylib", + sha256 = "74d544d96f4a5bb630d465ca8bbcfe231e3594e5aae57e1edbf17a6eb3ca2506", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + "https://github.com/bazelbuild/bazel-skylib/releases/download/1.3.0/bazel-skylib-1.3.0.tar.gz", + ], +) + +load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace") + +bazel_skylib_workspace() + +local_repository( + name = "highway", + path = "third_party/highway", +) + +local_repository( + name = "brotli", + path = "third_party/brotli", +) + +new_local_repository( + name = "googletest", + build_file = "third_party/googletest/BUILD.bazel", + path = "third_party/googletest", +) + +new_local_repository( + name = "skcms", + 
build_file_content = """ +cc_library( + name = "skcms", + srcs = [ + "skcms.cc", + "skcms_internal.h", + "src/Transform_inl.h", + ], + hdrs = ["skcms.h"], + visibility = ["//visibility:public"], +) + """, + path = "third_party/skcms", +) + +new_git_repository( + name = "zlib", + build_file_content = """ +cc_library( + name = "zlib", + defines = ["HAVE_UNISTD_H"], + srcs = [ + "adler32.c", + "compress.c", + "crc32.c", + "crc32.h", + "deflate.c", + "deflate.h", + "gzclose.c", + "gzguts.h", + "gzlib.c", + "gzread.c", + "gzwrite.c", + "infback.c", + "inffast.c", + "inffast.h", + "inffixed.h", + "inflate.c", + "inflate.h", + "inftrees.c", + "inftrees.h", + "trees.c", + "trees.h", + "uncompr.c", + "zconf.h", + "zutil.c", + "zutil.h", + ], + hdrs = ["zlib.h"], + includes = ["."], + visibility = ["//visibility:public"], +) + """, + remote = "https://github.com/madler/zlib", + tag = "v1.2.13", +) + +new_local_repository( + name = "png", + build_file_content = """ +genrule( + name = "pnglibconf", + srcs = ["scripts/pnglibconf.h.prebuilt"], + outs = ["pnglibconf.h"], + cmd = "cp -f $< $@", +) +cc_library( + name = "png", + srcs = [ + "png.c", + "pngconf.h", + "pngdebug.h", + "pngerror.c", + "pngget.c", + "pnginfo.h", + ":pnglibconf", + "pngmem.c", + "pngpread.c", + "pngpriv.h", + "pngread.c", + "pngrio.c", + "pngrtran.c", + "pngrutil.c", + "pngset.c", + "pngstruct.h", + "pngtrans.c", + "pngwio.c", + "pngwrite.c", + "pngwtran.c", + "pngwutil.c", + ], + hdrs = ["png.h"], + includes = ["."], + linkopts = ["-lm"], + visibility = ["//visibility:public"], + deps = ["@zlib//:zlib"], +) + """, + path = "third_party/libpng", +) + +new_git_repository( + name = "libjpeg_turbo", + build_file_content = """ +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +SUBSTITUTIONS = { + "@BUILD@" : "20230208", + "@CMAKE_PROJECT_NAME@" : "libjpeg-turbo", + "@COPYRIGHT_YEAR@" : "2023", + "@INLINE@" : "__inline__", + "@JPEG_LIB_VERSION@" : "62", + "@LIBJPEG_TURBO_VERSION_NUMBER@" : 
"2001091", + "@SIZE_T@" : "8", + "@THREAD_LOCAL@" : "__thread", + "@VERSION@" : "2.1.91", +} +YES_DEFINES = [ + "C_ARITH_CODING_SUPPORTED", "D_ARITH_CODING_SUPPORTED", + "HAVE_BUILTIN_CTZL", "MEM_SRCDST_SUPPORTED" +] +NO_DEFINES = [ + "WITH_SIMD", "RIGHT_SHIFT_IS_UNSIGNED", "HAVE_INTRIN_H" +] +SUBSTITUTIONS.update({ + "#cmakedefine " + key : "#define " + key for key in YES_DEFINES +}) +SUBSTITUTIONS.update({ + "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES +}) +[ + expand_template( + name = "expand_" + src, + template = src + ".in", + out = src, + substitutions = SUBSTITUTIONS, + visibility = ["//visibility:public"], + ) for src in ["jconfig.h", "jconfigint.h", "jversion.h"] +] +JPEG16_SOURCES = [ + "jccolor.c", + "jcdiffct.c", + "jclossls.c", + "jcmainct.c", + "jcprepct.c", + "jcsample.c", + "jdcolor.c", + "jddiffct.c", + "jdlossls.c", + "jdmainct.c", + "jdmerge.c", + "jdpostct.c", + "jdsample.c", + "jquant1.c", + "jquant2.c", + "jutils.c", +] +JPEG12_SOURCES = JPEG16_SOURCES + [ + "jccoefct.c", + "jcdctmgr.c", + "jdcoefct.c", + "jddctmgr.c", + "jfdctfst.c", + "jfdctint.c", + "jidctflt.c", + "jidctfst.c", + "jidctint.c", + "jidctred.c", +] +JPEG_SOURCES = JPEG12_SOURCES + [ + "jaricom.c", + "jcapimin.c", + "jcapistd.c", + "jcarith.c", + "jchuff.c", + "jcicc.c", + "jcinit.c", + "jclhuff.c", + "jcmarker.c", + "jcmaster.c", + "jcomapi.c", + "jcparam.c", + "jcphuff.c", + "jdapimin.c", + "jdapistd.c", + "jdarith.c", + "jdatadst.c", + "jdatasrc.c", + "jdhuff.c", + "jdicc.c", + "jdinput.c", + "jdlhuff.c", + "jdmarker.c", + "jdmaster.c", + "jdphuff.c", + "jdtrans.c", + "jerror.c", + "jfdctflt.c", + "jmemmgr.c", + "jmemnobs.c", +] +JPEG_HEADERS = [ + "jccolext.c", + "jchuff.h", + "jcmaster.h", + "jconfig.h", + "jconfigint.h", + "jdcoefct.h", + "jdcol565.c", + "jdcolext.c", + "jdct.h", + "jdhuff.h", + "jdmainct.h", + "jdmaster.h", + "jdmerge.h", + "jdmrg565.c", + "jdmrgext.c", + "jdsample.h", + "jerror.h", + "jinclude.h", + "jlossls.h", + "jmemsys.h", + 
"jmorecfg.h", + "jpeg_nbits_table.h", + "jpegapicomp.h", + "jpegint.h", + "jpeglib.h", + "jsamplecomp.h", + "jsimd.h", + "jsimddct.h", + "jstdhuff.c", + "jversion.h", +] +cc_library( + name = "jpeg16", + srcs = JPEG16_SOURCES, + hdrs = JPEG_HEADERS, + local_defines = ["BITS_IN_JSAMPLE=16"], + visibility = ["//visibility:public"], +) +cc_library( + name = "jpeg12", + srcs = JPEG12_SOURCES, + hdrs = JPEG_HEADERS, + local_defines = ["BITS_IN_JSAMPLE=12"], + visibility = ["//visibility:public"], +) +cc_library( + name = "jpeg", + srcs = JPEG_SOURCES, + hdrs = JPEG_HEADERS, + deps = [":jpeg16", ":jpeg12"], + includes = ["."], + visibility = ["//visibility:public"], +) +exports_files([ + "jmorecfg.h", + "jpeglib.h", +]) + """, + remote = "https://github.com/libjpeg-turbo/libjpeg-turbo.git", + tag = "2.1.91", +) + +http_archive( + name = "gif", + build_file_content = """ +cc_library( + name = "gif", + srcs = [ + "dgif_lib.c", "egif_lib.c", "gifalloc.c", "gif_err.c", "gif_font.c", + "gif_hash.c", "openbsd-reallocarray.c", "gif_hash.h", + "gif_lib_private.h" + ], + hdrs = ["gif_lib.h"], + includes = ["."], + visibility = ["//visibility:public"], +) + """, + sha256 = "31da5562f44c5f15d63340a09a4fd62b48c45620cd302f77a6d9acf0077879bd", + strip_prefix = "giflib-5.2.1", + url = "https://netcologne.dl.sourceforge.net/project/giflib/giflib-5.2.1.tar.gz", +) + +new_git_repository( + name = "imath", + build_file_content = """ +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +SUBSTITUTIONS = { + "@IMATH_INTERNAL_NAMESPACE@": "Imath_3_1", + "@IMATH_LIB_VERSION@": "3.1.4", + "@IMATH_NAMESPACE_CUSTOM@": "0", + "@IMATH_NAMESPACE@": "Imath", + "@IMATH_PACKAGE_NAME@": "Imath 3.1.4", + "@IMATH_VERSION_MAJOR@": "3", + "@IMATH_VERSION_MINOR@": "1", + "@IMATH_VERSION_PATCH@": "4", + "@IMATH_VERSION@": "3.1.4", +} +YES_DEFINES = [ + "IMATH_HALF_USE_LOOKUP_TABLE", "IMATH_ENABLE_API_VISIBILITY", +] +NO_DEFINES = [ + "IMATH_HAVE_LARGE_STACK", +] +ONE_DEFINES = [ + 
"IMATH_USE_NOEXCEPT", +] +SUBSTITUTIONS.update({ + "#cmakedefine " + key : "#define " + key for key in YES_DEFINES +}) +SUBSTITUTIONS.update({ + "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES +}) +SUBSTITUTIONS.update({ + "#cmakedefine01 " + key : "#define " + key + " 1" for key in ONE_DEFINES +}) +expand_template( + name = "expand_ImathConfig", + template = "config/ImathConfig.h.in", + out = "src/Imath/ImathConfig.h", + substitutions = SUBSTITUTIONS, +) +cc_library( + name = "Imath", + srcs = [ + "src/Imath/ImathColorAlgo.cpp", + ":src/Imath/ImathConfig.h", + "src/Imath/ImathFun.cpp", + "src/Imath/ImathMatrixAlgo.cpp", + "src/Imath/ImathRandom.cpp", + "src/Imath/half.cpp", + "src/Imath/toFloat.h", + ], + hdrs = [ + "src/Imath/ImathBox.h", + "src/Imath/ImathBoxAlgo.h", + "src/Imath/ImathColor.h", + "src/Imath/ImathColorAlgo.h", + "src/Imath/ImathEuler.h", + "src/Imath/ImathExport.h", + "src/Imath/ImathForward.h", + "src/Imath/ImathFrame.h", + "src/Imath/ImathFrustum.h", + "src/Imath/ImathFrustumTest.h", + "src/Imath/ImathFun.h", + "src/Imath/ImathGL.h", + "src/Imath/ImathGLU.h", + "src/Imath/ImathInt64.h", + "src/Imath/ImathInterval.h", + "src/Imath/ImathLine.h", + "src/Imath/ImathLineAlgo.h", + "src/Imath/ImathMath.h", + "src/Imath/ImathMatrix.h", + "src/Imath/ImathMatrixAlgo.h", + "src/Imath/ImathNamespace.h", + "src/Imath/ImathPlane.h", + "src/Imath/ImathPlatform.h", + "src/Imath/ImathQuat.h", + "src/Imath/ImathRandom.h", + "src/Imath/ImathRoots.h", + "src/Imath/ImathShear.h", + "src/Imath/ImathSphere.h", + "src/Imath/ImathTypeTraits.h", + "src/Imath/ImathVec.h", + "src/Imath/ImathVecAlgo.h", + "src/Imath/half.h", + "src/Imath/halfFunction.h", + "src/Imath/halfLimits.h", + ], + includes = ["src/Imath"], + visibility = ["//visibility:public"], +) +""", + remote = "https://github.com/AcademySoftwareFoundation/imath", + tag = "v3.1.5", +) + +new_git_repository( + name = "openexr", + build_file_content = """ 
+load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +SUBSTITUTIONS = { + "@IEX_INTERNAL_NAMESPACE@": "Iex_3_0", + "@IEX_NAMESPACE_CUSTOM@": "0", + "@IEX_NAMESPACE@": "Iex", + "@ILMTHREAD_INTERNAL_NAMESPACE@": "IlmThread_3_0", + "@ILMTHREAD_NAMESPACE_CUSTOM@": "0", + "@ILMTHREAD_NAMESPACE@": "IlmThread", + "@OPENEXR_IMF_NAMESPACE@": "Imf", + "@OPENEXR_INTERNAL_IMF_NAMESPACE@": "Imf_3_0", + "@OPENEXR_LIB_VERSION@": "3.0.4", + "@OPENEXR_NAMESPACE_CUSTOM@": "0", + "@OPENEXR_PACKAGE_NAME@": "OpenEXR 3.0.4", + "@OPENEXR_VERSION_EXTRA@": "", + "@OPENEXR_VERSION_MAJOR@": "3", + "@OPENEXR_VERSION_MINOR@": "0", + "@OPENEXR_VERSION_PATCH@": "4", + "@OPENEXR_VERSION@": "3.0.4", +} +YES_DEFINES = [ + "OPENEXR_ENABLE_API_VISIBILITY", "OPENEXR_IMF_HAVE_COMPLETE_IOMANIP", + "OPENEXR_HAVE_LARGE_STACK", +] +NO_DEFINES = [ + "HAVE_UCONTEXT_H", "IEX_HAVE_CONTROL_REGISTER_SUPPORT", + "IEX_HAVE_SIGCONTEXT_CONTROL_REGISTER_SUPPORT", "OPENEXR_IMF_HAVE_DARWIN", + "OPENEXR_IMF_HAVE_GCC_INLINE_ASM_AVX", "OPENEXR_IMF_HAVE_LINUX_PROCFS", + "OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN", +] +ONE_DEFINES = [ + "ILMTHREAD_THREADING_ENABLED", +] +ZERO_DEFINES = [ + "ILMTHREAD_HAVE_POSIX_SEMAPHORES", +] +SUBSTITUTIONS.update({ + "#cmakedefine " + key : "#define " + key for key in YES_DEFINES +}) +SUBSTITUTIONS.update({ + "#cmakedefine " + key : "// #define " + key for key in NO_DEFINES +}) +SUBSTITUTIONS.update({ + "#cmakedefine01 " + key : "#define " + key + " 1" for key in ONE_DEFINES +}) +SUBSTITUTIONS.update({ + "#cmakedefine01 " + key : "#define " + key + " 0" for key in ZERO_DEFINES +}) +[ + expand_template( + name = "expand_" + item, + template = "cmake/" + item + ".h.in", + out = "src/lib/Iex/" + item + ".h", + substitutions = SUBSTITUTIONS, + ) for item in ["IexConfig", "IexConfigInternal"] +] +[ +expand_template( + name = "expand_" + item, + template = "cmake/" + item + ".h.in", + out = "src/lib/IlmThread/" + item + ".h", + substitutions = SUBSTITUTIONS, + ) for item in 
["IlmThreadConfig"] +] +[ +expand_template( + name = "expand_" + item, + template = "cmake/" + item + ".h.in", + out = "src/lib/OpenEXR/" + item + ".h", + substitutions = SUBSTITUTIONS, + ) for item in ["OpenEXRConfig", "OpenEXRConfigInternal"] +] +cc_library( + name = "Iex", + srcs = [ + "src/lib/Iex/IexBaseExc.cpp", + "src/lib/Iex/IexMathFloatExc.cpp", + "src/lib/Iex/IexMathFpu.cpp", + "src/lib/Iex/IexThrowErrnoExc.cpp", + ], + hdrs = [ + "src/lib/Iex/Iex.h", + "src/lib/Iex/IexBaseExc.h", + ":src/lib/Iex/IexConfig.h", + ":src/lib/Iex/IexConfigInternal.h", + "src/lib/Iex/IexErrnoExc.h", + "src/lib/Iex/IexExport.h", + "src/lib/Iex/IexForward.h", + "src/lib/Iex/IexMacros.h", + "src/lib/Iex/IexMathExc.h", + "src/lib/Iex/IexMathFloatExc.h", + "src/lib/Iex/IexMathFpu.h", + "src/lib/Iex/IexMathIeeeExc.h", + "src/lib/Iex/IexNamespace.h", + "src/lib/Iex/IexThrowErrnoExc.h", + ":src/lib/OpenEXR/OpenEXRConfig.h", + ], + includes = [ + "src/lib/Iex", + "src/lib/OpenEXR", + ], +) + +cc_library( + name = "IlmThread", + srcs = [ + "src/lib/IlmThread/IlmThread.cpp", + "src/lib/IlmThread/IlmThreadPool.cpp", + "src/lib/IlmThread/IlmThreadSemaphore.cpp", + "src/lib/IlmThread/IlmThreadSemaphoreOSX.cpp", + "src/lib/IlmThread/IlmThreadSemaphorePosix.cpp", + "src/lib/IlmThread/IlmThreadSemaphorePosixCompat.cpp", + "src/lib/IlmThread/IlmThreadSemaphoreWin32.cpp", + ], + hdrs = [ + "src/lib/IlmThread/IlmThread.h", + ":src/lib/IlmThread/IlmThreadConfig.h", + "src/lib/IlmThread/IlmThreadExport.h", + "src/lib/IlmThread/IlmThreadForward.h", + "src/lib/IlmThread/IlmThreadMutex.h", + "src/lib/IlmThread/IlmThreadNamespace.h", + "src/lib/IlmThread/IlmThreadPool.h", + "src/lib/IlmThread/IlmThreadSemaphore.h", + ], + includes = ["src/lib/IlmThread"], + deps = [":Iex"], +) +cc_library( + name = "OpenEXR", + srcs = [ + "src/lib/OpenEXR/ImfAcesFile.cpp", + "src/lib/OpenEXR/ImfAttribute.cpp", + "src/lib/OpenEXR/ImfB44Compressor.cpp", + "src/lib/OpenEXR/ImfBoxAttribute.cpp", + 
"src/lib/OpenEXR/ImfCRgbaFile.cpp", + "src/lib/OpenEXR/ImfChannelList.cpp", + "src/lib/OpenEXR/ImfChannelListAttribute.cpp", + "src/lib/OpenEXR/ImfChromaticities.cpp", + "src/lib/OpenEXR/ImfChromaticitiesAttribute.cpp", + "src/lib/OpenEXR/ImfCompositeDeepScanLine.cpp", + "src/lib/OpenEXR/ImfCompressionAttribute.cpp", + "src/lib/OpenEXR/ImfCompressor.cpp", + "src/lib/OpenEXR/ImfConvert.cpp", + "src/lib/OpenEXR/ImfDeepCompositing.cpp", + "src/lib/OpenEXR/ImfDeepFrameBuffer.cpp", + "src/lib/OpenEXR/ImfDeepImageStateAttribute.cpp", + "src/lib/OpenEXR/ImfDeepScanLineInputFile.cpp", + "src/lib/OpenEXR/ImfDeepScanLineInputPart.cpp", + "src/lib/OpenEXR/ImfDeepScanLineOutputFile.cpp", + "src/lib/OpenEXR/ImfDeepScanLineOutputPart.cpp", + "src/lib/OpenEXR/ImfDeepTiledInputFile.cpp", + "src/lib/OpenEXR/ImfDeepTiledInputPart.cpp", + "src/lib/OpenEXR/ImfDeepTiledOutputFile.cpp", + "src/lib/OpenEXR/ImfDeepTiledOutputPart.cpp", + "src/lib/OpenEXR/ImfDoubleAttribute.cpp", + "src/lib/OpenEXR/ImfDwaCompressor.cpp", + "src/lib/OpenEXR/ImfEnvmap.cpp", + "src/lib/OpenEXR/ImfEnvmapAttribute.cpp", + "src/lib/OpenEXR/ImfFastHuf.cpp", + "src/lib/OpenEXR/ImfFloatAttribute.cpp", + "src/lib/OpenEXR/ImfFloatVectorAttribute.cpp", + "src/lib/OpenEXR/ImfFrameBuffer.cpp", + "src/lib/OpenEXR/ImfFramesPerSecond.cpp", + "src/lib/OpenEXR/ImfGenericInputFile.cpp", + "src/lib/OpenEXR/ImfGenericOutputFile.cpp", + "src/lib/OpenEXR/ImfHeader.cpp", + "src/lib/OpenEXR/ImfHuf.cpp", + "src/lib/OpenEXR/ImfIDManifest.cpp", + "src/lib/OpenEXR/ImfIDManifestAttribute.cpp", + "src/lib/OpenEXR/ImfIO.cpp", + "src/lib/OpenEXR/ImfInputFile.cpp", + "src/lib/OpenEXR/ImfInputPart.cpp", + "src/lib/OpenEXR/ImfInputPartData.cpp", + "src/lib/OpenEXR/ImfIntAttribute.cpp", + "src/lib/OpenEXR/ImfKeyCode.cpp", + "src/lib/OpenEXR/ImfKeyCodeAttribute.cpp", + "src/lib/OpenEXR/ImfLineOrderAttribute.cpp", + "src/lib/OpenEXR/ImfLut.cpp", + "src/lib/OpenEXR/ImfMatrixAttribute.cpp", + "src/lib/OpenEXR/ImfMisc.cpp", + 
"src/lib/OpenEXR/ImfMultiPartInputFile.cpp", + "src/lib/OpenEXR/ImfMultiPartOutputFile.cpp", + "src/lib/OpenEXR/ImfMultiView.cpp", + "src/lib/OpenEXR/ImfOpaqueAttribute.cpp", + "src/lib/OpenEXR/ImfOutputFile.cpp", + "src/lib/OpenEXR/ImfOutputPart.cpp", + "src/lib/OpenEXR/ImfOutputPartData.cpp", + "src/lib/OpenEXR/ImfPartType.cpp", + "src/lib/OpenEXR/ImfPizCompressor.cpp", + "src/lib/OpenEXR/ImfPreviewImage.cpp", + "src/lib/OpenEXR/ImfPreviewImageAttribute.cpp", + "src/lib/OpenEXR/ImfPxr24Compressor.cpp", + "src/lib/OpenEXR/ImfRational.cpp", + "src/lib/OpenEXR/ImfRationalAttribute.cpp", + "src/lib/OpenEXR/ImfRgbaFile.cpp", + "src/lib/OpenEXR/ImfRgbaYca.cpp", + "src/lib/OpenEXR/ImfRle.cpp", + "src/lib/OpenEXR/ImfRleCompressor.cpp", + "src/lib/OpenEXR/ImfScanLineInputFile.cpp", + "src/lib/OpenEXR/ImfStandardAttributes.cpp", + "src/lib/OpenEXR/ImfStdIO.cpp", + "src/lib/OpenEXR/ImfStringAttribute.cpp", + "src/lib/OpenEXR/ImfStringVectorAttribute.cpp", + "src/lib/OpenEXR/ImfSystemSpecific.cpp", + "src/lib/OpenEXR/ImfTestFile.cpp", + "src/lib/OpenEXR/ImfThreading.cpp", + "src/lib/OpenEXR/ImfTileDescriptionAttribute.cpp", + "src/lib/OpenEXR/ImfTileOffsets.cpp", + "src/lib/OpenEXR/ImfTiledInputFile.cpp", + "src/lib/OpenEXR/ImfTiledInputPart.cpp", + "src/lib/OpenEXR/ImfTiledMisc.cpp", + "src/lib/OpenEXR/ImfTiledOutputFile.cpp", + "src/lib/OpenEXR/ImfTiledOutputPart.cpp", + "src/lib/OpenEXR/ImfTiledRgbaFile.cpp", + "src/lib/OpenEXR/ImfTimeCode.cpp", + "src/lib/OpenEXR/ImfTimeCodeAttribute.cpp", + "src/lib/OpenEXR/ImfVecAttribute.cpp", + "src/lib/OpenEXR/ImfVersion.cpp", + "src/lib/OpenEXR/ImfWav.cpp", + "src/lib/OpenEXR/ImfZip.cpp", + "src/lib/OpenEXR/ImfZipCompressor.cpp", + "src/lib/OpenEXR/b44ExpLogTable.h", + "src/lib/OpenEXR/dwaLookups.h", + ], + hdrs = [ + ":src/lib/Iex/IexConfig.h", + ":src/lib/Iex/IexConfigInternal.h", + ":src/lib/IlmThread/IlmThreadConfig.h", + "src/lib/OpenEXR/ImfAcesFile.h", + "src/lib/OpenEXR/ImfArray.h", + "src/lib/OpenEXR/ImfAttribute.h", + 
"src/lib/OpenEXR/ImfAutoArray.h", + "src/lib/OpenEXR/ImfB44Compressor.h", + "src/lib/OpenEXR/ImfBoxAttribute.h", + "src/lib/OpenEXR/ImfCRgbaFile.h", + "src/lib/OpenEXR/ImfChannelList.h", + "src/lib/OpenEXR/ImfChannelListAttribute.h", + "src/lib/OpenEXR/ImfCheckedArithmetic.h", + "src/lib/OpenEXR/ImfChromaticities.h", + "src/lib/OpenEXR/ImfChromaticitiesAttribute.h", + "src/lib/OpenEXR/ImfCompositeDeepScanLine.h", + "src/lib/OpenEXR/ImfCompression.h", + "src/lib/OpenEXR/ImfCompressionAttribute.h", + "src/lib/OpenEXR/ImfCompressor.h", + "src/lib/OpenEXR/ImfConvert.h", + "src/lib/OpenEXR/ImfDeepCompositing.h", + "src/lib/OpenEXR/ImfDeepFrameBuffer.h", + "src/lib/OpenEXR/ImfDeepImageState.h", + "src/lib/OpenEXR/ImfDeepImageStateAttribute.h", + "src/lib/OpenEXR/ImfDeepScanLineInputFile.h", + "src/lib/OpenEXR/ImfDeepScanLineInputPart.h", + "src/lib/OpenEXR/ImfDeepScanLineOutputFile.h", + "src/lib/OpenEXR/ImfDeepScanLineOutputPart.h", + "src/lib/OpenEXR/ImfDeepTiledInputFile.h", + "src/lib/OpenEXR/ImfDeepTiledInputPart.h", + "src/lib/OpenEXR/ImfDeepTiledOutputFile.h", + "src/lib/OpenEXR/ImfDeepTiledOutputPart.h", + "src/lib/OpenEXR/ImfDoubleAttribute.h", + "src/lib/OpenEXR/ImfDwaCompressor.h", + "src/lib/OpenEXR/ImfDwaCompressorSimd.h", + "src/lib/OpenEXR/ImfEnvmap.h", + "src/lib/OpenEXR/ImfEnvmapAttribute.h", + "src/lib/OpenEXR/ImfExport.h", + "src/lib/OpenEXR/ImfFastHuf.h", + "src/lib/OpenEXR/ImfFloatAttribute.h", + "src/lib/OpenEXR/ImfFloatVectorAttribute.h", + "src/lib/OpenEXR/ImfForward.h", + "src/lib/OpenEXR/ImfFrameBuffer.h", + "src/lib/OpenEXR/ImfFramesPerSecond.h", + "src/lib/OpenEXR/ImfGenericInputFile.h", + "src/lib/OpenEXR/ImfGenericOutputFile.h", + "src/lib/OpenEXR/ImfHeader.h", + "src/lib/OpenEXR/ImfHuf.h", + "src/lib/OpenEXR/ImfIDManifest.h", + "src/lib/OpenEXR/ImfIDManifestAttribute.h", + "src/lib/OpenEXR/ImfIO.h", + "src/lib/OpenEXR/ImfInputFile.h", + "src/lib/OpenEXR/ImfInputPart.h", + "src/lib/OpenEXR/ImfInputPartData.h", + 
"src/lib/OpenEXR/ImfInputStreamMutex.h", + "src/lib/OpenEXR/ImfInt64.h", + "src/lib/OpenEXR/ImfIntAttribute.h", + "src/lib/OpenEXR/ImfKeyCode.h", + "src/lib/OpenEXR/ImfKeyCodeAttribute.h", + "src/lib/OpenEXR/ImfLineOrder.h", + "src/lib/OpenEXR/ImfLineOrderAttribute.h", + "src/lib/OpenEXR/ImfLut.h", + "src/lib/OpenEXR/ImfMatrixAttribute.h", + "src/lib/OpenEXR/ImfMisc.h", + "src/lib/OpenEXR/ImfMultiPartInputFile.h", + "src/lib/OpenEXR/ImfMultiPartOutputFile.h", + "src/lib/OpenEXR/ImfMultiView.h", + "src/lib/OpenEXR/ImfName.h", + "src/lib/OpenEXR/ImfNamespace.h", + "src/lib/OpenEXR/ImfOpaqueAttribute.h", + "src/lib/OpenEXR/ImfOptimizedPixelReading.h", + "src/lib/OpenEXR/ImfOutputFile.h", + "src/lib/OpenEXR/ImfOutputPart.h", + "src/lib/OpenEXR/ImfOutputPartData.h", + "src/lib/OpenEXR/ImfOutputStreamMutex.h", + "src/lib/OpenEXR/ImfPartHelper.h", + "src/lib/OpenEXR/ImfPartType.h", + "src/lib/OpenEXR/ImfPixelType.h", + "src/lib/OpenEXR/ImfPizCompressor.h", + "src/lib/OpenEXR/ImfPreviewImage.h", + "src/lib/OpenEXR/ImfPreviewImageAttribute.h", + "src/lib/OpenEXR/ImfPxr24Compressor.h", + "src/lib/OpenEXR/ImfRational.h", + "src/lib/OpenEXR/ImfRationalAttribute.h", + "src/lib/OpenEXR/ImfRgba.h", + "src/lib/OpenEXR/ImfRgbaFile.h", + "src/lib/OpenEXR/ImfRgbaYca.h", + "src/lib/OpenEXR/ImfRle.h", + "src/lib/OpenEXR/ImfRleCompressor.h", + "src/lib/OpenEXR/ImfScanLineInputFile.h", + "src/lib/OpenEXR/ImfSimd.h", + "src/lib/OpenEXR/ImfStandardAttributes.h", + "src/lib/OpenEXR/ImfStdIO.h", + "src/lib/OpenEXR/ImfStringAttribute.h", + "src/lib/OpenEXR/ImfStringVectorAttribute.h", + "src/lib/OpenEXR/ImfSystemSpecific.h", + "src/lib/OpenEXR/ImfTestFile.h", + "src/lib/OpenEXR/ImfThreading.h", + "src/lib/OpenEXR/ImfTileDescription.h", + "src/lib/OpenEXR/ImfTileDescriptionAttribute.h", + "src/lib/OpenEXR/ImfTileOffsets.h", + "src/lib/OpenEXR/ImfTiledInputFile.h", + "src/lib/OpenEXR/ImfTiledInputPart.h", + "src/lib/OpenEXR/ImfTiledMisc.h", + "src/lib/OpenEXR/ImfTiledOutputFile.h", + 
"src/lib/OpenEXR/ImfTiledOutputPart.h", + "src/lib/OpenEXR/ImfTiledRgbaFile.h", + "src/lib/OpenEXR/ImfTimeCode.h", + "src/lib/OpenEXR/ImfTimeCodeAttribute.h", + "src/lib/OpenEXR/ImfVecAttribute.h", + "src/lib/OpenEXR/ImfVersion.h", + "src/lib/OpenEXR/ImfWav.h", + "src/lib/OpenEXR/ImfXdr.h", + "src/lib/OpenEXR/ImfZip.h", + "src/lib/OpenEXR/ImfZipCompressor.h", + ":src/lib/OpenEXR/OpenEXRConfig.h", + ":src/lib/OpenEXR/OpenEXRConfigInternal.h", + ], + includes = ["src/lib/OpenEXR"], + deps = [ + ":IlmThread", + "@imath//:Imath", + "@zlib//:zlib", + ], + visibility = ["//visibility:public"], +) +""", + remote = "https://github.com/AcademySoftwareFoundation/openexr", + tag = "v3.1.5", +) diff --git a/third-party/libjxl/libjxl/bash_test.sh b/third-party/libjxl/libjxl/bash_test.sh new file mode 100755 index 0000000000..9a8665c55e --- /dev/null +++ b/third-party/libjxl/libjxl/bash_test.sh @@ -0,0 +1,317 @@ +#!/bin/bash +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# Tests implemented in bash. These typically will run checks about the source +# code rather than the compiled one. + +MYDIR=$(dirname $(realpath "$0")) + +set -u + +test_includes() { + local ret=0 + local f + for f in $(git ls-files | grep -E '(\.cc|\.cpp|\.h)$'); do + if [ ! -e "$f" ]; then + continue + fi + # Check that the full paths to the public headers are not used, since users + # of the library will include the library as: #include "jxl/foobar.h". + if grep -i -H -n -E '#include\s*[<"]lib/include/jxl' "$f" >&2; then + echo "Don't add \"include/\" to the include path of public headers." >&2 + ret=1 + fi + + if [[ "${f#third_party/}" == "$f" ]]; then + # $f is not in third_party/ + + # Check that local files don't use the full path to third_party/ + # directory since the installed versions will not have that path. + # Add an exception for third_party/dirent.h. 
+ if grep -v -F 'third_party/dirent.h' "$f" | \ + grep -i -H -n -E '#include\s*[<"]third_party/' >&2 && + [[ $ret -eq 0 ]]; then + cat >&2 <&2 + ret=1 + fi + done + return ${ret} +} + +test_copyright() { + local ret=0 + local f + for f in $( + git ls-files | grep -E \ + '(Dockerfile.*|\.c|\.cc|\.cpp|\.gni|\.h|\.java|\.sh|\.m|\.py|\.ui|\.yml)$'); do + if [ ! -e "$f" ]; then + continue + fi + if [[ "${f#third_party/}" == "$f" ]]; then + # $f is not in third_party/ + if ! head -n 10 "$f" | + grep -F 'Copyright (c) the JPEG XL Project Authors.' >/dev/null ; then + echo "$f: Missing Copyright blob near the top of the file." >&2 + ret=1 + fi + if ! head -n 10 "$f" | + grep -F 'Use of this source code is governed by a BSD-style' \ + >/dev/null ; then + echo "$f: Missing License blob near the top of the file." >&2 + ret=1 + fi + fi + done + return ${ret} +} + +# Check that we don't use "%zu" or "%zd" in format string for size_t. +test_printf_size_t() { + local ret=0 + if grep -n -E '%[0-9]*z[udx]' \ + $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$'); then + echo "Don't use '%zu' or '%zd' in a format string, instead use " \ + "'%\" PRIuS \"' or '%\" PRIdS \"'." >&2 + ret=1 + fi + + if grep -n -E 'gtest\.h' \ + $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$' | grep -v -F /testing.h); then + echo "Don't include gtest directly, instead include 'testing.h'. " >&2 + ret=1 + fi + + if grep -n -E 'gmock\.h' \ + $(git ls-files | grep -E '(\.c|\.cc|\.cpp|\.h)$' | grep -v -F /testing.h); then + echo "Don't include gmock directly, instead include 'testing.h'. " >&2 + ret=1 + fi + + local f + for f in $(git ls-files | grep -E "\.cc$" | xargs grep 'PRI[udx]S' | + cut -f 1 -d : | uniq); do + if [ ! -e "$f" ]; then + continue + fi + if ! grep -F printf_macros.h "$f" >/dev/null; then + echo "$f: Add lib/jxl/base/printf_macros.h for PRI.S, or use other " \ + "types for code outside lib/jxl library." 
>&2 + ret=1 + fi + done + + for f in $(git ls-files | grep -E "\.h$" | grep -v -E '(printf_macros\.h|testing\.h)' | + xargs grep -n 'PRI[udx]S'); do + # Having PRIuS / PRIdS in a header file means that printf_macros.h may + # be included before a system header, in particular before gtest headers. + # those may re-define PRIuS unconditionally causing a compile error. + echo "$f: Don't use PRI.S in header files. Sorry." + ret=1 + done + + return ${ret} +} + +# Check that "dec_" code doesn't depend on "enc_" headers. +test_dec_enc_deps() { + local ret=0 + local f + for f in $(git ls-files | grep -E '/dec_'); do + if [ ! -e "$f" ]; then + continue + fi + if [[ "${f#third_party/}" == "$f" ]]; then + # $f is not in third_party/ + if grep -n -H -E "#include.*/enc_" "$f" >&2; then + echo "$f: Don't include \"enc_*\" files from \"dec_*\" files." >&2 + ret=1 + fi + fi + done + return ${ret} +} + +# Check for git merge conflict markers. +test_merge_conflict() { + local ret=0 + TEXT_FILES='(\.cc|\.cpp|\.h|\.sh|\.m|\.py|\.md|\.txt|\.cmake)$' + for f in $(git ls-files | grep -E "${TEXT_FILES}"); do + if [ ! -e "$f" ]; then + continue + fi + if grep -E '^<<<<<<< ' "$f"; then + echo "$f: Found git merge conflict marker. Please resolve." >&2 + ret=1 + fi + done + return ${ret} +} + +# Check that the library and the package have the same version. This prevents +# accidentally having them out of sync. 
+get_version() { + local varname=$1 + local line=$(grep -F "set(${varname} " lib/CMakeLists.txt | head -n 1) + [[ -n "${line}" ]] + line="${line#set(${varname} }" + line="${line%)}" + echo "${line}" +} + +test_version() { + local major=$(get_version JPEGXL_MAJOR_VERSION) + local minor=$(get_version JPEGXL_MINOR_VERSION) + local patch=$(get_version JPEGXL_PATCH_VERSION) + # Check that the version is not empty + if [[ -z "${major}${minor}${patch}" ]]; then + echo "Couldn't parse version from CMakeLists.txt" >&2 + return 1 + fi + local pkg_version=$(head -n 1 debian/changelog) + # Get only the part between the first "jpeg-xl (" and the following ")". + pkg_version="${pkg_version#jpeg-xl (}" + pkg_version="${pkg_version%%)*}" + if [[ -z "${pkg_version}" ]]; then + echo "Couldn't parse version from debian package" >&2 + return 1 + fi + + local lib_version="${major}.${minor}.${patch}" + lib_version="${lib_version%.0}" + if [[ "${pkg_version}" != "${lib_version}"* ]]; then + echo "Debian package version (${pkg_version}) doesn't match library" \ + "version (${lib_version})." >&2 + return 1 + fi + return 0 +} + +# Check that the SHA versions in deps.sh matches the git submodules. +test_deps_version() { + while IFS= read -r line; do + if [[ "${line:0:10}" != "[submodule" ]]; then + continue + fi + line="${line#[submodule \"}" + line="${line%\"]}" + local varname=$(tr '[:lower:]' '[:upper:]' <<< "${line}") + varname="${varname/\//_}" + if ! grep -F "${varname}=" deps.sh >/dev/null; then + # Ignoring submodule not in deps.sh + continue + fi + local deps_sha=$(grep -F "${varname}=" deps.sh | cut -f 2 -d '"') + [[ -n "${deps_sha}" ]] + local git_sha=$(git ls-tree -r HEAD "${line}" | cut -f 1 | cut -f 3 -d ' ') + if [[ "${deps_sha}" != "${git_sha}" ]]; then + cat >&2 </dev/null; then + cat >&2 <&2; then + echo "Don't use \"%n\"." >&2 + ret=1 + fi + done + return ${ret} +} + +main() { + local ret=0 + cd "${MYDIR}" + + if ! 
git rev-parse >/dev/null 2>/dev/null; then + echo "Not a git checkout, skipping bash_test" + return 0 + fi + + IFS=$'\n' + for f in $(declare -F); do + local test_name=$(echo "$f" | cut -f 3 -d ' ') + # Runs all the local bash functions that start with "test_". + if [[ "${test_name}" == test_* ]]; then + echo "Test ${test_name}: Start" + if ${test_name}; then + echo "Test ${test_name}: PASS" + else + echo "Test ${test_name}: FAIL" + ret=1 + fi + fi + done + return ${ret} +} + +main "$@" diff --git a/third-party/libjxl/libjxl/ci.sh b/third-party/libjxl/libjxl/ci.sh new file mode 100755 index 0000000000..57e26d7340 --- /dev/null +++ b/third-party/libjxl/libjxl/ci.sh @@ -0,0 +1,1552 @@ +#!/usr/bin/env bash +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# Continuous integration helper module. This module is meant to be called from +# the .gitlab-ci.yml file during the continuous integration build, as well as +# from the command line for developers. + +set -eu + +OS=`uname -s` + +MYDIR=$(dirname $(realpath "$0")) + +### Environment parameters: +TEST_STACK_LIMIT="${TEST_STACK_LIMIT:-256}" +CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-RelWithDebInfo} +CMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH:-} +CMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER:-} +CMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER:-} +CMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM:-} +SKIP_BUILD="${SKIP_BUILD:-0}" +SKIP_TEST="${SKIP_TEST:-0}" +TARGETS="${TARGETS:-all doc}" +TEST_SELECTOR="${TEST_SELECTOR:-}" +BUILD_TARGET="${BUILD_TARGET:-}" +ENABLE_WASM_SIMD="${ENABLE_WASM_SIMD:-0}" +if [[ -n "${BUILD_TARGET}" ]]; then + BUILD_DIR="${BUILD_DIR:-${MYDIR}/build-${BUILD_TARGET%%-*}}" +else + BUILD_DIR="${BUILD_DIR:-${MYDIR}/build}" +fi +# Whether we should post a message in the MR when the build fails. 
+POST_MESSAGE_ON_ERROR="${POST_MESSAGE_ON_ERROR:-1}" + +# Set default compilers to clang if not already set +export CC=${CC:-clang} +export CXX=${CXX:-clang++} + +# Time limit for the "fuzz" command in seconds (0 means no limit). +FUZZER_MAX_TIME="${FUZZER_MAX_TIME:-0}" + +SANITIZER="none" + + +if [[ "${BUILD_TARGET%%-*}" == "x86_64" || + "${BUILD_TARGET%%-*}" == "i686" ]]; then + # Default to building all targets, even if compiler baseline is SSE4 + HWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS:-HWY_EMU128} +else + HWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS:-} +fi + +# Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS +CMAKE_FLAGS=${CMAKE_FLAGS:-} +CMAKE_C_FLAGS="${CMAKE_C_FLAGS:-} ${CMAKE_FLAGS}" +CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS:-} ${CMAKE_FLAGS}" + +CMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR:-} +CMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS:-} +CMAKE_FIND_ROOT_PATH=${CMAKE_FIND_ROOT_PATH:-} +CMAKE_MODULE_LINKER_FLAGS=${CMAKE_MODULE_LINKER_FLAGS:-} +CMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS:-} +CMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE:-} + +if [[ "${ENABLE_WASM_SIMD}" -ne "0" ]]; then + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -msimd128" + CMAKE_C_FLAGS="${CMAKE_C_FLAGS} -msimd128" + CMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS} -msimd128" +fi + +if [[ "${ENABLE_WASM_SIMD}" -eq "2" ]]; then + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DHWY_WANT_WASM2" + CMAKE_C_FLAGS="${CMAKE_C_FLAGS} -DHWY_WANT_WASM2" +fi + +if [[ ! -z "${HWY_BASELINE_TARGETS}" ]]; then + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DHWY_BASELINE_TARGETS=${HWY_BASELINE_TARGETS}" +fi + +# Version inferred from the CI variables. +CI_COMMIT_SHA=${CI_COMMIT_SHA:-${GITHUB_SHA:-}} +JPEGXL_VERSION=${JPEGXL_VERSION:-${CI_COMMIT_SHA:0:8}} + +# Benchmark parameters +STORE_IMAGES=${STORE_IMAGES:-1} +BENCHMARK_CORPORA="${MYDIR}/third_party/corpora" + +# Local flags passed to sanitizers. 
# Undefined-behavior sanitizer flags appended to the compiler flags of the
# asan, tsan and msan builds below.
UBSAN_FLAGS=(
  -fsanitize=alignment
  -fsanitize=bool
  -fsanitize=bounds
  -fsanitize=builtin
  -fsanitize=enum
  -fsanitize=float-cast-overflow
  -fsanitize=float-divide-by-zero
  -fsanitize=integer-divide-by-zero
  -fsanitize=null
  -fsanitize=object-size
  -fsanitize=pointer-overflow
  -fsanitize=return
  -fsanitize=returns-nonnull-attribute
  -fsanitize=shift-base
  -fsanitize=shift-exponent
  -fsanitize=unreachable
  -fsanitize=vla-bound

  -fno-sanitize-recover=undefined
  # Brunsli uses unaligned accesses to uint32_t, so alignment is just a warning.
  -fsanitize-recover=alignment
)
# -fsanitize=function doesn't work on aarch64 and arm.
if [[ "${BUILD_TARGET%%-*}" != "aarch64" &&
      "${BUILD_TARGET%%-*}" != "arm" ]]; then
  UBSAN_FLAGS+=(
    -fsanitize=function
  )
fi
if [[ "${BUILD_TARGET%%-*}" != "arm" ]]; then
  UBSAN_FLAGS+=(
    -fsanitize=signed-integer-overflow
  )
fi

# Tool discovery: each variable takes the first candidate that "which" finds.
CLANG_TIDY_BIN=$(which clang-tidy-6.0 clang-tidy-7 clang-tidy-8 clang-tidy | head -n 1)
# Default to "cat" if "colordiff" is not installed or if stdout is not a tty.
if [[ -t 1 ]]; then
  COLORDIFF_BIN=$(which colordiff cat | head -n 1)
else
  COLORDIFF_BIN="cat"
fi
FIND_BIN=$(which gfind find | head -n 1)
# "false" will disable wine64 when not installed. This won't allow
# cross-compiling.
WINE_BIN=$(which wine64 false | head -n 1)

CLANG_VERSION="${CLANG_VERSION:-}"
# Detect the clang version suffix and store it in CLANG_VERSION. For example,
# "6.0" for clang 6 or "7" for clang 7. Does nothing when CLANG_VERSION is
# already set. Returns 1 when the compiler banner is not recognized.
detect_clang_version() {
  if [[ -n "${CLANG_VERSION}" ]]; then
    return 0
  fi
  local clang_version=$("${CC:-clang}" --version | head -n1)
  # Distribution builds prefix the banner with the distribution name.
  clang_version=${clang_version#"Debian "}
  clang_version=${clang_version#"Ubuntu "}
  case "${clang_version}" in
    "clang version 6."*)
      CLANG_VERSION="6.0"
      ;;
    "clang version "*)
      # Any other clang version uses just the major version number.
      local suffix="${clang_version#clang version }"
      CLANG_VERSION="${suffix%%.*}"
      ;;
    "emcc"*)
      # We can't use asan or msan in the emcc case.
      ;;
    *)
      echo "Unknown clang version: ${clang_version}" >&2
      return 1
  esac
}

# Temporary files cleanup hooks.
CLEANUP_FILES=()
# Removes every path registered in CLEANUP_FILES.
cleanup() {
  if [[ ${#CLEANUP_FILES[@]} -ne 0 ]]; then
    rm -fr "${CLEANUP_FILES[@]}"
  fi
}

# Executed on exit. $1 is the exit status of the script.
on_exit() {
  local retcode="$1"
  # Always cleanup the CLEANUP_FILES.
  cleanup

  # Post a message in the MR when requested with POST_MESSAGE_ON_ERROR but only
  # if the run failed and we are not running from a MR pipeline.
  if [[ ${retcode} -ne 0 && -n "${CI_BUILD_NAME:-}" &&
        -n "${POST_MESSAGE_ON_ERROR}" && -z "${CI_MERGE_REQUEST_ID:-}" &&
        "${CI_BUILD_REF_NAME}" = "master" ]]; then
    load_mr_vars_from_commit
    { set +xeu; } 2>/dev/null
    local message="**Run ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} failed.**

Check the output of the job at ${CI_JOB_URL:-} to see if this was your problem.
If it was, please rollback this change or fix the problem ASAP, broken builds
slow down development. Check if the error already existed in the previous build
as well.

Pipeline: ${CI_PIPELINE_URL}

Previous build commit: ${CI_COMMIT_BEFORE_SHA}
"
    cmd_post_mr_comment "${message}"
  fi
}

trap 'retcode=$?; { set +x; } 2>/dev/null; on_exit ${retcode}' INT TERM EXIT


# These variables are populated when calling merge_request_commits().

# The current hash at the top of the current branch or merge request branch (if
# running from a merge request pipeline).
MR_HEAD_SHA=""
# The common ancestor between the current commit and the tracked branch, such
# as master.
MR_ANCESTOR_SHA=""

# Populate MR_HEAD_SHA and MR_ANCESTOR_SHA.
merge_request_commits() {
  { set +x; } 2>/dev/null
  # GITHUB_SHA is the current reference being built in GitHub Actions.
  if [[ -n "${GITHUB_SHA:-}" ]]; then
    # GitHub normally does a checkout of a merge commit on a shallow repository
    # by default. We want to get a bit more of the history to be able to diff
    # changes on the Pull Request if needed. This fetches 10 more commits which
    # should be enough given that PR normally should have 1 commit.
    git -C "${MYDIR}" fetch -q origin "${GITHUB_SHA}" --depth 10
    # Resolve FETCH_HEAD in the same repository the fetch above ran in, so the
    # result does not depend on the current working directory.
    MR_HEAD_SHA="$(git -C "${MYDIR}" rev-parse "FETCH_HEAD^2" 2>/dev/null ||
                   echo "${GITHUB_SHA}")"
  else
    # CI_BUILD_REF is the reference currently being built in the CI workflow.
    MR_HEAD_SHA=$(git -C "${MYDIR}" rev-parse -q "${CI_BUILD_REF:-HEAD}")
  fi

  if [[ -n "${CI_MERGE_REQUEST_IID:-}" ]]; then
    # Merge request pipeline in CI. In this case the upstream is called "origin"
    # but it refers to the forked project that's the source of the merge
    # request. We need to get the target of the merge request, for which we need
    # to query that repository using our CI_JOB_TOKEN.
    echo "machine gitlab.com login gitlab-ci-token password ${CI_JOB_TOKEN}" \
      >> "${HOME}/.netrc"
    git -C "${MYDIR}" fetch "${CI_MERGE_REQUEST_PROJECT_URL}" \
      "${CI_MERGE_REQUEST_TARGET_BRANCH_NAME}"
    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
  elif [[ -n "${GITHUB_BASE_REF:-}" ]]; then
    # Pull request workflow in GitHub Actions. GitHub checkout action uses
    # "origin" as the remote for the git checkout.
    git -C "${MYDIR}" fetch -q origin "${GITHUB_BASE_REF}"
    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q FETCH_HEAD)
  else
    # We are in a local branch, not a merge request.
    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q HEAD@{upstream} || true)
  fi

  if [[ -z "${MR_ANCESTOR_SHA}" ]]; then
    echo "Warning, not tracking any branch, using the last commit in HEAD.">&2
    # This prints the return value with just HEAD.
    MR_ANCESTOR_SHA=$(git -C "${MYDIR}" rev-parse -q "${MR_HEAD_SHA}^")
  else
    # GitHub runs the pipeline on a merge commit, no need to look for the common
    # ancestor in that case.
    if [[ -z "${GITHUB_BASE_REF:-}" ]]; then
      MR_ANCESTOR_SHA=$(git -C "${MYDIR}" merge-base \
        "${MR_ANCESTOR_SHA}" "${MR_HEAD_SHA}")
    fi
  fi
  set -x
}

# Load the MR iid from the landed commit message when running not from a
# merge request workflow. This is useful to post back results at the merge
# request when running pipelines from master.
load_mr_vars_from_commit() {
  { set +x; } 2>/dev/null
  if [[ -z "${CI_MERGE_REQUEST_IID:-}" ]]; then
    # Read the commit message from the script's repository, like the other git
    # calls in this file, instead of whatever the current directory is.
    local mr_iid=$(git -C "${MYDIR}" rev-list --format=%B --max-count=1 HEAD |
      grep -F "${CI_PROJECT_URL}" | grep -F "/merge_requests" | head -n 1)
    # mr_iid contains a string like this if it matched (the number between
    # "merge_requests/" and ">" is what the sed expression below extracts):
    #   Part-of: <${CI_PROJECT_URL}/merge_requests/1234>
    if [[ -n "${mr_iid}" ]]; then
      mr_iid=$(echo "${mr_iid}" |
        sed -E 's,^.*merge_requests/([0-9]+)>.*$,\1,')
      CI_MERGE_REQUEST_IID="${mr_iid}"
      CI_MERGE_REQUEST_PROJECT_ID=${CI_PROJECT_ID}
    fi
  fi
  set -x
}

# Posts a comment to the current merge request. $1 is the comment body. Does
# nothing unless both BOT_TOKEN and CI_MERGE_REQUEST_IID are set.
cmd_post_mr_comment() {
  { set +x; } 2>/dev/null
  local comment="$1"
  if [[ -n "${BOT_TOKEN:-}" && -n "${CI_MERGE_REQUEST_IID:-}" ]]; then
    local url="${CI_API_V4_URL}/projects/${CI_MERGE_REQUEST_PROJECT_ID}/merge_requests/${CI_MERGE_REQUEST_IID}/notes"
    curl -X POST -g \
      -H "PRIVATE-TOKEN: ${BOT_TOKEN}" \
      --data-urlencode "body=${comment}" \
      --output /dev/null \
      "${url}"
  fi
  set -x
}

# Set up and export the environment variables needed by the child processes.
export_env() {
  if [[ "${BUILD_TARGET}" == *mingw32 ]]; then
    # Wine needs to know the paths to the mingw dlls. These should be
    # separated by ';'.
    WINEPATH=$("${CC:-clang}" -print-search-dirs --target="${BUILD_TARGET}" \
      | grep -F 'libraries: =' | cut -f 2- -d '=' | tr ':' ';')
    # We also need our own libraries in the wine path.
    local real_build_dir=$(realpath "${BUILD_DIR}")
    # Some library .dll dependencies are installed in /bin:
    export WINEPATH="${WINEPATH};${real_build_dir};${real_build_dir}/third_party/brotli;/usr/${BUILD_TARGET}/bin"

    local prefix="${BUILD_DIR}/wineprefix"
    mkdir -p "${prefix}"
    export WINEPREFIX=$(realpath "${prefix}")
  fi
  # Sanitizers need these variables to print and properly format the stack
  # traces:
  LLVM_SYMBOLIZER=$("${CC:-clang}" -print-prog-name=llvm-symbolizer || true)
  if [[ -n "${LLVM_SYMBOLIZER}" ]]; then
    export ASAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
    export MSAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
    export UBSAN_SYMBOLIZER_PATH="${LLVM_SYMBOLIZER}"
  fi
}

# Runs the cmake configure step for BUILD_DIR using the CMAKE_* variables
# accumulated so far. Any extra arguments are forwarded to cmake after the
# generated ones.
cmake_configure() {
  export_env

  if [[ "${STACK_SIZE:-0}" == 1 ]]; then
    # Dump the stack size of each function in the .stack_sizes section for
    # analysis.
    CMAKE_C_FLAGS+=" -fstack-size-section"
    CMAKE_CXX_FLAGS+=" -fstack-size-section"
  fi

  local args=(
    -B"${BUILD_DIR}" -H"${MYDIR}"
    -DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}"
    -G Ninja
    -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}"
    -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}"
    -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}"
    -DCMAKE_MODULE_LINKER_FLAGS="${CMAKE_MODULE_LINKER_FLAGS}"
    -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}"
    -DJPEGXL_VERSION="${JPEGXL_VERSION}"
    -DSANITIZER="${SANITIZER}"
    # These are not enabled by default in cmake.
    -DJPEGXL_ENABLE_VIEWERS=ON
    -DJPEGXL_ENABLE_PLUGINS=ON
    -DJPEGXL_ENABLE_DEVTOOLS=ON
    # We always use libfuzzer in the ci.sh wrapper.
    -DJPEGXL_FUZZER_LINK_FLAGS="-fsanitize=fuzzer"
  )
  if [[ "${BUILD_TARGET}" != *mingw32 ]]; then
    args+=(
      -DJPEGXL_WARNINGS_AS_ERRORS=ON
    )
  fi
  if [[ -n "${BUILD_TARGET}" ]]; then
    local system_name="Linux"
    if [[ "${BUILD_TARGET}" == *mingw32 ]]; then
      # When cross-compiling with mingw the target must be set to Windows and
      # run programs with wine.
      system_name="Windows"
      args+=(
        -DCMAKE_CROSSCOMPILING_EMULATOR="${WINE_BIN}"
        # Normally CMake automatically defines MINGW=1 when building with the
        # mingw compiler (x86_64-w64-mingw32-gcc) but we are normally compiling
        # with clang.
        -DMINGW=1
      )
    fi
    # EMSCRIPTEN toolchain sets the right values itself
    if [[ "${BUILD_TARGET}" != wasm* ]]; then
      # If set, BUILD_TARGET must be the target triplet such as
      # x86_64-unknown-linux-gnu.
      args+=(
        -DCMAKE_C_COMPILER_TARGET="${BUILD_TARGET}"
        -DCMAKE_CXX_COMPILER_TARGET="${BUILD_TARGET}"
        # Only the first element of the target triplet.
        -DCMAKE_SYSTEM_PROCESSOR="${BUILD_TARGET%%-*}"
        -DCMAKE_SYSTEM_NAME="${system_name}"
        -DCMAKE_TOOLCHAIN_FILE="${CMAKE_TOOLCHAIN_FILE}"
      )
    else
      args+=(
        # sjpeg confuses WASM SIMD with SSE.
        -DSJPEG_ENABLE_SIMD=OFF
        # Building shared libs is not very useful for WASM.
        -DBUILD_SHARED_LIBS=OFF
      )
    fi
    args+=(
      # These are needed to make googletest work when cross-compiling.
      -DCMAKE_CROSSCOMPILING=1
      -DHAVE_STD_REGEX=0
      -DHAVE_POSIX_REGEX=0
      -DHAVE_GNU_POSIX_REGEX=0
      -DHAVE_STEADY_CLOCK=0
      -DHAVE_THREAD_SAFETY_ATTRIBUTES=0
    )
    if [[ -z "${CMAKE_FIND_ROOT_PATH}" ]]; then
      # find_package() will look in this prefix for libraries.
      CMAKE_FIND_ROOT_PATH="/usr/${BUILD_TARGET}"
    fi
    if [[ -z "${CMAKE_PREFIX_PATH}" ]]; then
      CMAKE_PREFIX_PATH="/usr/${BUILD_TARGET}"
    fi
    # Use pkg-config for the target. If there's no pkg-config available for the
    # target we can set the PKG_CONFIG_PATH to the appropriate path in most
    # linux distributions.
    local pkg_config=$(which "${BUILD_TARGET}-pkg-config" || true)
    if [[ -z "${pkg_config}" ]]; then
      pkg_config=$(which pkg-config)
      export PKG_CONFIG_LIBDIR="/usr/${BUILD_TARGET}/lib/pkgconfig"
    fi
    if [[ -n "${pkg_config}" ]]; then
      args+=(-DPKG_CONFIG_EXECUTABLE="${pkg_config}")
    fi
  fi
  # Optional settings: each one is forwarded only when the variable is set.
  if [[ -n "${CMAKE_CROSSCOMPILING_EMULATOR}" ]]; then
    args+=(
      -DCMAKE_CROSSCOMPILING_EMULATOR="${CMAKE_CROSSCOMPILING_EMULATOR}"
    )
  fi
  if [[ -n "${CMAKE_FIND_ROOT_PATH}" ]]; then
    args+=(
      -DCMAKE_FIND_ROOT_PATH="${CMAKE_FIND_ROOT_PATH}"
    )
  fi
  if [[ -n "${CMAKE_PREFIX_PATH}" ]]; then
    args+=(
      -DCMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}"
    )
  fi
  if [[ -n "${CMAKE_C_COMPILER_LAUNCHER}" ]]; then
    args+=(
      -DCMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER}"
    )
  fi
  if [[ -n "${CMAKE_CXX_COMPILER_LAUNCHER}" ]]; then
    args+=(
      -DCMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER}"
    )
  fi
  if [[ -n "${CMAKE_MAKE_PROGRAM}" ]]; then
    args+=(
      -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
    )
  fi
  if [[ "${BUILD_TARGET}" == wasm* ]]; then
    emcmake cmake "${args[@]}" "$@"
  else
    cmake "${args[@]}" "$@"
  fi
}

# Builds TARGETS in BUILD_DIR, optionally packs the test binaries and coverage
# notes (PACK_TEST=1) and runs ctest unless SKIP_TEST is 1. Does nothing when
# SKIP_BUILD is 1.
cmake_build_and_test() {
  if [[ "${SKIP_BUILD}" -eq "1" ]]; then
    return 0
  fi
  # gtest_discover_tests() runs the test binaries to discover the list of tests
  # at build time, which fails under qemu.
  ASAN_OPTIONS=detect_leaks=0 cmake --build "${BUILD_DIR}" -- $TARGETS
  # Pack test binaries if requested.
  if [[ "${PACK_TEST:-}" == "1" ]]; then
    (cd "${BUILD_DIR}"
     ${FIND_BIN} -name '*.cmake' -a '!' -path '*CMakeFiles*'
     # gtest / gmock / gtest_main shared libs
     ${FIND_BIN} lib/ -name 'libg*.so*'
     ${FIND_BIN} -type d -name tests -a '!' -path '*CMakeFiles*'
    ) | tar -C "${BUILD_DIR}" -cf "${BUILD_DIR}/tests.tar.xz" -T - \
      --use-compress-program="xz --threads=$(nproc --all || echo 1) -6"
    du -h "${BUILD_DIR}/tests.tar.xz"
    # Pack coverage data if also available. The sentinel file guarantees the
    # file list is never empty.
    touch "${BUILD_DIR}/gcno.sentinel"
    (cd "${BUILD_DIR}"; echo gcno.sentinel; ${FIND_BIN} -name '*gcno') | \
      tar -C "${BUILD_DIR}" -cvf "${BUILD_DIR}/gcno.tar.xz" -T - \
      --use-compress-program="xz --threads=$(nproc --all || echo 1) -6"
  fi

  if [[ "${SKIP_TEST}" -ne "1" ]]; then
    (cd "${BUILD_DIR}"
     export UBSAN_OPTIONS=print_stacktrace=1
     [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
     ctest -j $(nproc --all || echo 1) ${TEST_SELECTOR} --output-on-failure)
  fi
}

# Configure the build to strip unused functions. This considerably reduces the
# output size, specially for tests which only use a small part of the whole
# library.
strip_dead_code() {
  # Emscripten does tree shaking without any extra flags.
  if [[ "${BUILD_TARGET}" == wasm* ]]; then
    return 0
  fi
  # -ffunction-sections, -fdata-sections and -Wl,--gc-sections effectively
  # discard all unreachable code, reducing the code size. For this to work, we
  # need to also pass --no-export-dynamic to prevent it from exporting all the
  # internal symbols (like functions) making them all reachable and thus not a
  # candidate for removal.
  CMAKE_CXX_FLAGS+=" -ffunction-sections -fdata-sections"
  CMAKE_C_FLAGS+=" -ffunction-sections -fdata-sections"
  if [[ "${OS}" == "Darwin" ]]; then
    CMAKE_EXE_LINKER_FLAGS+=" -dead_strip"
    CMAKE_SHARED_LINKER_FLAGS+=" -dead_strip"
  else
    CMAKE_EXE_LINKER_FLAGS+=" -Wl,--gc-sections -Wl,--no-export-dynamic"
    CMAKE_SHARED_LINKER_FLAGS+=" -Wl,--gc-sections -Wl,--no-export-dynamic"
  fi
}

### Externally visible commands

# Debug build followed by the tests. Arguments are forwarded to cmake.
cmd_debug() {
  CMAKE_BUILD_TYPE="Debug"
  cmake_configure "$@"
  cmake_build_and_test
}

# Release build with dead code stripping, followed by the tests. Arguments are
# forwarded to cmake.
cmd_release() {
  CMAKE_BUILD_TYPE="Release"
  strip_dead_code
  cmake_configure "$@"
  cmake_build_and_test
}

# RelWithDebInfo build with the JXL debug macros defined, followed by the
# tests. Arguments are forwarded to cmake.
cmd_opt() {
  CMAKE_BUILD_TYPE="RelWithDebInfo"
  CMAKE_CXX_FLAGS+=" -DJXL_DEBUG_WARNING -DJXL_DEBUG_ON_ERROR"
  cmake_configure "$@"
  cmake_build_and_test
}

# Release build with JPEGXL_ENABLE_COVERAGE=ON; prints the coverage report when
# the tests were run.
cmd_coverage() {
  # -O0 prohibits stack space reuse -> causes stack-overflow on dozens of tests.
  TEST_STACK_LIMIT="none"

  cmd_release -DJPEGXL_ENABLE_COVERAGE=ON "$@"

  if [[ "${SKIP_TEST}" -ne "1" ]]; then
    # If we didn't run the test we also don't print a coverage report.
    cmd_coverage_report
  fi
}

# Runs gcovr over BUILD_DIR and writes coverage.html, coverage.txt and
# coverage.xml there.
cmd_coverage_report() {
  LLVM_COV=$("${CC:-clang}" -print-prog-name=llvm-cov)
  local real_build_dir=$(realpath "${BUILD_DIR}")
  local gcovr_args=(
    -r "${real_build_dir}"
    --gcov-executable "${LLVM_COV} gcov"
    # Only print coverage information for the libjxl directories. The rest
    # is not part of the code under test.
    --filter '.*jxl/.*'
    --exclude '.*_gbench.cc'
    --exclude '.*_test.cc'
    --exclude '.*_testonly..*'
    --exclude '.*_debug.*'
    --exclude '.*test_utils..*'
    --object-directory "${real_build_dir}"
  )

  (
    cd "${real_build_dir}"
    gcovr "${gcovr_args[@]}" --html --html-details \
      --output="${real_build_dir}/coverage.html"
    gcovr "${gcovr_args[@]}" --print-summary |
      tee "${real_build_dir}/coverage.txt"
    gcovr "${gcovr_args[@]}" --xml --output="${real_build_dir}/coverage.xml"
  )
}

# Runs ctest in BUILD_DIR, unpacking previously packed test binaries and
# coverage notes first. Arguments are forwarded to ctest.
cmd_test() {
  export_env
  # Unpack tests if needed.
  if [[ -e "${BUILD_DIR}/tests.tar.xz" && ! -d "${BUILD_DIR}/tests" ]]; then
    tar -C "${BUILD_DIR}" -Jxvf "${BUILD_DIR}/tests.tar.xz"
  fi
  # gcno.sentinel is a regular file (created with "touch" when packing), so
  # its presence has to be checked with -e; a -d test is never true and would
  # unpack the archive on every run.
  if [[ -e "${BUILD_DIR}/gcno.tar.xz" && ! -e "${BUILD_DIR}/gcno.sentinel" ]]; then
    tar -C "${BUILD_DIR}" -Jxvf "${BUILD_DIR}/gcno.tar.xz"
  fi
  (cd "${BUILD_DIR}"
   export UBSAN_OPTIONS=print_stacktrace=1
   [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
   ctest -j $(nproc --all || echo 1) ${TEST_SELECTOR} --output-on-failure "$@")
}

# Runs lib/jxl_gbench from BUILD_DIR, writing the results to gbench.json.
# Arguments are forwarded to the benchmark binary.
cmd_gbench() {
  export_env
  (cd "${BUILD_DIR}"
   export UBSAN_OPTIONS=print_stacktrace=1
   lib/jxl_gbench \
     --benchmark_counters_tabular=true \
     --benchmark_out_format=json \
     --benchmark_out=gbench.json "$@"
  )
}

# asan build with the fuzzers enabled.
cmd_asanfuzz() {
  CMAKE_CXX_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
  CMAKE_C_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
  cmd_asan -DJPEGXL_ENABLE_FUZZERS=ON "$@"
}

# msan build with the fuzzers enabled.
cmd_msanfuzz() {
  # Install msan if needed before changing the flags.
  detect_clang_version
  local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
  if [[ ! -d "${msan_prefix}" || -e "${msan_prefix}/lib/libc++abi.a" ]]; then
    # Install msan libraries for this version if needed or if an older version
    # with libc++abi was installed.
    cmd_msan_install
  fi

  CMAKE_CXX_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
  CMAKE_C_FLAGS+=" -fsanitize=fuzzer-no-link -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1"
  cmd_msan -DJPEGXL_ENABLE_FUZZERS=ON "$@"
}

# AddressSanitizer + UBSan build followed by the tests. Arguments are forwarded
# to cmake.
cmd_asan() {
  SANITIZER="asan"
  CMAKE_C_FLAGS+=" -DJXL_ENABLE_ASSERT=1 -g -DADDRESS_SANITIZER \
    -fsanitize=address ${UBSAN_FLAGS[@]}"
  CMAKE_CXX_FLAGS+=" -DJXL_ENABLE_ASSERT=1 -g -DADDRESS_SANITIZER \
    -fsanitize=address ${UBSAN_FLAGS[@]}"
  strip_dead_code
  cmake_configure "$@" -DJPEGXL_ENABLE_TCMALLOC=OFF
  cmake_build_and_test
}

# ThreadSanitizer + UBSan build followed by the tests. Arguments are forwarded
# to cmake.
cmd_tsan() {
  SANITIZER="tsan"
  local tsan_args=(
    -DJXL_ENABLE_ASSERT=1
    -g
    -DTHREAD_SANITIZER
    ${UBSAN_FLAGS[@]}
    -fsanitize=thread
  )
  CMAKE_C_FLAGS+=" ${tsan_args[@]}"
  CMAKE_CXX_FLAGS+=" ${tsan_args[@]}"

  CMAKE_BUILD_TYPE="RelWithDebInfo"
  cmake_configure "$@" -DJPEGXL_ENABLE_TCMALLOC=OFF
  cmake_build_and_test
}

# MemorySanitizer + UBSan build followed by the tests, using the msan
# instrumented libc++ installed by cmd_msan_install. Arguments are forwarded to
# cmake.
cmd_msan() {
  SANITIZER="msan"
  detect_clang_version
  local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
  if [[ ! -d "${msan_prefix}" || -e "${msan_prefix}/lib/libc++abi.a" ]]; then
    # Install msan libraries for this version if needed or if an older version
    # with libc++abi was installed.
    cmd_msan_install
  fi

  local msan_c_flags=(
    -fsanitize=memory
    -fno-omit-frame-pointer
    -fsanitize-memory-track-origins

    -DJXL_ENABLE_ASSERT=1
    -g
    -DMEMORY_SANITIZER

    # Force gtest to not use the cxxabi.
    -DGTEST_HAS_CXXABI_H_=0
  )
  local msan_cxx_flags=(
    "${msan_c_flags[@]}"

    # Some C++ sources don't use the std at all, so the -stdlib=libc++ is unused
    # in those cases. Ignore the warning.
    -Wno-unused-command-line-argument
    -stdlib=libc++

    # We include the libc++ from the msan directory instead, so we don't want
    # the std includes.
    -nostdinc++
    -cxx-isystem"${msan_prefix}/include/c++/v1"
  )

  local msan_linker_flags=(
    -L"${msan_prefix}"/lib
    -Wl,-rpath -Wl,"${msan_prefix}"/lib/
  )

  # CMAKE_REQUIRED_LINK_OPTIONS is a ;-separated CMake list. Join the flags
  # explicitly: expanding the array with [@] inside the -D argument would split
  # it into several words and only the first flag would reach the variable.
  local required_link_options
  required_link_options=$(IFS=';'; printf '%s' "${msan_linker_flags[*]}")

  CMAKE_C_FLAGS+=" ${msan_c_flags[@]} ${UBSAN_FLAGS[@]}"
  CMAKE_CXX_FLAGS+=" ${msan_cxx_flags[@]} ${UBSAN_FLAGS[@]}"
  CMAKE_EXE_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
  CMAKE_MODULE_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
  CMAKE_SHARED_LINKER_FLAGS+=" ${msan_linker_flags[@]}"
  strip_dead_code
  cmake_configure "$@" \
    -DCMAKE_CROSSCOMPILING=1 -DRUN_HAVE_STD_REGEX=0 -DRUN_HAVE_POSIX_REGEX=0 \
    -DJPEGXL_ENABLE_TCMALLOC=OFF -DJPEGXL_WARNINGS_AS_ERRORS=OFF \
    -DCMAKE_REQUIRED_LINK_OPTIONS="${required_link_options}"
  cmake_build_and_test
}

# Install libc++ libraries compiled with msan in the msan_prefix for the current
# compiler version.
cmd_msan_install() {
  local tmpdir=$(mktemp -d)
  CLEANUP_FILES+=("${tmpdir}")
  # Detect the llvm to install:
  export CC="${CC:-clang}"
  export CXX="${CXX:-clang++}"
  detect_clang_version
  # Allow overriding the LLVM checkout.
  local llvm_root="${LLVM_ROOT:-}"
  if [ -z "${llvm_root}" ]; then
    local llvm_tag="llvmorg-${CLANG_VERSION}.0.0"
    case "${CLANG_VERSION}" in
      "6.0")
        llvm_tag="llvmorg-6.0.1"
        ;;
      "7")
        llvm_tag="llvmorg-7.0.1"
        ;;
    esac
    local llvm_targz="${tmpdir}/${llvm_tag}.tar.gz"
    curl -L --show-error -o "${llvm_targz}" \
      "https://github.com/llvm/llvm-project/archive/${llvm_tag}.tar.gz"
    tar -C "${tmpdir}" -zxf "${llvm_targz}"
    llvm_root="${tmpdir}/llvm-project-${llvm_tag}"
  fi

  local msan_prefix="${HOME}/.msan/${CLANG_VERSION}"
  rm -rf "${msan_prefix}"

  # Per-project extra cmake arguments, expanded unquoted below on purpose so
  # that they split into separate words.
  declare -A CMAKE_EXTRAS
  CMAKE_EXTRAS[libcxx]="\
    -DLIBCXX_CXX_ABI=libstdc++ \
    -DLIBCXX_INSTALL_EXPERIMENTAL_LIBRARY=ON"

  for project in libcxx; do
    local proj_build="${tmpdir}/build-${project}"
    local proj_dir="${llvm_root}/${project}"
    mkdir -p "${proj_build}"
    cmake -B"${proj_build}" -H"${proj_dir}" \
      -G Ninja \
      -DCMAKE_BUILD_TYPE=Release \
      -DLLVM_USE_SANITIZER=Memory \
      -DLLVM_PATH="${llvm_root}/llvm" \
      -DLLVM_CONFIG_PATH="$(which llvm-config llvm-config-7 llvm-config-6.0 | \
                            head -n1)" \
      -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}" \
      -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS}" \
      -DCMAKE_EXE_LINKER_FLAGS="${CMAKE_EXE_LINKER_FLAGS}" \
      -DCMAKE_SHARED_LINKER_FLAGS="${CMAKE_SHARED_LINKER_FLAGS}" \
      -DCMAKE_INSTALL_PREFIX="${msan_prefix}" \
      ${CMAKE_EXTRAS[${project}]}
    cmake --build "${proj_build}"
    ninja -C "${proj_build}" install
  done
}

# Internal build step shared between all cmd_ossfuzz_* commands. $1 is the
# sanitizer name; the remaining arguments are passed to the build as
# JPEGXL_EXTRA_ARGS.
_cmd_ossfuzz() {
  local sanitizer="$1"
  shift
  mkdir -p "${BUILD_DIR}"
  local real_build_dir=$(realpath "${BUILD_DIR}")

  # oss-fuzz defines three directories:
  # * /work, with the working directory to do re-builds
  # * /src, with the source code to build
  # * /out, with the output directory where to copy over the built files.
  # We use $BUILD_DIR as the /work and the script directory as the /src. The
  # /out directory is ignored as developers are used to look for the fuzzers in
  # $BUILD_DIR/tools/ directly.

  if [[ "${sanitizer}" = "memory" && ! -d "${BUILD_DIR}/msan" ]]; then
    sudo docker run --rm -i \
      --user $(id -u):$(id -g) \
      -v "${real_build_dir}":/work \
      gcr.io/oss-fuzz-base/msan-libs-builder \
      bash -c "cp -r /msan /work"
  fi

  # Args passed to ninja. These will be evaluated as a string separated by
  # spaces.
  local jpegxl_extra_args="$@"

  sudo docker run --rm -i \
    -e JPEGXL_UID=$(id -u) \
    -e JPEGXL_GID=$(id -g) \
    -e FUZZING_ENGINE="${FUZZING_ENGINE:-libfuzzer}" \
    -e SANITIZER="${sanitizer}" \
    -e ARCHITECTURE=x86_64 \
    -e FUZZING_LANGUAGE=c++ \
    -e MSAN_LIBS_PATH="/work/msan" \
    -e JPEGXL_EXTRA_ARGS="${jpegxl_extra_args}" \
    -v "${MYDIR}":/src/libjxl \
    -v "${MYDIR}/tools/scripts/ossfuzz-build.sh":/src/build.sh \
    -v "${real_build_dir}":/work \
    gcr.io/oss-fuzz/libjxl
}

cmd_ossfuzz_asan() {
  _cmd_ossfuzz address "$@"
}
cmd_ossfuzz_msan() {
  _cmd_ossfuzz memory "$@"
}
cmd_ossfuzz_ubsan() {
  _cmd_ossfuzz undefined "$@"
}

# Runs ninja inside the oss-fuzz container on an already configured BUILD_DIR.
# Arguments are forwarded to ninja.
cmd_ossfuzz_ninja() {
  # Requires a previous ossfuzz configure; the bare test stops the script when
  # it runs under "set -e".
  [[ -e "${BUILD_DIR}/build.ninja" ]]
  local real_build_dir=$(realpath "${BUILD_DIR}")

  if [[ -e "${BUILD_DIR}/msan" ]]; then
    echo "ossfuzz_ninja doesn't work with msan builds. Use ossfuzz_msan." >&2
    exit 1
  fi

  sudo docker run --rm -i \
    --user $(id -u):$(id -g) \
    -v "${MYDIR}":/src/libjxl \
    -v "${real_build_dir}":/work \
    gcr.io/oss-fuzz/libjxl \
    ninja -C /work "$@"
}

# Downloads the small corpus (only when newer than the local copy) and runs the
# benchmark on it with 1 GiB (1048576 kB) per thread.
cmd_fast_benchmark() {
  local small_corpus_tar="${BENCHMARK_CORPORA}/jyrki-full.tar"
  mkdir -p "${BENCHMARK_CORPORA}"
  curl --show-error -o "${small_corpus_tar}" -z "${small_corpus_tar}" \
    "https://storage.googleapis.com/artifacts.jpegxl.appspot.com/corpora/jyrki-full.tar"

  local tmpdir=$(mktemp -d)
  CLEANUP_FILES+=("${tmpdir}")
  tar -xf "${small_corpus_tar}" -C "${tmpdir}"

  run_benchmark "${tmpdir}" 1048576
}

# Downloads the nikon corpus, converts every .ppm in it to .png in parallel and
# runs the benchmark on the result.
cmd_benchmark() {
  local nikon_corpus_tar="${BENCHMARK_CORPORA}/nikon-subset.tar"
  mkdir -p "${BENCHMARK_CORPORA}"
  curl --show-error -o "${nikon_corpus_tar}" -z "${nikon_corpus_tar}" \
    "https://storage.googleapis.com/artifacts.jpegxl.appspot.com/corpora/nikon-subset.tar"

  local tmpdir=$(mktemp -d)
  CLEANUP_FILES+=("${tmpdir}")
  tar -xvf "${nikon_corpus_tar}" -C "${tmpdir}"

  local sem_id="jpegxl_benchmark-$$"
  local nprocs=$(nproc --all || echo 1)
  # Kept local so that the helper variables don't leak into the caller.
  local images=()
  local png_filename
  local filename
  while IFS= read -r filename; do
    # This removes the './'
    filename="${filename:2}"
    local mode
    if [[ "${filename:0:4}" == "srgb" ]]; then
      mode="RGB_D65_SRG_Rel_SRG"
    elif [[ "${filename:0:5}" == "adobe" ]]; then
      mode="RGB_D65_Ado_Rel_Ado"
    else
      echo "Unknown image colorspace: ${filename}" >&2
      exit 1
    fi
    png_filename="${filename%.ppm}.png"
    png_filename=$(echo "${png_filename}" | tr '/' '_')
    sem --bg --id "${sem_id}" -j"${nprocs}" -- \
      "${BUILD_DIR}/tools/decode_and_encode" \
        "${tmpdir}/${filename}" "${mode}" "${tmpdir}/${png_filename}"
    images+=( "${png_filename}" )
  done < <(cd "${tmpdir}"; ${FIND_BIN} . -name '*.ppm' -type f)
  sem --id "${sem_id}" --wait

  # We need about 10 GiB per thread on these images.
  run_benchmark "${tmpdir}" 10485760
}

# Prints the memory currently available, in kB.
get_mem_available() {
  if [[ "${OS}" == "Darwin" ]]; then
    # NOTE(review): the "* 4" assumes 4 KiB pages -- confirm on machines that
    # report a different page size in the vm_stat header.
    echo $(vm_stat | grep -F 'Pages free:' | awk '{print $3 * 4}')
  else
    echo $(grep -F MemAvailable: /proc/meminfo | awk '{print $2}')
  fi
}

# Runs benchmark_xl over the .png files of a directory.
#   $1: directory with the source images.
#   $2: memory needed per thread in kB (default 10485760).
# Results are written to ${BUILD_DIR}/benchmark_results and posted to the merge
# request when running from the CI.
run_benchmark() {
  local src_img_dir="$1"
  local mem_per_thread="${2:-10485760}"

  local output_dir="${BUILD_DIR}/benchmark_results"
  mkdir -p "${output_dir}"

  # The memory available at the beginning of the benchmark run in kB. The number
  # of threads depends on the available memory, and the passed memory per
  # thread. We also reserve 1 GiB (1048576 kB) of constant memory.
  local mem_available="$(get_mem_available)"
  # Check that we actually have a MemAvailable value.
  [[ -n "${mem_available}" ]]
  local num_threads=$(( (${mem_available} - 1048576) / ${mem_per_thread} ))
  if [[ ${num_threads} -le 0 ]]; then
    num_threads=1
  fi

  local benchmark_args=(
    --input "${src_img_dir}/*.png"
    --codec=jpeg:yuv420:q85,webp:q80,jxl:d1:6,jxl:d1:6:downsampling=8,jxl:d5:6,jxl:d5:6:downsampling=8,jxl:m:d0:2,jxl:m:d0:3,jxl:m:d2:2
    --output_dir "${output_dir}"
    --show_progress
    --num_threads="${num_threads}"
  )
  if [[ "${STORE_IMAGES}" == "1" ]]; then
    benchmark_args+=(--save_decompressed --save_compressed)
  fi
  (
    [[ "${TEST_STACK_LIMIT}" == "none" ]] || ulimit -s "${TEST_STACK_LIMIT}"
    "${BUILD_DIR}/tools/benchmark_xl" "${benchmark_args[@]}" | \
       tee "${output_dir}/results.txt"

    # Check error code for benchmark_xl command. This will exit if not.
    return ${PIPESTATUS[0]}
  )

  if [[ -n "${CI_BUILD_NAME:-}" ]]; then
    { set +x; } 2>/dev/null
    local message="Results for ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} (job ${CI_JOB_URL:-}):

$(cat "${output_dir}/results.txt")
"
    cmd_post_mr_comment "${message}"
    set -x
  fi
}

# Helper function to wait for the CPU temperature to cool down on ARM.
wait_for_temp() {
  # $1 is the temperature limit in the units reported by THERMAL_FILE
  # (default 38000). Polls once per second, for at most 5 seconds, while the
  # reading stays at or above the limit. Command tracing is silenced while
  # running and switched on again before returning.
  { set +x; } 2>/dev/null
  local limit=${1:-38000}
  if [[ -z "${THERMAL_FILE:-}" ]]; then
    echo "Must define the THERMAL_FILE with the thermal_zoneX/temp file" \
      "to read the temperature from. This is normally set in the runner." >&2
    exit 1
  fi

  local initial=$(cat "${THERMAL_FILE}")
  # Progress messages are only printed when we start above the limit.
  local started_hot=0
  if [[ "${initial}" -ge "${limit}" ]]; then
    started_hot=1
    echo -n "Waiting for temp to get down from ${initial}... "
  fi

  local current="${initial}"
  local elapsed
  for (( elapsed = 0; elapsed < 5; elapsed++ )); do
    [[ "${current}" -ge "${limit}" ]] || break
    sleep 1
    current=$(cat "${THERMAL_FILE}")
    echo -n "${current} "
  done

  if [[ ${started_hot} -eq 1 ]]; then
    echo "Done, temp=${current}"
  fi
  set -x
}

# Restrict the current process to the CPUs listed in $1 by writing the list to
# the cpuset this process belongs to. Skipped entirely when SKIP_CPUSET is 1.
cmd_cpuset() {
  if [[ "${SKIP_CPUSET:-}" == "1" ]]; then
    return 0
  fi
  local cpu_list="$1"
  local cpuset_dir=$(cat /proc/self/cpuset)
  cpuset_dir="/dev/cpuset${cpuset_dir}"
  # The cpuset directory must exist.
  [[ -d "${cpuset_dir}" ]]
  # The control file is named either "cpuset.cpus" or plain "cpus".
  local cpus_file="${cpuset_dir}/cpus"
  if [[ -e "${cpuset_dir}/cpuset.cpus" ]]; then
    cpus_file="${cpuset_dir}/cpuset.cpus"
  fi
  echo "${cpu_list}" >"${cpus_file}"
}

# Print the number that precedes the unit ($2, default "MP/s") in the Stats
# line passed as $1. Prints nothing when the unit does not appear in the line.
_speed_from_output() {
  local stats_line="$1"
  local unit="${2:-MP/s}"
  [[ "${stats_line}" == *"${unit}"* ]] || return 0
  # Cut everything from the first " <unit>" onwards...
  local before_unit="${stats_line%% ${unit}*}"
  # ...and keep only the last space separated word of what is left.
  echo "${before_unit##* }"
}


# Run benchmarks on ARM for the big and little CPUs.
+cmd_arm_benchmark() { + # Flags used for cjxl encoder with .png inputs + local jxl_png_benchmarks=( + # Lossy options: + "--epf=0 --distance=1.0 --speed=cheetah" + "--epf=2 --distance=1.0 --speed=cheetah" + "--epf=0 --distance=8.0 --speed=cheetah" + "--epf=1 --distance=8.0 --speed=cheetah" + "--epf=2 --distance=8.0 --speed=cheetah" + "--epf=3 --distance=8.0 --speed=cheetah" + "--modular -Q 90" + "--modular -Q 50" + # Lossless options: + "--modular" + "--modular -E 0 -I 0" + "--modular -P 5" + "--modular --responsive=1" + # Near-lossless options: + "--epf=0 --distance=0.3 --speed=fast" + "--modular -Q 97" + ) + + # Flags used for cjxl encoder with .jpg inputs. These should do lossless + # JPEG recompression (of pixels or full jpeg). + local jxl_jpeg_benchmarks=( + "--num_reps=3" + ) + + local images=( + "testdata/jxl/flower/flower.png" + ) + + local jpg_images=( + "testdata/jxl/flower/flower.png.im_q85_420.jpg" + ) + + if [[ "${SKIP_CPUSET:-}" == "1" ]]; then + # Use a single cpu config in this case. + local cpu_confs=("?") + else + # Otherwise the CPU config comes from the environment: + local cpu_confs=( + "${RUNNER_CPU_LITTLE}" + "${RUNNER_CPU_BIG}" + # The CPU description is something like 3-7, so these configurations only + # take the first CPU of the group. + "${RUNNER_CPU_LITTLE%%-*}" + "${RUNNER_CPU_BIG%%-*}" + ) + # Check that RUNNER_CPU_ALL is defined. In the SKIP_CPUSET=1 case this will + # be ignored but still evaluated when calling cmd_cpuset. 
+ [[ -n "${RUNNER_CPU_ALL}" ]] + fi + + local jpg_dirname="third_party/corpora/jpeg" + mkdir -p "${jpg_dirname}" + local jpg_qualities=( 50 80 95 ) + for src_img in "${images[@]}"; do + for q in "${jpg_qualities[@]}"; do + local jpeg_name="${jpg_dirname}/"$(basename "${src_img}" .png)"-q${q}.jpg" + convert -sampling-factor 1x1 -quality "${q}" \ + "${src_img}" "${jpeg_name}" + jpg_images+=("${jpeg_name}") + done + done + + local output_dir="${BUILD_DIR}/benchmark_results" + mkdir -p "${output_dir}" + local runs_file="${output_dir}/runs.txt" + + if [[ ! -e "${runs_file}" ]]; then + echo -e "binary\tflags\tsrc_img\tsrc size\tsrc pixels\tcpuset\tenc size (B)\tenc speed (MP/s)\tdec speed (MP/s)\tJPG dec speed (MP/s)\tJPG dec speed (MB/s)" | + tee -a "${runs_file}" + fi + + mkdir -p "${BUILD_DIR}/arm_benchmark" + local flags + local src_img + for src_img in "${jpg_images[@]}" "${images[@]}"; do + local src_img_hash=$(sha1sum "${src_img}" | cut -f 1 -d ' ') + local enc_binaries=("${BUILD_DIR}/tools/cjxl") + local src_ext="${src_img##*.}" + for enc_binary in "${enc_binaries[@]}"; do + local enc_binary_base=$(basename "${enc_binary}") + + # Select the list of flags to use for the current encoder/image pair. + local img_benchmarks + if [[ "${src_ext}" == "jpg" ]]; then + img_benchmarks=("${jxl_jpeg_benchmarks[@]}") + else + img_benchmarks=("${jxl_png_benchmarks[@]}") + fi + + for flags in "${img_benchmarks[@]}"; do + # Encoding step. + local enc_file_hash="${enc_binary_base} || $flags || ${src_img} || ${src_img_hash}" + enc_file_hash=$(echo "${enc_file_hash}" | sha1sum | cut -f 1 -d ' ') + local enc_file="${BUILD_DIR}/arm_benchmark/${enc_file_hash}.jxl" + + for cpu_conf in "${cpu_confs[@]}"; do + cmd_cpuset "${cpu_conf}" + # nproc returns the number of active CPUs, which is given by the cpuset + # mask. 
+ local num_threads="$(nproc)" + + echo "Encoding with: ${enc_binary_base} img=${src_img} cpus=${cpu_conf} enc_flags=${flags}" + local enc_output + if [[ "${flags}" == *"modular"* ]]; then + # We don't benchmark encoding speed in this case. + if [[ ! -f "${enc_file}" ]]; then + cmd_cpuset "${RUNNER_CPU_ALL:-}" + "${enc_binary}" ${flags} "${src_img}" "${enc_file}.tmp" + mv "${enc_file}.tmp" "${enc_file}" + cmd_cpuset "${cpu_conf}" + fi + enc_output=" ?? MP/s" + else + wait_for_temp + enc_output=$("${enc_binary}" ${flags} "${src_img}" "${enc_file}.tmp" \ + 2>&1 | tee /dev/stderr | grep -F "MP/s [") + mv "${enc_file}.tmp" "${enc_file}" + fi + local enc_speed=$(_speed_from_output "${enc_output}") + local enc_size=$(stat -c "%s" "${enc_file}") + + echo "Decoding with: img=${src_img} cpus=${cpu_conf} enc_flags=${flags}" + + local dec_output + wait_for_temp + dec_output=$("${BUILD_DIR}/tools/djxl" "${enc_file}" \ + --num_reps=5 --num_threads="${num_threads}" 2>&1 | tee /dev/stderr | + grep -E "M[BP]/s \[") + local img_size=$(echo "${dec_output}" | cut -f 1 -d ',') + local img_size_x=$(echo "${img_size}" | cut -f 1 -d ' ') + local img_size_y=$(echo "${img_size}" | cut -f 3 -d ' ') + local img_size_px=$(( ${img_size_x} * ${img_size_y} )) + local dec_speed=$(_speed_from_output "${dec_output}") + + # For JPEG lossless recompression modes (where the original is a JPEG) + # decode to JPG as well. + local jpeg_dec_mps_speed="" + local jpeg_dec_mbs_speed="" + if [[ "${src_ext}" == "jpg" ]]; then + wait_for_temp + local dec_file="${BUILD_DIR}/arm_benchmark/${enc_file_hash}.jpg" + dec_output=$("${BUILD_DIR}/tools/djxl" "${enc_file}" \ + "${dec_file}" --num_reps=5 --num_threads="${num_threads}" 2>&1 | \ + tee /dev/stderr | grep -E "M[BP]/s \[") + local jpeg_dec_mps_speed=$(_speed_from_output "${dec_output}") + local jpeg_dec_mbs_speed=$(_speed_from_output "${dec_output}" MB/s) + if ! 
cmp --quiet "${src_img}" "${dec_file}"; then + # Add a start at the end to signal that the files are different. + jpeg_dec_mbs_speed+="*" + fi + fi + + # Record entry in a tab-separated file. + local src_img_base=$(basename "${src_img}") + echo -e "${enc_binary_base}\t${flags}\t${src_img_base}\t${img_size}\t${img_size_px}\t${cpu_conf}\t${enc_size}\t${enc_speed}\t${dec_speed}\t${jpeg_dec_mps_speed}\t${jpeg_dec_mbs_speed}" | + tee -a "${runs_file}" + done + done + done + done + cmd_cpuset "${RUNNER_CPU_ALL:-}" + cat "${runs_file}" + + if [[ -n "${CI_BUILD_NAME:-}" ]]; then + load_mr_vars_from_commit + { set +x; } 2>/dev/null + local message="Results for ${CI_BUILD_NAME} @ ${CI_COMMIT_SHORT_SHA} (job ${CI_JOB_URL:-}): + +\`\`\` +$(column -t -s " " "${runs_file}") +\`\`\` +" + cmd_post_mr_comment "${message}" + set -x + fi +} + +# Generate a corpus and run the fuzzer on that corpus. +cmd_fuzz() { + local corpus_dir=$(realpath "${BUILD_DIR}/fuzzer_corpus") + local fuzzer_crash_dir=$(realpath "${BUILD_DIR}/fuzzer_crash") + mkdir -p "${corpus_dir}" "${fuzzer_crash_dir}" + # Generate step. + "${BUILD_DIR}/tools/fuzzer_corpus" "${corpus_dir}" + # Run step: + local nprocs=$(nproc --all || echo 1) + ( + cd "${BUILD_DIR}" + "tools/djxl_fuzzer" "${fuzzer_crash_dir}" "${corpus_dir}" \ + -max_total_time="${FUZZER_MAX_TIME}" -jobs=${nprocs} \ + -artifact_prefix="${fuzzer_crash_dir}/" + ) +} + +# Runs the linters (clang-format, build_cleaner, buildirier) on the pending CLs. +cmd_lint() { + merge_request_commits + { set +x; } 2>/dev/null + local versions=(${1:-16 15 14 13 12 11 10 9 8 7 6.0}) + local clang_format_bins=("${versions[@]/#/clang-format-}" clang-format) + local tmpdir=$(mktemp -d) + CLEANUP_FILES+=("${tmpdir}") + + local ret=0 + local build_patch="${tmpdir}/build_cleaner.patch" + if ! 
"${MYDIR}/tools/scripts/build_cleaner.py" >"${build_patch}"; then + ret=1 + echo "build_cleaner.py findings:" >&2 + "${COLORDIFF_BIN}" <"${build_patch}" + echo "Run \`tools/scripts/build_cleaner.py --update\` to apply them" >&2 + fi + + # It is ok, if buildifier is not installed. + if which buildifier >/dev/null; then + local buildifier_patch="${tmpdir}/buildifier.patch" + local bazel_files=`git -C ${MYDIR} ls-files | grep -E "/BUILD$|WORKSPACE|.bzl$"` + set -x + buildifier -d ${bazel_files} >"${buildifier_patch}"|| true + { set +x; } 2>/dev/null + if [ -s "${buildifier_patch}" ]; then + ret=1 + echo 'buildifier have found some problems in Bazel build files:' >&2 + "${COLORDIFF_BIN}" <"${buildifier_patch}" + echo 'To fix them run (from the base directory):' >&2 + echo ' buildifier `git ls-files | grep -E "/BUILD$|WORKSPACE|.bzl$"`' >&2 + fi + fi + + local installed=() + local clang_patch + local clang_format + for clang_format in "${clang_format_bins[@]}"; do + if ! which "${clang_format}" >/dev/null; then + continue + fi + installed+=("${clang_format}") + local tmppatch="${tmpdir}/${clang_format}.patch" + # We include in this linter all the changes including the uncommitted changes + # to avoid printing changes already applied. + set -x + # Ignoring the error that git-clang-format outputs. + git -C "${MYDIR}" "${clang_format}" --binary "${clang_format}" \ + --style=file --diff "${MR_ANCESTOR_SHA}" -- >"${tmppatch}" || true + { set +x; } 2>/dev/null + if grep -E '^--- ' "${tmppatch}" | grep -v 'a/third_party' >/dev/null; then + if [[ -n "${LINT_OUTPUT:-}" ]]; then + cp "${tmppatch}" "${LINT_OUTPUT}" + fi + clang_patch="${tmppatch}" + else + echo "clang-format check OK" >&2 + return ${ret} + fi + done + + if [[ ${#installed[@]} -eq 0 ]]; then + echo "You must install clang-format for \"git clang-format\"" >&2 + exit 1 + fi + + # clang-format is installed but found problems. 
+ echo "clang-format findings:" >&2 + "${COLORDIFF_BIN}" < "${clang_patch}" + + echo "clang-format found issues in your patches from ${MR_ANCESTOR_SHA}" \ + "to the current patch. Run \`./ci.sh lint | patch -p1\` from the base" \ + "directory to apply them." >&2 + exit 1 +} + +# Runs clang-tidy on the pending CLs. If the "all" argument is passed it runs +# clang-tidy over all the source files instead. +cmd_tidy() { + local what="${1:-}" + + if [[ -z "${CLANG_TIDY_BIN}" ]]; then + echo "ERROR: You must install clang-tidy-7 or newer to use ci.sh tidy" >&2 + exit 1 + fi + + local git_args=() + if [[ "${what}" == "all" ]]; then + git_args=(ls-files) + shift + else + merge_request_commits + git_args=( + diff-tree --no-commit-id --name-only -r "${MR_ANCESTOR_SHA}" + "${MR_HEAD_SHA}" + ) + fi + + # Clang-tidy needs the compilation database generated by cmake. + if [[ ! -e "${BUILD_DIR}/compile_commands.json" ]]; then + # Generate the build options in debug mode, since we need the debug asserts + # enabled for the clang-tidy analyzer to use them. + CMAKE_BUILD_TYPE="Debug" + cmake_configure + # Build the autogen targets to generate the .h files from the .ui files. + local autogen_targets=( + $(ninja -C "${BUILD_DIR}" -t targets | grep -F _autogen: | + cut -f 1 -d :) + ) + if [[ ${#autogen_targets[@]} != 0 ]]; then + ninja -C "${BUILD_DIR}" "${autogen_targets[@]}" + fi + fi + + cd "${MYDIR}" + local nprocs=$(nproc --all || echo 1) + local ret=0 + if ! 
parallel -j"${nprocs}" --keep-order -- \ + "${CLANG_TIDY_BIN}" -p "${BUILD_DIR}" -format-style=file -quiet "$@" {} \ + < <(git "${git_args[@]}" | grep -E '(\.cc|\.cpp)$') \ + >"${BUILD_DIR}/clang-tidy.txt"; then + ret=1 + fi + { set +x; } 2>/dev/null + echo "Findings statistics:" >&2 + grep -E ' \[[A-Za-z\.,\-]+\]' -o "${BUILD_DIR}/clang-tidy.txt" | sort \ + | uniq -c >&2 + + if [[ $ret -ne 0 ]]; then + cat >&2 </dev/null + local debsdir="${BUILD_DIR}/debs" + local f + while IFS='' read -r -d '' f; do + echo "=====================================================================" + echo "Package $f:" + dpkg --info $f + dpkg --contents $f + done < <(find "${BUILD_DIR}/debs" -maxdepth 1 -mindepth 1 -type f \ + -name '*.deb' -print0) +} + +build_debian_pkg() { + local srcdir="$1" + local srcpkg="$2" + + local debsdir="${BUILD_DIR}/debs" + local builddir="${debsdir}/${srcpkg}" + + # debuild doesn't have an easy way to build out of tree, so we make a copy + # of with all symlinks on the first level. + mkdir -p "${builddir}" + for f in $(find "${srcdir}" -mindepth 1 -maxdepth 1 -printf '%P\n'); do + if [[ ! -L "${builddir}/$f" ]]; then + rm -f "${builddir}/$f" + ln -s "${srcdir}/$f" "${builddir}/$f" + fi + done + ( + cd "${builddir}" + debuild -b -uc -us + ) +} + +cmd_debian_build() { + local srcpkg="${1:-}" + + case "${srcpkg}" in + jpeg-xl) + build_debian_pkg "${MYDIR}" "jpeg-xl" + ;; + highway) + build_debian_pkg "${MYDIR}/third_party/highway" "highway" + ;; + *) + echo "ERROR: Must pass a valid source package name to build." >&2 + ;; + esac +} + +get_version() { + local varname=$1 + local line=$(grep -F "set(${varname} " lib/CMakeLists.txt | head -n 1) + [[ -n "${line}" ]] + line="${line#set(${varname} }" + line="${line%)}" + echo "${line}" +} + +cmd_bump_version() { + local newver="${1:-}" + + if ! 
which dch >/dev/null; then + echo -e "Missing dch\nTo install it run:\n sudo apt install devscripts" + exit 1 + fi + + if [[ -z "${newver}" ]]; then + local major=$(get_version JPEGXL_MAJOR_VERSION) + local minor=$(get_version JPEGXL_MINOR_VERSION) + local patch=0 + minor=$(( ${minor} + 1)) + else + local major="${newver%%.*}" + newver="${newver#*.}" + local minor="${newver%%.*}" + newver="${newver#${minor}}" + local patch="${newver#.}" + if [[ -z "${patch}" ]]; then + patch=0 + fi + fi + + newver="${major}.${minor}.${patch}" + + echo "Bumping version to ${newver} (${major}.${minor}.${patch})" + sed -E \ + -e "s/(set\\(JPEGXL_MAJOR_VERSION) [0-9]+\\)/\\1 ${major})/" \ + -e "s/(set\\(JPEGXL_MINOR_VERSION) [0-9]+\\)/\\1 ${minor})/" \ + -e "s/(set\\(JPEGXL_PATCH_VERSION) [0-9]+\\)/\\1 ${patch})/" \ + -i lib/CMakeLists.txt + sed -E \ + -e "s/(LIBJXL_VERSION: )[0-9\\.]+/\\1 ${major}.${minor}.${patch}/" \ + -e "s/(LIBJXL_ABI_VERSION: )[0-9\\.]+/\\1 ${major}.${minor}/" \ + -i .github/workflows/conformance.yml + + # Update lib.gni + tools/scripts/build_cleaner.py --update + + # Mark the previous version as "unstable". + DEBCHANGE_RELEASE_HEURISTIC=log dch -M --distribution unstable --release '' + DEBCHANGE_RELEASE_HEURISTIC=log dch -M \ + --newversion "${newver}" \ + "Bump JPEG XL version to ${newver}." +} + +# Check that the AUTHORS file contains the email of the committer. +cmd_authors() { + merge_request_commits + local emails + local names + readarray -t emails < <(git log --format='%ae' "${MR_ANCESTOR_SHA}..${MR_HEAD_SHA}") + readarray -t names < <(git log --format='%an' "${MR_ANCESTOR_SHA}..${MR_HEAD_SHA}") + for i in "${!names[@]}"; do + echo "Checking name '${names[$i]}' with email '${emails[$i]}' ..." + "${MYDIR}"/tools/scripts/check_author.py "${emails[$i]}" "${names[$i]}" + done +} + +main() { + local cmd="${1:-}" + if [[ -z "${cmd}" ]]; then + cat >&2 < Build the given source package. + debian_stats Print stats about the built packages.
+ +oss-fuzz commands: + ossfuzz_asan Build the local source inside oss-fuzz docker with asan. + ossfuzz_msan Build the local source inside oss-fuzz docker with msan. + ossfuzz_ubsan Build the local source inside oss-fuzz docker with ubsan. + ossfuzz_ninja Run ninja on the BUILD_DIR inside the oss-fuzz docker. Extra + parameters are passed to ninja, for example "djxl_fuzzer" will + only build that ninja target. Use for faster build iteration + after one of the ossfuzz_*san commands. + +You can pass some optional environment variables as well: + - BUILD_DIR: The output build directory (by default "$$repo/build") + - BUILD_TARGET: The target triplet used when cross-compiling. + - CMAKE_FLAGS: Convenience flag to pass both CMAKE_C_FLAGS and CMAKE_CXX_FLAGS. + - CMAKE_PREFIX_PATH: Installation prefixes to be searched by the find_package. + - ENABLE_WASM_SIMD=1: enable experimental SIMD in WASM build (only). + - FUZZER_MAX_TIME: "fuzz" command fuzzer running timeout in seconds. + - LINT_OUTPUT: Path to the output patch from the "lint" command. + - SKIP_CPUSET=1: Skip modifying the cpuset in the arm_benchmark. + - SKIP_BUILD=1: Skip the build stage, cmake configure only. + - SKIP_TEST=1: Skip the test stage. + - STORE_IMAGES=0: Makes the benchmark discard the computed images. + - TEST_STACK_LIMIT: Stack size limit (ulimit -s) during tests, in KiB. + - TEST_SELECTOR: pass additional arguments to ctest, e.g. "-R .Resample.". + - STACK_SIZE=1: Generate binaries with the .stack_sizes sections. 
+ +These optional environment variables are forwarded to the cmake call as +parameters: + - CMAKE_BUILD_TYPE + - CMAKE_C_FLAGS + - CMAKE_CXX_FLAGS + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_CROSSCOMPILING_EMULATOR + - CMAKE_FIND_ROOT_PATH + - CMAKE_EXE_LINKER_FLAGS + - CMAKE_MAKE_PROGRAM + - CMAKE_MODULE_LINKER_FLAGS + - CMAKE_SHARED_LINKER_FLAGS + - CMAKE_TOOLCHAIN_FILE + +Example: + BUILD_DIR=/tmp/build $0 opt +EOF + exit 1 + fi + + cmd="cmd_${cmd}" + shift + set -x + "${cmd}" "$@" +} + +main "$@" diff --git a/third-party/libjxl/libjxl/cmake/FindAtomics.cmake b/third-party/libjxl/libjxl/cmake/FindAtomics.cmake new file mode 100644 index 0000000000..9a6cdc39ec --- /dev/null +++ b/third-party/libjxl/libjxl/cmake/FindAtomics.cmake @@ -0,0 +1,53 @@ +# Original issue: +# * https://gitlab.kitware.com/cmake/cmake/-/issues/23021#note_1098733 +# +# For reference: +# * https://gcc.gnu.org/wiki/Atomic/GCCMM +# +# riscv64 specific: +# * https://lists.debian.org/debian-riscv/2022/01/msg00009.html +# +# ATOMICS_FOUND - system has c++ atomics +# ATOMICS_LIBRARIES - libraries needed to use c++ atomics + +include(CheckCXXSourceCompiles) + +# RISC-V only has 32-bit and 64-bit atomic instructions. GCC is supposed +# to convert smaller atomics to those larger ones via masking and +# shifting like LLVM, but it’s a known bug that it does not. This means +# anything that wants to use atomics on 1-byte or 2-byte types needs +# -latomic, but not 4-byte or 8-byte (though it does no harm). 
+set(atomic_code + " + #include <atomic> + #include <cstdint> + std::atomic<uint8_t> n8 (0); // riscv64 + std::atomic<uint64_t> n64 (0); // armel, mipsel, powerpc + int main() { + ++n8; + ++n64; + return 0; + }") + +check_cxx_source_compiles("${atomic_code}" ATOMICS_LOCK_FREE_INSTRUCTIONS) + +if(ATOMICS_LOCK_FREE_INSTRUCTIONS) + set(ATOMICS_FOUND TRUE) + set(ATOMICS_LIBRARIES) +else() + set(CMAKE_REQUIRED_LIBRARIES "-latomic") + check_cxx_source_compiles("${atomic_code}" ATOMICS_IN_LIBRARY) + set(CMAKE_REQUIRED_LIBRARIES) + if(ATOMICS_IN_LIBRARY) + set(ATOMICS_LIBRARY atomic) + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(Atomics DEFAULT_MSG ATOMICS_LIBRARY) + set(ATOMICS_LIBRARIES ${ATOMICS_LIBRARY}) + unset(ATOMICS_LIBRARY) + else() + if(Atomics_FIND_REQUIRED) + message(FATAL_ERROR "Neither lock free instructions nor -latomic found.") + endif() + endif() +endif() +unset(atomic_code) diff --git a/third-party/libjxl/libjxl/cmake/FindBrotli.cmake b/third-party/libjxl/libjxl/cmake/FindBrotli.cmake new file mode 100644 index 0000000000..9fb78e47d8 --- /dev/null +++ b/third-party/libjxl/libjxl/cmake/FindBrotli.cmake @@ -0,0 +1,75 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file.
+ +set(brlibs brotlicommon brotlienc brotlidec) + +find_package(PkgConfig QUIET) +if (PkgConfig_FOUND) + foreach(brlib IN ITEMS ${brlibs}) + string(TOUPPER "${brlib}" BRPREFIX) + pkg_check_modules("PC_${BRPREFIX}" lib${brlib}) + endforeach() +endif() + +find_path(BROTLI_INCLUDE_DIR + NAMES brotli/decode.h + HINTS ${PC_BROTLICOMMON_INCLUDEDIR} ${PC_BROTLICOMMON_INCLUDE_DIRS} +) + +foreach(brlib IN ITEMS ${brlibs}) + string(TOUPPER "${brlib}" BRPREFIX) + find_library(${BRPREFIX}_LIBRARY + NAMES ${${BRPREFIX}_NAMES} ${brlib} + HINTS ${PC_${BRPREFIX}_LIBDIR} ${PC_${BRPREFIX}_LIBRARY_DIRS} + ) + + if (${BRPREFIX}_LIBRARY AND NOT TARGET ${brlib}) + if(CMAKE_VERSION VERSION_LESS "3.13.5") + add_library(${brlib} INTERFACE IMPORTED GLOBAL) + set_property(TARGET ${brlib} PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${BROTLI_INCLUDE_DIR}) + target_link_libraries(${brlib} INTERFACE ${${BRPREFIX}_LIBRARY}) + set_property(TARGET ${brlib} PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_${BRPREFIX}_CFLAGS_OTHER}) + else() + add_library(${brlib} INTERFACE IMPORTED GLOBAL) + target_include_directories(${brlib} + INTERFACE ${BROTLI_INCLUDE_DIR}) + target_link_libraries(${brlib} + INTERFACE ${${BRPREFIX}_LIBRARY}) + target_link_options(${brlib} + INTERFACE ${PC_${BRPREFIX}_LDFLAGS_OTHER}) + target_compile_options(${brlib} + INTERFACE ${PC_${BRPREFIX}_CFLAGS_OTHER}) + endif() + endif() +endforeach() + +if (BROTLICOMMON_FOUND AND BROTLIENC_FOUND AND BROTLIDEC_FOUND) + set(Brotli_FOUND ON) +else () + set(Brotli_FOUND OFF) +endif() + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Brotli + FOUND_VAR Brotli_FOUND + REQUIRED_VARS + BROTLI_INCLUDE_DIR + BROTLICOMMON_LIBRARY + BROTLIENC_LIBRARY + BROTLIDEC_LIBRARY + VERSION_VAR Brotli_VERSION +) + +mark_as_advanced( + BROTLI_INCLUDE_DIR + BROTLICOMMON_LIBRARY + BROTLIENC_LIBRARY + BROTLIDEC_LIBRARY +) + +if (Brotli_FOUND) + set(Brotli_LIBRARIES ${BROTLICOMMON_LIBRARY} ${BROTLIENC_LIBRARY} ${BROTLIDEC_LIBRARY}) + 
set(Brotli_INCLUDE_DIRS ${BROTLI_INCLUDE_DIR}) +endif() diff --git a/third-party/libjxl/libjxl/cmake/FindHWY.cmake b/third-party/libjxl/libjxl/cmake/FindHWY.cmake new file mode 100644 index 0000000000..c1deb9b851 --- /dev/null +++ b/third-party/libjxl/libjxl/cmake/FindHWY.cmake @@ -0,0 +1,66 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +find_package(PkgConfig QUIET) +if (PkgConfig_FOUND) + pkg_check_modules(PC_HWY QUIET libhwy) + set(HWY_VERSION ${PC_HWY_VERSION}) +endif () + +find_path(HWY_INCLUDE_DIR + NAMES hwy/highway.h + HINTS ${PC_HWY_INCLUDEDIR} ${PC_HWY_INCLUDE_DIRS} +) + +find_library(HWY_LIBRARY + NAMES ${HWY_NAMES} hwy + HINTS ${PC_HWY_LIBDIR} ${PC_HWY_LIBRARY_DIRS} +) + +if (HWY_INCLUDE_DIR AND NOT HWY_VERSION) + if (EXISTS "${HWY_INCLUDE_DIR}/hwy/highway.h") + file(READ "${HWY_INCLUDE_DIR}/hwy/highway.h" HWY_VERSION_CONTENT) + + string(REGEX MATCH "#define HWY_MAJOR +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}") + set(HWY_VERSION_MAJOR "${CMAKE_MATCH_1}") + + string(REGEX MATCH "#define +HWY_MINOR +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}") + set(HWY_VERSION_MINOR "${CMAKE_MATCH_1}") + + string(REGEX MATCH "#define +HWY_PATCH +([0-9]+)" _dummy "${HWY_VERSION_CONTENT}") + set(HWY_VERSION_PATCH "${CMAKE_MATCH_1}") + + set(HWY_VERSION "${HWY_VERSION_MAJOR}.${HWY_VERSION_MINOR}.${HWY_VERSION_PATCH}") + endif () +endif () + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(HWY + FOUND_VAR HWY_FOUND + REQUIRED_VARS HWY_LIBRARY HWY_INCLUDE_DIR + VERSION_VAR HWY_VERSION +) + +if (HWY_LIBRARY AND NOT TARGET hwy) + add_library(hwy INTERFACE IMPORTED GLOBAL) + + if(CMAKE_VERSION VERSION_LESS "3.13.5") + set_property(TARGET hwy PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${HWY_INCLUDE_DIR}) + target_link_libraries(hwy INTERFACE ${HWY_LIBRARY}) + set_property(TARGET hwy PROPERTY INTERFACE_COMPILE_OPTIONS 
${PC_HWY_CFLAGS_OTHER}) + else() + target_include_directories(hwy INTERFACE ${HWY_INCLUDE_DIR}) + target_link_libraries(hwy INTERFACE ${HWY_LIBRARY}) + target_link_options(hwy INTERFACE ${PC_HWY_LDFLAGS_OTHER}) + target_compile_options(hwy INTERFACE ${PC_HWY_CFLAGS_OTHER}) + endif() +endif() + +mark_as_advanced(HWY_INCLUDE_DIR HWY_LIBRARY) + +if (HWY_FOUND) + set(HWY_LIBRARIES ${HWY_LIBRARY}) + set(HWY_INCLUDE_DIRS ${HWY_INCLUDE_DIR}) +endif () diff --git a/third-party/libjxl/libjxl/cmake/FindLCMS2.cmake b/third-party/libjxl/libjxl/cmake/FindLCMS2.cmake new file mode 100644 index 0000000000..0a7b54eb96 --- /dev/null +++ b/third-party/libjxl/libjxl/cmake/FindLCMS2.cmake @@ -0,0 +1,59 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +find_package(PkgConfig QUIET) +if (PkgConfig_FOUND) + pkg_check_modules(PC_LCMS2 QUIET lcms2) + set(LCMS2_VERSION ${PC_LCMS2_VERSION}) +endif () + +find_path(LCMS2_INCLUDE_DIR + NAMES lcms2.h + HINTS ${PC_LCMS2_INCLUDEDIR} ${PC_LCMS2_INCLUDE_DIRS} +) + +find_library(LCMS2_LIBRARY + NAMES ${LCMS2_NAMES} lcms2 liblcms2 lcms-2 liblcms-2 + HINTS ${PC_LCMS2_LIBDIR} ${PC_LCMS2_LIBRARY_DIRS} +) + +if (LCMS2_INCLUDE_DIR AND NOT LCMS2_VERSION) + file(READ ${LCMS2_INCLUDE_DIR}/lcms2.h LCMS2_VERSION_CONTENT) + string(REGEX MATCH "#define[ \t]+LCMS_VERSION[ \t]+([0-9]+)[ \t]*\n" LCMS2_VERSION_MATCH ${LCMS2_VERSION_CONTENT}) + if (LCMS2_VERSION_MATCH) + string(SUBSTRING ${CMAKE_MATCH_1} 0 1 LCMS2_VERSION_MAJOR) + string(SUBSTRING ${CMAKE_MATCH_1} 1 2 LCMS2_VERSION_MINOR) + set(LCMS2_VERSION "${LCMS2_VERSION_MAJOR}.${LCMS2_VERSION_MINOR}") + endif () +endif () + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(LCMS2 + FOUND_VAR LCMS2_FOUND + REQUIRED_VARS LCMS2_LIBRARY LCMS2_INCLUDE_DIR + VERSION_VAR LCMS2_VERSION +) + +if (LCMS2_LIBRARY AND NOT TARGET lcms2) + add_library(lcms2 INTERFACE
IMPORTED GLOBAL) + + if(CMAKE_VERSION VERSION_LESS "3.13.5") + set_property(TARGET lcms2 PROPERTY INTERFACE_INCLUDE_DIRECTORIES ${LCMS2_INCLUDE_DIR}) + target_link_libraries(lcms2 INTERFACE ${LCMS2_LIBRARY}) + set_property(TARGET lcms2 PROPERTY INTERFACE_COMPILE_OPTIONS ${PC_LCMS2_CFLAGS_OTHER}) + else() + target_include_directories(lcms2 INTERFACE ${LCMS2_INCLUDE_DIR}) + target_link_libraries(lcms2 INTERFACE ${LCMS2_LIBRARY}) + target_link_options(lcms2 INTERFACE ${PC_LCMS2_LDFLAGS_OTHER}) + target_compile_options(lcms2 INTERFACE ${PC_LCMS2_CFLAGS_OTHER}) + endif() +endif() + +mark_as_advanced(LCMS2_INCLUDE_DIR LCMS2_LIBRARY) + +if (LCMS2_FOUND) + set(LCMS2_LIBRARIES ${LCMS2_LIBRARY}) + set(LCMS2_INCLUDE_DIRS ${LCMS2_INCLUDE_DIR}) +endif () diff --git a/third-party/libjxl/libjxl/debian/changelog b/third-party/libjxl/libjxl/debian/changelog new file mode 100644 index 0000000000..6fbaddf68a --- /dev/null +++ b/third-party/libjxl/libjxl/debian/changelog @@ -0,0 +1,95 @@ +jpeg-xl (0.9.0) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.9.0. + + -- JPEG XL Maintainers Wed, 11 Jan 2023 16:12:35 +0000 + +jpeg-xl (0.8) unstable; urgency=medium + + * Bump JPEG XL version to 0.8. + + -- JPEG XL Maintainers Wed, 11 Jan 2023 16:12:34 +0000 + +jpeg-xl (0.7) unstable; urgency=medium + + * Bump JPEG XL version to 0.7. + + -- JPEG XL Maintainers Mon, 08 Aug 2022 14:43:58 +0000 + +jpeg-xl (0.6) unstable; urgency=medium + + * Bump JPEG XL version to 0.6. + + -- JPEG XL Maintainers Fri, 10 Sep 2021 16:08:17 +0200 + +jpeg-xl (0.5.0) unstable; urgency=medium + + * Bump JPEG XL version to 0.5.0. + + -- JPEG XL Maintainers Thu, 12 Aug 2021 23:49:40 +0200 + +jpeg-xl (0.3.7) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3.7. + + -- Sami Boukortt Mon, 29 Mar 2021 12:14:20 +0200 + +jpeg-xl (0.3.6) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3.6. 
+ + -- Sami Boukortt Thu, 25 Mar 2021 17:40:58 +0100 + +jpeg-xl (0.3.5) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3.5. + + -- Sami Boukortt Tue, 23 Mar 2021 15:20:44 +0100 + +jpeg-xl (0.3.4) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3.4. + + -- Sami Boukortt Tue, 16 Mar 2021 12:13:59 +0100 + +jpeg-xl (0.3.3) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3.3. + + -- Sami Boukortt Fri, 5 Mar 2021 19:15:26 +0100 + +jpeg-xl (0.3.2) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3.2. + + -- Alex Deymo Fri, 12 Feb 2021 21:00:12 +0100 + +jpeg-xl (0.3.1) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3.1. + + -- Alex Deymo Tue, 09 Feb 2021 09:48:43 +0100 + +jpeg-xl (0.3) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.3. + + -- Alex Deymo Wed, 27 Jan 2021 22:36:32 +0100 + +jpeg-xl (0.2) UNRELEASED; urgency=medium + + * Bump JPEG XL version to 0.2. + + -- Alex Deymo Wed, 23 Nov 2020 20:42:10 +0100 + +jpeg-xl (0.1) UNRELEASED; urgency=medium + + * JPEG XL format release candidate. + + -- Alex Deymo Fri, 13 Nov 2020 17:42:24 +0100 + +jpeg-xl (0.0.2-1) UNRELEASED; urgency=medium + + * Initial debian package. 
+ + -- Alex Deymo Tue, 27 Oct 2020 15:27:59 +0100 diff --git a/third-party/libjxl/libjxl/debian/compat b/third-party/libjxl/libjxl/debian/compat new file mode 100644 index 0000000000..f599e28b8a --- /dev/null +++ b/third-party/libjxl/libjxl/debian/compat @@ -0,0 +1 @@ +10 diff --git a/third-party/libjxl/libjxl/debian/control b/third-party/libjxl/libjxl/debian/control new file mode 100644 index 0000000000..f5dc5ce0cc --- /dev/null +++ b/third-party/libjxl/libjxl/debian/control @@ -0,0 +1,88 @@ +Source: jpeg-xl +Maintainer: JPEG XL Maintainers +Section: misc +Priority: optional +Standards-Version: 3.9.8 +Build-Depends: + asciidoc, + cmake, + debhelper (>= 9), + libbrotli-dev, + libgdk-pixbuf-2.0-dev | libgdk-pixbuf2.0-dev, + libgif-dev, + libgimp2.0-dev, + libgmock-dev, + libgoogle-perftools-dev, + libgtest-dev, + libhwy-dev (>= 1.0.0), + libjpeg-dev, + libopenexr-dev, + libpng-dev, + libwebp-dev, + pkg-config, + xdg-utils, + xmlto, +Homepage: https://github.com/libjxl/libjxl +Rules-Requires-Root: no + +Package: jxl +Architecture: any +Section: utils +Depends: ${misc:Depends}, ${shlibs:Depends} +Description: JPEG XL Image Coding System - "JXL" (command line utility) + The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and + lossless image compression format. It has a rich feature set and is + particularly optimized for responsive web environments, so that + content renders well on a wide range of devices. Moreover, it includes + several features that help transition from the legacy JPEG format. + . + This package installs the command line utilities. + +Package: libjxl-dev +Architecture: any +Section: libdevel +Depends: libjxl (= ${binary:Version}), ${misc:Depends}, + libhwy-dev, +Description: JPEG XL Image Coding System - "JXL" (development files) + The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and + lossless image compression format.
It has a rich feature set and is + particularly optimized for responsive web environments, so that + content renders well on a wide range of devices. Moreover, it includes + several features that help transition from the legacy JPEG format. + . + This package installs development files. + +Package: libjxl +Architecture: any +Multi-Arch: same +Section: libs +Depends: ${shlibs:Depends}, ${misc:Depends} +Pre-Depends: ${misc:Pre-Depends} +Description: JPEG XL Image Coding System - "JXL" (shared libraries) + The JPEG XL Image Coding System (ISO/IEC 18181) is a lossy and + lossless image compression format. It has a rich feature set and is + particularly optimized for responsive web environments, so that + content renders well on a wide range of devices. Moreover, it includes + several features that help transition from the legacy JPEG format. + . + This package installs shared libraries. + +Package: libjxl-gdk-pixbuf +Architecture: any +Multi-Arch: same +Section: libs +Depends: ${shlibs:Depends}, ${misc:Depends} +Pre-Depends: ${misc:Pre-Depends} +Description: JPEG XL Plugin for gdk-pixbuf + This package installs the required files for reading JPEG XL files in + GTK applications. + +Package: libjxl-gimp-plugin +Architecture: any +Multi-Arch: same +Section: graphics +Depends: ${shlibs:Depends}, ${misc:Depends} +Pre-Depends: ${misc:Pre-Depends} +Enhances: gimp +Description: JPEG XL Import and Export Plugin for GIMP + This is a plugin for GIMP version 2.10.x to import and export JPEG XL images. diff --git a/third-party/libjxl/libjxl/debian/copyright b/third-party/libjxl/libjxl/debian/copyright new file mode 100644 index 0000000000..7786a8775b --- /dev/null +++ b/third-party/libjxl/libjxl/debian/copyright @@ -0,0 +1,199 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: jpeg-xl + +Files: * +Copyright: 2020 the JPEG XL Project +License: BSD-3-clause + +Files: third_party/libjpeg-turbo/* +Copyright (C)2009-2023 D. R. Commander. 
All Rights Reserved. +Copyright (C)2015 Viktor Szathmáry. All Rights Reserved. +License: BSD-3-clause + +Files: third_party/sjpeg/* +Copyright: 2017 Google, Inc +License: Apache-2.0 + +Files: third_party/skcms/* +Copyright: 2018 Google Inc. +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + . + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Files: testdata/external/pngsuite/* +Copyright: Willem van Schaik, 1996, 2011 +License: PngSuite License + See http://www.schaik.com/pngsuite/ for details. + . 
+ + Permission to use, copy, modify and distribute these images for any + purpose and without fee is hereby granted. + +Files: testdata/external/raw.pixls/* +Copyright: their respective owners listed in https://raw.pixls.us/ +License: CC0-1.0 + +Files: testdata/external/wesaturate/* +Copyright: their respective owners listed in https://www.wesaturate.com/ +License: CC0-1.0 + +Files: testdata/external/wide-gamut-tests/* +Copyright: github.com/codelogic/wide-gamut-tests authors. +License: Apache-2.0 + +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian systems, the complete text of the Apache License, Version 2 + can be found in "/usr/share/common-licenses/Apache-2.0". + +License: CC0-1.0 + Creative Commons Zero v1.0 Universal + . + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL + SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT + RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" + BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS + DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS + LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE + INFORMATION OR WORKS PROVIDED HEREUNDER. + . + Statement of Purpose + .
+ The laws of most jurisdictions throughout the world automatically confer + exclusive Copyright and Related Rights (defined below) upon the creator and + subsequent owner(s) (each and all, an "owner") of an original work of + authorship and/or a database (each, a "Work"). + . + Certain owners wish to permanently relinquish those rights to a Work for the + purpose of contributing to a commons of creative, cultural and scientific + works ("Commons") that the public can reliably and without fear of later + claims of infringement build upon, modify, incorporate in other works, reuse + and redistribute as freely as possible in any form whatsoever and for any + purposes, including without limitation commercial purposes. These owners may + contribute to the Commons to promote the ideal of a free culture and the + further production of creative, cultural and scientific works, or to gain + reputation or greater distribution for their Work in part through the use + and efforts of others. + . + For these and/or other purposes and motivations, and without any expectation + of additional consideration or compensation, the person associating CC0 with + a Work (the "Affirmer"), to the extent that he or she is an owner of + Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to + the Work and publicly distribute the Work under its terms, with knowledge of + his or her Copyright and Related Rights in the Work and the meaning and + intended legal effect of CC0 on those rights. + . + 1. Copyright and Related Rights. A Work made available under CC0 may be + protected by copyright and related or neighboring rights ("Copyright and + Related Rights"). Copyright and Related Rights include, but are not limited + to, the following: + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); + iii. 
publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national implementations + thereof. + . + 2. Waiver. To the greatest extent permitted by, but not in contravention of, + applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and + unconditionally waives, abandons, and surrenders all of Affirmer's Copyright + and Related Rights and associated claims and causes of action, whether now + known or unknown (including existing as well as future claims and causes of + action), in the Work (i) in all territories worldwide, (ii) for the maximum + duration provided by applicable law or treaty (including future time + extensions), (iii) in any current or future medium and for any number of + copies, and (iv) for any purpose whatsoever, including without limitation + commercial, advertising or promotional purposes (the "Waiver"). Affirmer + makes the Waiver for the benefit of each member of the public at large and + to the detriment of Affirmer's heirs and successors, fully intending that + such Waiver shall not be subject to revocation, rescission, cancellation, + termination, or any other legal or equitable action to disrupt the quiet + enjoyment of the Work by the public as contemplated by Affirmer's express + Statement of Purpose. + . + 3. Public License Fallback. 
Should any part of the Waiver for any reason be + judged legally invalid or ineffective under applicable law, then the Waiver + shall be preserved to the maximum extent permitted taking into account + Affirmer's express Statement of Purpose. In addition, to the extent the + Waiver is so judged Affirmer hereby grants to each affected person a + royalty-free, non transferable, non sublicensable, non exclusive, + irrevocable and unconditional license to exercise Affirmer's Copyright and + Related Rights in the Work (i) in all territories worldwide, (ii) for the + maximum duration provided by applicable law or treaty (including future time + extensions), (iii) in any current or future medium and for any number of + copies, and (iv) for any purpose whatsoever, including without limitation + commercial, advertising or promotional purposes (the "License"). The License + shall be deemed effective as of the date CC0 was applied by Affirmer to the + Work. Should any part of the License for any reason be judged legally + invalid or ineffective under applicable law, such partial invalidity or + ineffectiveness shall not invalidate the remainder of the License, and in + such case Affirmer hereby affirms that he or she will not (i) exercise any + of his or her remaining Copyright and Related Rights in the Work or (ii) + assert any associated claims and causes of action with respect to the Work, + in either case contrary to Affirmer's express Statement of Purpose. + . + 4. Limitations and Disclaimers. + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. 
Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, statutory or + otherwise, including without limitation warranties of title, + merchantability, fitness for a particular purpose, non infringement, or the + absence of latent or other defects, accuracy, or the present or absence of + errors, whether or not discoverable, all to the greatest extent permissible + under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + . + For more information, please see: + http://creativecommons.org/publicdomain/zero/1.0/> + diff --git a/third-party/libjxl/libjxl/debian/jxl.install b/third-party/libjxl/libjxl/debian/jxl.install new file mode 100644 index 0000000000..c3bae3ed10 --- /dev/null +++ b/third-party/libjxl/libjxl/debian/jxl.install @@ -0,0 +1,3 @@ +usr/bin/* +usr/share/man/man1/cjxl.1 +usr/share/man/man1/djxl.1 diff --git a/third-party/libjxl/libjxl/debian/libjxl-dev.install b/third-party/libjxl/libjxl/debian/libjxl-dev.install new file mode 100644 index 0000000000..b735ec2c26 --- /dev/null +++ b/third-party/libjxl/libjxl/debian/libjxl-dev.install @@ -0,0 +1,4 @@ +usr/include/jxl/*.h +usr/lib/*/*.a +usr/lib/*/*.so +usr/lib/*/pkgconfig/*.pc diff --git a/third-party/libjxl/libjxl/debian/libjxl-gdk-pixbuf.install b/third-party/libjxl/libjxl/debian/libjxl-gdk-pixbuf.install new file mode 100644 index 0000000000..12d2ab250f --- /dev/null +++ b/third-party/libjxl/libjxl/debian/libjxl-gdk-pixbuf.install @@ -0,0 +1,3 
@@ +usr/lib/*/gdk-pixbuf-*/*/loaders/* +usr/share/mime/packages/image-jxl.xml +usr/share/thumbnailers/jxl.thumbnailer diff --git a/third-party/libjxl/libjxl/debian/libjxl-gimp-plugin.install b/third-party/libjxl/libjxl/debian/libjxl-gimp-plugin.install new file mode 100644 index 0000000000..353431dba3 --- /dev/null +++ b/third-party/libjxl/libjxl/debian/libjxl-gimp-plugin.install @@ -0,0 +1 @@ +usr/lib/gimp diff --git a/third-party/libjxl/libjxl/debian/libjxl.install b/third-party/libjxl/libjxl/debian/libjxl.install new file mode 100644 index 0000000000..cd157a7a5c --- /dev/null +++ b/third-party/libjxl/libjxl/debian/libjxl.install @@ -0,0 +1 @@ +usr/lib/*/libjxl*.so.* diff --git a/third-party/libjxl/libjxl/debian/rules b/third-party/libjxl/libjxl/debian/rules new file mode 100755 index 0000000000..6259dbfc61 --- /dev/null +++ b/third-party/libjxl/libjxl/debian/rules @@ -0,0 +1,21 @@ +#!/usr/bin/make -f + +include /usr/share/dpkg/pkg-info.mk + +%: + dh $@ --buildsystem=cmake + +override_dh_auto_configure: + # TODO(deymo): Remove the DCMAKE_BUILD_TYPE once builds without NDEBUG + # are as useful as Release builds. 
+ # TODO(szabadka) Re-enable jpegli after tests are fixed on Ubuntu 20.04, + # and debian:buster + dh_auto_configure -- \ + -DJPEGXL_VERSION=$(DEB_VERSION) \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DJPEGXL_FORCE_SYSTEM_GTEST=ON \ + -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \ + -DJPEGXL_FORCE_SYSTEM_HWY=ON \ + -DJPEGXL_ENABLE_JPEGLI=OFF \ + -DJPEGXL_ENABLE_JPEGLI_LIBJPEG=OFF \ + -DJPEGXL_ENABLE_PLUGINS=ON diff --git a/third-party/libjxl/libjxl/debian/source/format b/third-party/libjxl/libjxl/debian/source/format new file mode 100644 index 0000000000..163aaf8d82 --- /dev/null +++ b/third-party/libjxl/libjxl/debian/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/third-party/libjxl/libjxl/deps.sh b/third-party/libjxl/libjxl/deps.sh new file mode 100755 index 0000000000..cfca027fcb --- /dev/null +++ b/third-party/libjxl/libjxl/deps.sh @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# This file downloads the dependencies needed to build JPEG XL into third_party. +# These dependencies are normally pulled by gtest. + +set -eu + +MYDIR=$(dirname $(realpath "$0")) + +# Git revisions we use for the given submodules. Update these whenever you +# update a git submodule. +THIRD_PARTY_BROTLI="36533a866ed1ca4b75cf049f4521e4ec5fe24727" +THIRD_PARTY_HIGHWAY="591ad359a5aa6c320951ebd35f839604c87abe6c" +THIRD_PARTY_SKCMS="b25b07b4b07990811de121c0356155b2ba0f4318" +THIRD_PARTY_SJPEG="e5ab13008bb214deb66d5f3e17ca2f8dbff150bf" +THIRD_PARTY_ZLIB="cacf7f1d4e3d44d871b605da3b647f07d718623f" +THIRD_PARTY_LIBPNG="a40189cf881e9f0db80511c382292a5604c3c3d1" +THIRD_PARTY_LIBJPEG_TURBO="8ecba3647edb6dd940463fedf38ca33a8e2a73d1" # 2.1.5.1 + +# Download the target revision from GitHub. 
+download_github() { + local path="$1" + local project="$2" + + local varname=`echo "$path" | tr '[:lower:]' '[:upper:]'` + varname="${varname/[\/-]/_}" + local sha + eval "sha=\${${varname}}" + + local down_dir="${MYDIR}/downloads" + local local_fn="${down_dir}/${sha}.tar.gz" + if [[ -e "${local_fn}" && -d "${MYDIR}/${path}" ]]; then + echo "${path} already up to date." >&2 + return 0 + fi + + local url + local strip_components=0 + if [[ "${project:0:4}" == "http" ]]; then + # "project" is a googlesource.com base url. + url="${project}${sha}.tar.gz" + else + # GitHub files have a top-level directory + strip_components=1 + url="https://github.com/${project}/tarball/${sha}" + fi + + echo "Downloading ${path} version ${sha}..." >&2 + mkdir -p "${down_dir}" + curl -L --show-error -o "${local_fn}.tmp" "${url}" + mkdir -p "${MYDIR}/${path}" + tar -zxf "${local_fn}.tmp" -C "${MYDIR}/${path}" \ + --strip-components="${strip_components}" + mv "${local_fn}.tmp" "${local_fn}" +} + +is_git_repository() { + local dir="$1" + local toplevel=$(git rev-parse --show-toplevel) + + [[ "${dir}" == "${toplevel}" ]] +} + + +main() { + if is_git_repository "${MYDIR}"; then + cat >&2 <> $EMSDK/.emscripten + +# Assuming you are in the root level of the cloned libjxl repo, +# either build with regular WASM: +BUILD_TARGET=wasm32 emconfigure ./ci.sh release +# or with SIMD WASM: +BUILD_TARGET=wasm32 ENABLE_WASM_SIMD=1 emconfigure ./ci.sh release +``` + +## Example site + +Once you have build the wasm binary, you can give it a try by building a site +that decodes jxl images, see [wasm_demo](../tools/wasm_demo/README.md). diff --git a/third-party/libjxl/libjxl/doc/color_management.md b/third-party/libjxl/libjxl/doc/color_management.md new file mode 100644 index 0000000000..88a7b60afa --- /dev/null +++ b/third-party/libjxl/libjxl/doc/color_management.md @@ -0,0 +1,68 @@ +# Color Management + +[TOC] + + + +## Why + +The vast majority of web images are still sRGB. 
However, wide-gamut material is +increasingly being produced (photography, cinema, 4K). Screens covering most of +the Adobe RGB gamut are readily available and some also cover most of DCI P3 +(iPhone, Pixel2) or even BT.2020. + +Currently, after a camera records a very saturated red pixel, most raw +processors would clip it to the rather small sRGB gamut before saving as JPEG. +In keeping with our high-quality goal, we prevent such loss by allowing wider +input color spaces. + +## Which color space + +Even wide gamuts could be expressed relative to the sRGB primaries, but the +resulting coordinates may be outside the valid 0..1 range. Surprisingly, such +'unbounded' coordinates can be passed through color transforms provided the +transfer functions are expressed as parametric functions (not lookup tables). +However, most image file formats (including PNG and PNM) lack min/max metadata +and thus do not support unbounded coordinates. + +Instead, we need a larger working gamut to ensure most pixel coordinates are +within bounds and thus not clipped. However, larger gamuts result in lower +precision/resolution when using <= 16 bit encodings (as opposed to 32-bit float +in PFM). BT.2100 or P3 DCI appear to be good compromises. + +## CMS library + +Transforms with unbounded pixels are desirable because they reduce round-trip +error in tests. This requires parametric curves, which are only supported for +the common sRGB case in ICC v4 profiles. ArgyllCMS does not support v4. The +other popular open-source CMS is LittleCMS. It is also used by color-managed +editors (Krita/darktable), which increases the chances of interoperability. +However, LCMS has race conditions and overflow issues that prevent fuzzing. We +will later switch to the newer skcms. Note that this library does not intend to +support multiProcessElements, so HDR transfer functions cannot be represented +accurately. Thus in the long term, we will probably migrate away from ICC +profiles entirely. 
+ +## Which viewer + +On Linux, Krita and darktable support loading our PNG output images and their +ICC profile. + +## How to compress/decompress + +### Embedded ICC profile + +- Create an 8-bit or 16-bit PNG with an iCCP chunk, e.g. using darktable. +- Pass it to `cjxl`, then `djxl` with no special arguments. The decoded output + will have the same bit depth (can override with `--output_bit_depth`) and + color space. + +### Images without metadata (e.g. HDR) + +- Create a PGM/PPM/PFM file in a known color space. +- Invoke `cjxl` with `-x color_space=RGB_D65_202_Rel_Lin` (linear 2020). For + details/possible values, see color_encoding.cc `Description`. +- Invoke `djxl` as above with no special arguments. diff --git a/third-party/libjxl/libjxl/doc/developing_in_debian.md b/third-party/libjxl/libjxl/doc/developing_in_debian.md new file mode 100644 index 0000000000..5b2bbd335c --- /dev/null +++ b/third-party/libjxl/libjxl/doc/developing_in_debian.md @@ -0,0 +1,56 @@ +# Developing in Debian + +These instructions assume an up-to-date Debian/Ubuntu system. +For other platforms, please instead use the following: + +* [Cross Compiling for Windows with Crossroad](developing_with_crossroad.md). + +## Minimum build dependencies + +Apart from the dependencies in `third_party`, some of the tools use external +dependencies that need to be installed on your system first: + +```bash +sudo apt install cmake clang doxygen g++ extra-cmake-modules \ + libgif-dev libjpeg-dev ninja-build libgoogle-perftools-dev +``` + +Make sure your default `clang` compiler is at least version 6 by running + +```bash +clang --version +``` + +If it still shows an old version despite having, for example, `clang-7` installed, you need +to update the default `clang` compiler. 
On Debian-based systems run: + +```bash +sudo update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-7 100 +sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-7 100 +``` + +Optionally, to compile some of the extra tool support and tests you can install +the following packages: + +```bash +sudo apt install qt6-base-dev libwebp-dev libgimp2.0-dev libopenexr-dev \ + libgtest-dev libgmock-dev libbenchmark-dev libbenchmark-tools +``` + +For the lint/coverage commands, you will also need additional packages: + +```bash +sudo apt install clang-format clang-tidy curl parallel gcovr +``` + +## Building + +The `libjxl` project uses CMake to build. We provide a script that simplifies the +invocation. To build and test the project, run + +```bash +./ci.sh opt +``` + +This writes binaries to `build/tools` and runs unit tests. More information +on [build modes and testing](building_and_testing.md) is available. diff --git a/third-party/libjxl/libjxl/doc/developing_in_github.md b/third-party/libjxl/libjxl/doc/developing_in_github.md new file mode 100644 index 0000000000..ecda64fc85 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/developing_in_github.md @@ -0,0 +1,357 @@ +# Developing in GitHub + +This document describes the development steps related to handling the git +repository. + +If you are new to GitHub, there's a nice [quickstart +guide](https://docs.github.com/en/github/getting-started-with-github/quickstart) +on GitHub explaining the basics. + +## Initial setup + +You need to perform this set up at least once if you haven't used GitHub before. +Read through the quickstart guide [Set up +Git](https://docs.github.com/en/github/getting-started-with-github/set-up-git) +page to get your git up and running. You will need to Fork a repository next. +After that "Life of a Pull Request" describes the common everyday workflows.
+ +### Configure your SSH access + +The easiest way to configure access to your GitHub repository is to use SSH +keys. For that you need an SSH private and public key, ideally a strong one. You +can use different keys for different sites if you want. In this example, we will +create one for using in GitHub only. + +Create the `~/.ssh/id_rsa_github` file executing the following. (Here and +elsewhere, {{X}} are placeholders for your email/username) + +```bash +ssh-keygen -t rsa -b 4096 -C "{{EMAIL}}" -f ~/.ssh/id_rsa_github +``` + +Go to your [SSH and GPG keys](https://github.com/settings/keys) settings and +paste the contents of your *public key* (the one ending in `.pub`), that would +be the output of this command: + +```bash +cat ~/.ssh/id_rsa_github.pub +``` + +To use a specific key when SSHing to the github.com domain, you can add this +snippet of config to your .ssh/config file executing the following. + +```bash +cat >> ~/.ssh/config <github.com/*{{USERNAME}}*/libjxl + +where {{USERNAME}} denotes your GitHub username. + +### Checkout the JPEG XL code from GitHub + +To get the source code on your computer you need to "clone" it. There are two +repositories at play here, the upstream repository (`libjxl/libjxl`) and your +fork (`{{USERNAME}}/libjxl`). You will be normally fetching new changes from +the upstream repository and push changes to your fork. Getting your changes from +your fork to the upstream repository is done through the Web interface, via Pull +Requests. + +The [Fork a +repo](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) +goes in great detail, but uses the git remote names `upstream` for the shared +upstream repository and `origin` for your work. This guide proposes an +alternative naming scheme, used in the examples below. + +In this guide `origin` is the upstream shared repository and `myfork` is your +fork. You can use any other name for your fork if you want.
Use the following +commands to set things up, replacing `{{USERNAME}}` with your GitHub username: + +```bash +git clone https://github.com/libjxl/libjxl --recursive +cd libjxl +git remote set-url --push origin git@github.com:{{USERNAME}}/libjxl.git +git remote add myfork git@github.com:{{USERNAME}}/libjxl.git +git remote -vv +``` + +These commands did three things: + + * Created the repository with `origin` as the upstream remote, + * Changed the "push" URL to point to your fork, and + * Created a new remote pointing to your fork. + +The last step is optional. Since the "fetch" URL of `origin` points to the +shared repository and the "push" URL points to your fork, fetching from `origin` +always gets the latest changes from the upstream repository regardless of the +contents of your fork. + +Having a second remote called `myfork` is only useful if you need to download +pending changes from your fork from a different computer. For example, if you +work on multiple computers, each one with this setup, you can push to your +fork from one, and then fetch from `myfork` from another computer to get those. + +# Life of a Pull Request + +The general [GitHub flow +guide](https://docs.github.com/en/github/getting-started-with-github/github-flow) +applies to sending Pull Requests to this project. + +All the commands here assume you are in a git checkout as set up here. + +### Sync to the latest version + +```bash +git fetch origin +``` + +The last upstream version is now on `origin/main` and none of your local +branches have been modified by this command. + +### Start a new branch + +To start a new change you need a local branch. Each branch will represent a list +of individual commits which can then be requested to be merged as a single merge +request. So in general one branch is one code review, but each branch can have +multiple individual commits in it.
+ +```bash +git checkout origin/main -b mybranch +``` + +This will create a new branch `mybranch` tracking `origin/main`. A branch can +track any remote or local branch, which is used by some tools. Running `git +branch -vv` will show all the branches you have, what they are tracking and +how many commits are ahead or behind. If you create a branch without tracking +any other, you can add or change the tracking branch of the current branch +running `git branch --set-upstream-to=...`. + +### Add changes to your branch + +Follow any of the many online tutorials, for example +[The basics](https://git-scm.com/book/en/v2/Git-Basics-Getting-a-Git-Repository) +chapter from the https://git-scm.com/doc website is a good starting guide. +Create, change or delete files and do a git commit with a message. + +The commit message is required. A commit message should follow the 50/72 rule: + +* First line is 50 characters or less. +* Then a blank line. +* Remaining text should be wrapped at 72 characters. + +The first line should identify your commit, since that's what most tools will +show to the user. First lines like "Some fixes" are not useful. Explain what the +commit contains and why. + +We follow the [Google C++ Coding +Style](https://google.github.io/styleguide/cppguide.html). A +[clang-format](https://clang.llvm.org/docs/ClangFormat.html) configuration +file is available to automatically format your code, you can invoke it with +the `./ci.sh lint` helper tool. + +Read the [CONTRIBUTING.md](../CONTRIBUTING.md) file for more information about +contributing to libjxl. + +### Upload your changes for review + +The first step is a local review of your changes to see what you will be sending +for review. `gitg` is a nice Gtk UI for reviewing your local changes, or `tig` +for a similar ncurses console-based interface. Otherwise, from the terminal you +can run: + +```bash +git branch -vv +``` + +To show the current status of your local branches.
In particular, since your +branch is tracking origin/main (as seen in the output) git will tell you that +you are one commit ahead of the tracking branch. + +``` +* mybranch e74ae1a [origin/main: ahead 1] Improved decoding speed by 40% +``` + +It is a good idea before uploading to sync again with upstream (`git fetch +origin`) and then run `git branch -vv` to check whether there are new changes +upstream. If that is the case, you will see a "behind" flag in the output: + +``` +* mybranch e74ae1a [origin/main: ahead 1, behind 2] Improved decoding speed by 40% +``` + +To sync your changes on top of the latest changes in upstream you need to +rebase: + +```bash +git rebase +``` + +This will by default rebase your current branch changes on top of the tracking +branch. In this case, this will try to apply the current commit on top of the +latest origin/main (which has 2 more commits than the ones we have in our +branch) and your branch will now include that. There could be conflicts that you +have to deal with. A shortcut to do both fetch and rebase is to run `git pull +-r`, where the `-r` stands for "rebase" and will rebase the local commits on top +of the remote ones. + +Before uploading a patch, make sure your patch conforms to the +[contributing guidelines](../CONTRIBUTING.md) and it +[builds and passes tests](building_and_testing.md). + +Once you are ready to send your branch for review, upload it to *your* fork: + +```bash +git push origin mybranch +``` + +This will push your local branch "mybranch" to a remote in your fork called +"mybranch". The name can be anything, but keep in mind that it is public. A link +to the URL to create a merge request will be displayed. + +``` +Enumerating objects: 627, done. +Counting objects: 100% (627/627), done. +Delta compression using up to 56 threads +Compressing objects: 100% (388/388), done. +Writing objects: 100% (389/389), 10.71 MiB | 8.34 MiB/s, done. 
+Total 389 (delta 236), reused 0 (delta 0) +remote: +remote: Create a pull request for 'mybranch' on GitHub by visiting: +remote: https://github.com/{{USERNAME}}/libjxl/pull/new/mybranch +remote: +To github.com:{{USERNAME}}/libjxl.git + * [new branch] mybranch -> mybranch +``` + +### Updating submodules + +The repository uses submodules for external library dependencies in +third_party. Each submodule points to a particular external commit of the +external repository by the hash code of that external commit. Just like +regular source code files, this hash code is part of the current branch and +jpeg xl commit you have checked out. + +When changing branches or when doing `git rebase`, git will unfortunately +*not* automatically set those hashes to the ones of the branch or jpeg xl +commit you changed to nor set the source files of the third_party submodules +to the new state. That is, even though git will have updated the jpeg xl +source code files on your disk to the new ones, it will leave the submodule +hashes and the files in third_party in your workspace to the ones they were +before you changed branches. This will show up in a git diff because this +is seen as a change compared to the branch you switched to. The git diff shows +the difference in hash codes (as if you are changing to the old ones), it does +not show changes in files inside the third_party directory. + +This mismatch can cause at least two problems: + +*) the jpeg xl codebase may not compile due to third_party library version +mismatch if e.g. API changed or a submodule was added/removed. + +*) when using `commit -a` your commit, which may be a technical change +unrelated to submodule changes, will unintentionally contain a change to the +submodules hash code, which is undesired unless you actually want to change +the version of third_party libraries.
+ +To resolve this, the submodules must be updated manually with +the following command after those actions (at least when the submodules +changed): + +``` +git submodule update --init --recursive +``` + +Here, the init flag ensures new modules get added when necessary and the +recursive flag is required for the submodules depending on other submodules. + +If you check out a different branch, you can spot that submodules changed +when it shows a message similar to this: + +``` +M third_party/brotli +M third_party/lcms +``` + +If you do a rebase you may end up in a harder to solve situation, where +`git submodule update --init --recursive` itself fails with errors such as: + +``` +Unable to checkout '35ef5c554d888bef217d449346067de05e269b30' in submodule path 'third_party/brotli' +``` + +In that case, you can use the force flag: + +``` +git submodule update --init --recursive --force +``` + +### Iterating changes in your merge request + +To address reviewer changes you need to amend the local changes in your branch +first. Make the changes you need in your commit locally by running `git commit +--amend file1 file2 file3 ...` or `git commit --amend -a` to amend all the +changes from all the staged files. + +Once you have the new version of the "mybranch" branch to re-upload, you need to +force push it to the same branch in your fork. Since you are pushing a different +version of the same commit (as opposed to another commit on top of the existing +ones), you need to force the operation to replace the old version. + +```bash +git push origin mybranch --force +``` + +The merge request should now be updated with the new changes. + +### Merging your changes + +We use "rebase" as a merge policy, which means that there are no "merge" commits +(commits with more than one parent) but instead only a linear history of +changes. + +It is possible that other changes were added to the main branch since the last +time you rebased your changes.
These changes could create a conflict with your +Pull Request, if so you need to `git fetch`, `git rebase` and push again your +changes which need to go through the continuous integration workflow again to +verify that all the tests pass again after including the latest changes. + +### Trying locally a pending Pull Request + +If you want to review in your computer a pending pull request proposed by +another user you can fetch the merge request commit with the following command, +replacing `NNNN` with the pull request number: + +```bash +git fetch origin refs/pull/NNNN/head +git checkout FETCH_HEAD +``` + +The first command will add to your local git repository the remote commit for +the pending pull request and store a temporary reference called `FETCH_HEAD`. +The second command then checks out that reference. From this point you can +review the files in your computer, create a local branch for this FETCH_HEAD or +build on top of it. diff --git a/third-party/libjxl/libjxl/doc/developing_in_windows_msys.md b/third-party/libjxl/libjxl/doc/developing_in_windows_msys.md new file mode 100644 index 0000000000..3e86d5dd86 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/developing_in_windows_msys.md @@ -0,0 +1,168 @@ +# Developing for Windows with MSYS2 + +[MSYS2](https://www.msys2.org/) ("minimal system 2") is a software distribution and a development platform based on MinGW and Cygwin. It provides a Unix-like environment to build code on Windows. These instructions were written with a 64-bit instance of Windows 10 running on a VM. They may also work on native instances of Windows and other versions of Windows. + +## Build Environments + +MSYS2 provides multiple development [environments](https://www.msys2.org/docs/environments/). By convention, they are referred to in uppercase. They target slightly different platforms, runtime libraries, and compiler toolchains. For example, to build for 32-bit Windows, use the MINGW32 environment. 
For interoperability with Visual Studio projects, use the UCRT64 environment. + +Since all of the build environments are built on top of the MSYS environment, **all updates and package installation must be done from within the MSYS environment**. After making any package changes, `exit` all MSYS2 terminals and restart the desired build-environment. This reminder is repeated multiple times throughout this guide. + +* **MINGW32:** To compile for 32-bit Windows (on 64-bit Windows), use packages from the `mingw32` group. Package names are prefixed with `mingw-w64-i686`. The naming scheme may be different on the 32-bit version of MSYS2. + +* **MINGW64:** This is the primary environment for building for 64-bit Windows. It uses the older MSVCRT runtime, which is widely available across Windows systems. Package names are prefixed with `mingw-w64-x86_64`. + +* **UCRT64:** The Universal C Runtime (UCRT) is used by recent versions of Microsoft Visual Studio. It ships by default with Windows 10. For older versions of Windows, it must be provided with the application or installed by the user. Package names are prefixed with `mingw-w64-ucrt-x86_64`. + +* **CLANG64:** Unfortunately, the `gimp` packages are not available for the CLANG64 environment. However, `libjxl` will otherwise build in this environment if the appropriate packages are installed. Packages are prefixed with `mingw-w64-clang-x86_64`. + +## Install and Upgrade MSYS2 + +Download MSYS2 from the homepage. Install at a location without any spaces in its path on a drive with ample free space. After installing the packages used in this guide, MSYS2 used about 15GB of space. + +Toward the end of installation, select the option to run MSYS2 now. A command-line window will open. Run the following command, and answer the prompts to update the repository and close the terminal.
+ +```bash +pacman -Syu +``` + +Now restart the MSYS environment and run the following command to complete updates: + +```bash +pacman -Su +``` + +## Package Management + +Packages are organized in groups, which share the build environment name, but in lower case. Then they have name prefixes that indicate which group they belong to. Consider this package search: `pacman -Ss cmake` + +``` +mingw32/mingw-w64-i686-cmake +mingw64/mingw-w64-x86_64-cmake +ucrt64/mingw-w64-ucrt-x86_64-cmake +clang64/mingw-w64-clang-x86_64-cmake +msys/cmake +``` + +We can see the organization `group/prefix-name`. When installing packages, the group name is optional. + +```bash +pacman -S mingw-w64-x86_64-cmake +``` + +For tools that need to be aware of the compiler to function, install the package that corresponds with the specific build-environment you plan to use. For `cmake`, install the `mingw64` version. The generic `msys/cmake` will not function correctly because it will not find the compiler. For other tools, the generic `msys` version is adequate, like `msys/git`. + +To remove packages, use: + +```bash +pacman -Rsc [package-name] +``` + +## Worst-Case Scenario... + +If package management is done within a build environment other than MSYS, the environment structure will be disrupted and compilation will likely fail. If this happens, it may be necessary to reinstall MSYS2. + +1. Rename the `msys64` folder to `msys64.bak`. + +2. Use the installer to reinstall MSYS2 to `msys64`. + +3. Copy packages from `msys64.bak/var/cache/pacman/pkg/` to the new installation to save download time and bandwidth. + +4. Use `pacman` from within the MSYS environment to install and update packages. + +5. After successfully building a project, it is safe to delete `msys64.bak`. + +## The MINGW64 Environment + +Next set up the MINGW64 environment. The following commands should be run within the MSYS environment. `pacman -S` is used to install packages.
The `--needed` argument prevents packages from being reinstalled. + +```bash +pacman -S --needed base-devel mingw-w64-x86_64-toolchain +pacman -S git mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja \ + mingw-w64-x86_64-gtest mingw-w64-x86_64-giflib \ + mingw-w64-x86_64-libpng mingw-w64-x86_64-libjpeg-turbo +``` + +## Build `libjxl` + +Download the source from the libjxl [releases](https://github.com/libjxl/libjxl/releases) page. Alternatively, you may obtain the latest development version with `git`. Run `./deps.sh` to ensure additional third-party dependencies are downloaded. + +Start the MINGW64 environment, create a build directory within the source directory, and configure with `cmake`. + +```bash +mkdir build +cd build +cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \ + -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_PLUGINS=ON \ + -DJPEGXL_ENABLE_MANPAGES=OFF -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \ + -DJPEGXL_FORCE_SYSTEM_GTEST=ON .. +``` + +Check the output to see if any dependencies were missed and need to be installed. Adding `-G Ninja` may be helpful, but on my computer, Ninja was selected by default. Remember that package changes must be done from the MSYS environment. Then exit all MSYS2 terminals and restart the build environment. + +If all went well, you may now run `cmake` to build `libjxl`: + +```bash +cmake --build . +``` + +Do not be alarmed by the compiler warnings. They are caused by differences between gcc/g++ and clang. The build should complete successfully. Then `cjxl`, `djxl`, `jxlinfo`, and others can be run from within the build environment. Moving them into the native Windows environment requires resolving `dll` issues that are beyond the scope of this document. + +## The `clang` Compiler + +To use the `clang` compiler, install the packages that correspond with the environment you wish to use. Remember to make package changes from within the MSYS environment.
+ +``` +mingw-w64-i686-clang +mingw-w64-i686-clang-tools-extra +mingw-w64-i686-clang-compiler-rt + +mingw-w64-x86_64-clang +mingw-w64-x86_64-clang-tools-extra +mingw-w64-x86_64-clang-compiler-rt + +mingw-w64-ucrt-x86_64-clang +mingw-w64-ucrt-x86_64-clang-tools-extra +mingw-w64-ucrt-x86_64-clang-compiler-rt +``` + +After the `clang` compiler is installed, `libjxl` can be built with the `./ci.sh` script. + +```bash +./ci.sh opt -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \ + -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_MANPAGES=OFF \ + -DJPEGXL_FORCE_SYSTEM_BROTLI=ON -DJPEGXL_FORCE_SYSTEM_GTEST=ON +``` + +On my computer, `doxygen` packages needed to be installed to proceed with building. Use `pacman -Ss doxygen` to find the packages to install. + +## The GIMP Plugin + +To build the GIMP plugin, install the relevant `gimp` package. This will also install dependencies. Again, perform package management tasks from only the MSYS environment. Then restart the build environment. + +```bash +pacman -S mingw-w64-i686-gimp +pacman -S mingw-w64-x86_64-gimp +pacman -S mingw-w64-ucrt-x86_64-gimp +``` + +If `clang` is installed, you can use the `./ci.sh` script to build. Otherwise, navigate to the build directory to reconfigure and build with `cmake`. + +```bash +cd build +rm -r CM* +cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \ + -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_MANPAGES=OFF \ + -DJPEGXL_ENABLE_PLUGINS=ON -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \ + -DJPEGXL_FORCE_SYSTEM_GTEST=ON .. +``` + +The plugin is built statically, so there should be no need to install `dll` files. To try out the plugin: + +1. [Download](https://www.gimp.org/downloads/) and install the stable version of GIMP (currently 2.10.24). + +2. Create a new folder: `C:\Program Files\GIMP 2\lib\gimp\2.0\plug-ins\file-jxl` + +3. Copy `build/plugins/gimp/file-jxl.exe` to the new folder.
diff --git a/third-party/libjxl/libjxl/doc/developing_in_windows_vcpkg.md b/third-party/libjxl/libjxl/doc/developing_in_windows_vcpkg.md new file mode 100644 index 0000000000..a897be29a3 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/developing_in_windows_vcpkg.md @@ -0,0 +1,90 @@ +# Developing on Windows with Visual Studio 2019 + +These instructions assume an up-to-date Windows 10 (e.g. build 19041.928) with +**Microsoft Visual Studio 2019** (e.g. Version 16.9.0 Preview 4.0) installed. If +unavailable, please use another build environment: + +* [MSYS2 on Windows](developing_in_windows_msys.md) +* [Crossroad on Linux](developing_with_crossroad.md) (cross compilation for Windows) + +## Minimum build dependencies + +Apart from the dependencies in third_party, some of the tools use external +dependencies that need to be installed in your system first. + +Please install [vcpkg](https://vcpkg.readthedocs.io/en/latest/examples/installing-and-using-packages/) +(tested with version 2019.07.18), and use it to install the following libraries: + +``` +vcpkg install gtest:x64-windows +vcpkg install giflib:x64-windows +vcpkg install libjpeg-turbo:x64-windows +vcpkg install libpng:x64-windows +vcpkg install zlib:x64-windows +``` + +## Building + +From Visual Studio, open the CMakeLists.txt in the JPEG XL root directory. +Right-click the CMakeLists.txt entry in the Folder View of the Solution +Explorer. In the context menu, select CMake Settings. Click on the green plus +to add an x64-Clang configuration and the red minus to remove any non-Clang +configuration (the MSVC compiler is currently not supported). Click on the blue +hyperlink marked "CMakeSettings.json" and an editor will open. Insert the +following text after replacing $VCPKG with the directory where you installed +vcpkg above. 
+ +``` +{ + "configurations": [ + { + "name": "x64-Clang-Release", + "generator": "Ninja", + "configurationType": "MinSizeRel", + "buildRoot": "${projectDir}\\out\\build\\${name}", + "installRoot": "${projectDir}\\out\\install\\${name}", + "cmakeCommandArgs": "-DCMAKE_TOOLCHAIN_FILE=$VCPKG/scripts/buildsystems/vcpkg.cmake", + "buildCommandArgs": "-v", + "ctestCommandArgs": "", + "inheritEnvironments": [ "clang_cl_x64" ], + "variables": [ + { + "name": "VCPKG_TARGET_TRIPLET", + "value": "x64-windows", + "type": "STRING" + }, + { + "name": "JPEGXL_ENABLE_TCMALLOC", + "value": "False", + "type": "BOOL" + }, + { + "name": "BUILD_GMOCK", + "value": "True", + "type": "BOOL" + }, + { + "name": "gtest_force_shared_crt", + "value": "True", + "type": "BOOL" + }, + { + "name": "JPEGXL_ENABLE_FUZZERS", + "value": "False", + "type": "BOOL" + }, + { + "name": "JPEGXL_ENABLE_VIEWERS", + "value": "False", + "type": "BOOL" + } + ] + } + ] +} +``` + +The project is now ready for use. To build, simply press F7 (or choose +Build All from the Build menu). This writes binaries to +`out/build/x64-Clang-Release/tools`. The main [README.md](../README.md) explains +how to use the encoder/decoder and benchmark binaries. diff --git a/third-party/libjxl/libjxl/doc/developing_with_crossroad.md b/third-party/libjxl/libjxl/doc/developing_with_crossroad.md new file mode 100644 index 0000000000..e7c2f23f99 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/developing_with_crossroad.md @@ -0,0 +1,116 @@ +# Cross Compiling for Windows with Crossroad + +[Crossroad](https://pypi.org/project/crossroad/) is a tool to set up cross-compilation environments on GNU/Linux distributions. These instructions assume a Debian/Ubuntu system. However, they can likely be adapted to other Linux environments. Since Ubuntu can be run on Windows through WSL, these instruction may be useful for developing directly on Windows. 
+ +## Install Crossroad + +Crossroad requires tools included with `python3-docutils` and `mingw-w64`. They may be installed using: + +```bash +sudo aptitude install python3-docutils mingw-w64 +``` + +The `zstandard` python package is also required, but is not available in the repositories. It may be installed using `pip`. + +```bash +pip3 install zstandard +``` + +After the dependencies are installed, crossroad itself may be installed with `pip`. + +```bash +pip3 install crossroad +``` + +If there are errors while running crossroad, it may need to be downloaded and installed directly using `setup.py`. Instructions are on the crossroad homepage. + +## Update Debian Alternatives + +Since `libjxl` uses C++ features that require posix threads, the symlinks used by the Debian alternatives system need to be updated: + +```bash +sudo update-alternatives --config x86_64-w64-mingw32-g++ +``` + +Select the option that indicates `posix` usage. Repeat for `gcc` and `i686`: + +```bash +sudo update-alternatives --config x86_64-w64-mingw32-gcc +sudo update-alternatives --config i686-w64-mingw32-gcc +sudo update-alternatives --config i686-w64-mingw32-g++ +``` + +## Create a New Crossroad Project + +Crossroad supports the following platforms: + +``` +native Native platform (x86_64 GNU/Linux) +android-x86 Generic Android/Bionic on x86 +android-mips64 Generic Android/Bionic on MIPS64 +android-x86-64 Generic Android/Bionic on x86-64 +w64 Windows 64-bit +w32 Windows 32-bit +android-arm64 Generic Android/Bionic on ARM64 +android-mips Generic Android/Bionic on MIPS +android-arm Generic Android/Bionic on ARM +``` + +To begin cross compiling for Windows, a new project needs to be created: + +```bash +crossroad w64 [project-name] +``` + +## Install Dependencies + +Since the `gimp` development package is required to build the GIMP plugin and also includes most of the packages required by `libjxl`, install it first. 
+ +```bash +crossroad install gimp +``` + +`gtest` and `brotli` are also required. + +```bash +crossroad install gtest brotli +``` + +If any packages are later found to be missing, you may search for them using: + +```bash +crossroad search [...] +``` + +## Build `libjxl` + +Download the source from the libjxl [releases](https://github.com/libjxl/libjxl/releases) page. Alternatively, you may obtain the latest development version with `git`. Run `./deps.sh` to ensure additional third-party dependencies are downloaded. Unfortunately, the script `./ci.sh` does not work with Crossroad, so `cmake` will need to be called directly. + +Create a build directory within the source directory. If you haven't already, start your crossroad project and run `cmake`: + +```bash +mkdir build +cd build +crossroad w64 libjxl +crossroad cmake -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_TESTING=OFF -DBUILD_SHARED_LIBS=OFF \ + -DJPEGXL_ENABLE_BENCHMARK=OFF -DJPEGXL_ENABLE_MANPAGES=OFF \ + -DJPEGXL_ENABLE_PLUGINS=ON -DJPEGXL_FORCE_SYSTEM_BROTLI=ON \ + -DJPEGXL_FORCE_SYSTEM_GTEST=ON .. +``` + +Check the output to see if any dependencies were missed and need to be installed. If all went well, you may now run `cmake` to build `libjxl`: + +```bash +cmake --build . +``` + +## Try out the GIMP Plugin + +The plugin is built statically, so there should be no need to install `dll` files. To try out the plugin: + +1. [Download](https://www.gimp.org/downloads/) and install the stable version of GIMP (currently 2.10.24). + +2. Create a new folder: `C:\Program Files\GIMP 2\lib\gimp\2.0\plug-ins\file-jxl` + +3. Copy `build/plugins/gimp/file-jxl.exe` to the new folder. 
diff --git a/third-party/libjxl/libjxl/doc/encode_effort.md b/third-party/libjxl/libjxl/doc/encode_effort.md new file mode 100644 index 0000000000..221b2bf649 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/encode_effort.md @@ -0,0 +1,32 @@ +# Encode effort settings + +Various trade-offs between encode speed and compression performance can be selected in libjxl. In `cjxl`, this is done via the `--effort` (`-e`) option. +Higher effort means slower encoding; generally the higher the effort, the more coding tools are used, computationally more expensive heuristics are used, +and more exhaustive search is performed. +Generally efforts range between `1` and `9`, but there is also `e10` if you pass the flag `--allow_expert_options` (in combination with "lossless", i.e. `-d 0`). It is considered an expert option because it can be extremely slow. + + +For lossy compression, higher effort results in better visual quality at a given filesize, and also better +encoder consistency, i.e. less image-dependent variation in the actual visual quality that is achieved. This means that for lossy compression, +higher effort does not necessarily mean smaller filesizes for every image — some images may be somewhat lower quality than desired when using +lower effort heuristics, and to improve consistency, higher effort heuristics may decide to use more bytes for them. + +For lossless compression, higher effort should result in smaller filesizes, although this is not guaranteed; +in particular, e2 can be better than e3 for non-photographic images, and e3 can be better than e4 for photographic images. 
+ +The following table describes what the various effort settings do: + +|Effort | Modular (lossless) | VarDCT (lossy) | +|-------|--------------------|----------------| +| e1 | fast-lossless, fixed YCoCg RCT, fixed ClampedGradient predictor, simple palette detection, no MA tree (one context for everything), Huffman, simple rle-only lz77 | | +| e2 | global channel palette, fixed MA tree (context based on Gradient-error), ANS, otherwise same as e1 | | +| e3 | same as e2 but fixed Weighted predictor and fixed MA tree with context based on WP-error | only 8x8, basically XYB jpeg with ANS | +| e4 | try both ClampedGradient and Weighted predictor, learned MA tree, global palette | simple variable blocks heuristics, adaptive quantization, coefficient reordering | +| e5 | e4 + patches, local palette / local channel palette, different local RCTs | e4 + gabor-like transform, chroma from luma | +| e6 | e5 + more RCTs and MA tree properties | e5 + error diffusion, full variable blocks heuristics | +| e7 | e6 + more RCTs and MA tree properties | e6 + patches (including dots) | +| e8 | e7 + more RCTs, MA tree properties and Weighted predictor parameters | e7 + Butteraugli iterations for adaptive quantization | +| e9 | e8 + more RCTs, MA tree properties and Weighted predictor parameters, try all predictors | e8 + more Butteraugli iterations | +| e10 | e9 + previous-channel MA tree properties, different group dimensions, exhaustively try various e9 options | | + +For the entropy coding (context clustering, lz77 search, hybriduint configuration): slower/more exhaustive search as effort goes up. 
diff --git a/third-party/libjxl/libjxl/doc/format_overview.md b/third-party/libjxl/libjxl/doc/format_overview.md new file mode 100644 index 0000000000..4614df5509 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/format_overview.md @@ -0,0 +1,284 @@ +# JPEG XL Format Overview + +This document gives an overview of the JPEG XL file format and codestream, +its features, and the underlying design rationale. +The aim of this document is to provide general insight into the +format capabilities and design, thus helping developers +better understand how to use the `libjxl` API. + +## Codestream and File Format + +The JPEG XL format is defined in ISO/IEC 18181. This standard consists of +four parts: + +* 18181-1: Core codestream +* 18181-2: File format +* 18181-3: Conformance testing +* 18181-4: Reference implementation + +### Core codestream + +The core codestream contains all the data necessary to decode and display +still image or animation data. This includes basic metadata like image dimensions, +the pixel data itself, colorspace information, orientation, upsampling, etc. + +### File format + +The JPEG XL file format can take two forms: + +* A 'naked' codestream. In this case, only the image/animation data itself is +stored, and no additional metadata can be included. Such a file starts with the +bytes `0xFF0A` (the JPEG marker for "start of JPEG XL codestream"). +* An ISOBMFF-based container. This is a box-based container that includes a +JPEG XL codestream box (`jxlc`), and can optionally include other boxes with +additional information, such as Exif metadata. In this case, the file starts with +the bytes `0x0000000C 4A584C20 0D0A870A`. + +### Conformance testing + +This part of the standard defines precision bounds and test cases for conforming +decoders, to verify that they implement all coding tools correctly and accurately. + +### Reference implementation + +The `libjxl` software is the reference implementation of JPEG XL. 
+ + +## Metadata versus Image Data + +JPEG XL makes a clear separation between metadata and image data. +Everything that is needed to correctly display an image is +considered to be image data, and is part of the core codestream. This includes +elements that have traditionally been considered 'metadata', such as ICC profiles +and Exif orientation. The goal is to reduce the ambiguity and potential for +incorrect implementations that can be caused by having a 'black box' codestream +that only contains numerical pixel data, requiring applications to figure out how +to correctly interpret the data (i.e. apply color transforms, upsampling, +orientation, blending, cropping, etc.). By including this functionality in the +codestream itself, the decoder can provide output in a normalized way +(e.g. in RGBA, orientation already applied, frames blended and coalesced), +simplifying things and making it less error-prone for applications. + +The remaining metadata, e.g. Exif or XMP, can be stored in the container format, +but it does not influence image rendering. In the case of Exif orientation, +this field has to be ignored by applications, since the orientation in the +codestream always takes precedence (and will already have been applied +transparently by the decoder). This means that stripping metadata can be done +without affecting the displayed image. + + +## Codestream Features + +### Color Management + +In JPEG XL, images always have a fully defined colorspace, i.e. it is always +unambiguous how to interpret the pixel values. There are two options: + +* Pixel data is in a specified (non-XYB) colorspace, and the decoder will produce +a pixel buffer in this colorspace plus an ICC profile that describes that +colorspace. Mathematically lossless encoding can only use this option. +* Pixel data is in the XYB colorspace, which is an absolute colorspace. 
+In this case, the decoder can produce a pixel buffer directly in a desired +display space like sRGB, Display-P3 or Rec.2100 PQ. + +The image header always contains a colorspace; however, its meaning depends on +which of the above two options were used: + +* In the first case (non-XYB), the signaled colorspace defines the +interpretation of the pixel data. +* In the second case (XYB), the signaled colorspace is merely a _suggestion_ +of a target colorspace to represent the image in, i.e. it is the colorspace +the original image was in, that has a sufficiently wide gamut and a +suitable transfer curve to represent the image data with high fidelity +using a limited bit depth representation. + +Colorspaces can be signaled in two ways in JPEG XL: + +* CICP-style Enum values: This is a very compact representation that +covers most or all of the common colorspaces. The decoder can convert +XYB to any of these colorspaces without requiring an external color management +library. +* ICC profiles: Arbitrary ICC profiles can also be used, including +CMYK ones. The ICC profile data gets compressed. In this case, external +color management software (e.g. lcms2 or skcms) has to be used for color +conversions. + +### Frames + +A JPEG XL codestream contains one or more frames. In the case of animation, +these frames have a duration and can be looped (infinitely or a number of times). +Zero-duration frames are possible and represent different layers of the image. + +Frames can have a blendmode (Replace, Add, Alpha-blend, Multiply, etc.) and +they can use any previous frame as a base. +They can be smaller than the image canvas, in which case the pixels outside the +crop are copied from the base frame. They can be positioned at an arbitrary +offset from the image canvas; this offset can also be negative and frames can +also be larger than the image canvas, in which case parts of the frame will +be invisible and only the intersection with the image canvas will be shown. 
+ +By default, the decoder will blend and coalesce frames, producing only a single +output frame when there are subsequent zero-duration frames, and all output frames +are of the same size (the size of the image canvas) and have either no duration +(in case of a still image) or a non-zero duration (in case of animation). + +### Pixel Data + +Every frame contains pixel data encoded in one of two modes: + +* VarDCT mode: In this mode, variable-sized DCT transforms are applied +and the image data is encoded in the form of DCT coefficients. This mode is +always lossy, but it can also be used to losslessly represent an existing +(already lossy) JPEG image, in which case only the DCT8x8 is used. +* Modular mode: In this mode, only integer arithmetic is used, which +enables lossless compression. However, this mode can also be used for lossy +compression. Multiple transformations can be used to improve compression or to +obtain other desirable effects: reversible color transforms (RCTs), +(delta) palette transforms, and a modified non-linear Haar transform +called Squeeze, which facilitates (but does not require) lossy compression +and enables progressive decoding. + +Internally, the VarDCT mode uses Modular sub-bitstreams to encode +various auxiliary images, such as the "LF image" (a 1:8 downscaled version +of the image that contains the DC coefficients of DCT8x8 and low-frequency +coefficients of the larger DCT transforms), extra channels besides the +three color channels (e.g. alpha), and weights for adaptive quantization. + +In addition, both modes can separately encode additional 'image features' that +are rendered on top of the decoded image: + +* Patches: rectangles from a previously decoded frame (which can be a +'hidden' frame that is not displayed but only stored to be referenced later) +can be blended using one of the blendmodes on top of the current frame. 
+This allows the encoder to identify repeating patterns (such as letters of +text) and encode them only once, using patches to insert the pattern in +multiple spots. These patterns are encoded in a previous frame, making +it possible to add Modular-encoded pixels to a VarDCT-encoded frame or +vice versa. +* Splines: centripetal Catmull-Rom splines can be encoded, with a color +and a thickness that can vary along the arclength of the curve. +Although the current encoder does not use this bitstream feature yet, we +anticipate that it can be useful to complement DCT-encoded data, since +thin lines are hard to represent faithfully using the DCT. +* Noise: luma-modulated synthetic noise can be added to an image, e.g. +to emulate photon noise, in a way that avoids poor compression due to +high frequency DCT coefficients. + +Finally, both modes can also optionally apply two filtering methods to +the decoded image, which both have the goal of reducing block artifacts +and ringing: + +* Gabor-like transform ('Gaborish'): a small (3x3) blur that gets +applied across block and group boundaries, reducing blockiness. The +encoder applies the inverse sharpening transform before encoding, +effectively getting the benefits of lapped transforms without the +disadvantages. +* Edge-preserving filter ('EPF'): similar to a bilateral filter, +this smoothing filter avoids blurring edges while reducing ringing. +The strength of this filter is signaled and can locally be adapted. + +### Groups + +In both modes (Modular and VarDCT), the frame data is signaled as +a sequence of groups. These groups can be decoded independently, +and the frame header contains a table of contents (TOC) with bitstream +offsets for the start of each group. This enables parallel decoding, +and also partial decoding of a region of interest or a progressive preview. + +In VarDCT mode, all groups have dimensions 256x256 (or smaller at the +right and bottom borders). 
First the LF image is encoded, also in +256x256 groups (corresponding to 2048x2048 pixels, since this data +corresponds to the 1:8 image). This means there is always a basic +progressive preview available in VarDCT mode. +Optionally, the LF image can be encoded separately in a (hidden) +LF frame, which can itself recursively be encoded in VarDCT mode +and have its own LF frame. This makes it possible to represent huge +images while still having an overall preview that can be efficiently +decoded. +Then the HF groups are encoded, corresponding to the remaining AC +coefficients. The HF groups can be encoded in multiple passes for +more progressive refinement steps; the coefficients of all passes +are added. Unlike JPEG progressive scan scripts, JPEG XL allows +signaling any amount of detail in any part of the image in any pass. + +In Modular mode, groups can have dimensions 128x128, 256x256, 512x512 +or 1024x1024. If the Squeeze transform was used, the data will +be split in three parts: the Global groups (the top of the Laplacian +pyramid that fits in a single group), the LF groups (the middle part +of the Laplacian pyramid that corresponds to the data needed to +reconstruct the 1:8 image) and the HF groups (the base of the Laplacian +pyramid), where the HF groups are again possibly encoded in multiple +passes (up to three: one for the 1:4 image, one for the 1:2 image, +and one for the 1:1 image). + +In case of a VarDCT image with extra channels (e.g. alpha), the +VarDCT groups and the Modular groups are interleaved in order to +allow progressive previews of all the channels. + +The default group order is to encode the LF and HF groups in +scanline order (top to bottom, left to right), but this order +can be permuted arbitrarily. This allows, for example, a center-first +ordering or a saliency-based ordering, causing the bitstream +to prioritize progressive refinements in a different way. 
+ + +## File Format Features + +Besides the image data itself (stored in the `jxlc` codestream box), +the optional container format allows storing additional information. + +## Metadata + +Three types of metadata can be included in a JPEG XL container: + +* Exif (`Exif`) +* XMP (`xml `) +* JUMBF (`jumb`) + +This metadata can contain information about the image, such as copyright +notices, GPS coordinates, camera settings, etc. +If it contains rendering-impacting information (such as Exif orientation), +the information in the codestream takes precedence. + +## Compressed Metadata + +The container allows the above metadata to be stored either uncompressed +(e.g. plaintext XML in the case of XMP) or by Brotli-compression. +In the latter case, the box type is `brob` (Brotli-compressed Box) and +the first four bytes of the box contents define the actual box type +(e.g. `xml `) it represents. + +## JPEG Bitstream Reconstruction Data + +JPEG XL can losslessly recompress existing JPEG files. +The general design philosophy still applies in this case: +all the image data is stored in the codestream box, including the DCT +coefficients of the original JPEG image and possibly an ICC profile or +Exif orientation. + +In order to allow bit-identical reconstruction of the original JPEG file +(not just the image but the actual file), additional information is needed, +since the same image data can be encoded in multiple ways as a JPEG file. +The `jbrd` box (JPEG Bitstream Reconstruction Data) contains this information. +Typically it is relatively small. Using the image data from the codestream, +the JPEG bitstream reconstruction data, and possibly other metadata boxes +that were present in the JPEG file (Exif/XMP/JUMBF), the exact original +JPEG file can be reconstructed. + +This box is not needed to display a recompressed JPEG image; it is only +needed to reconstruct the original JPEG file. 
+ +## Frame Index + +The container can optionally store a `jxli` box, which contains an index +of offsets to keyframes of a JPEG XL animation. It is not needed to display +the animation, but it does facilitate efficient seeking. + +## Partial Codestream + +The codestream can optionally be split into multiple `jxlp` boxes; +conceptually, this is equivalent to a single `jxlc` box that contains the +concatenation of all partial codestream boxes. +This makes it possible to create a file that starts with +the data needed for a progressive preview of the image, followed by +metadata, followed by the remaining image data. diff --git a/third-party/libjxl/libjxl/doc/fuzzing.md b/third-party/libjxl/libjxl/doc/fuzzing.md new file mode 100644 index 0000000000..af926596f2 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/fuzzing.md @@ -0,0 +1,184 @@ +# Fuzzing + +Fuzzing is a technique to find potential bugs by providing randomly generated +invalid inputs. To detect potential bugs such as programming errors we use +fuzzing in combination with ASan (Address Sanitizer), MSan (Memory Sanitizer), +UBSan (Undefined Behavior Sanitizer) and asserts in the code. An invalid input +will likely produce a decoding error (some API function returning error), which +is absolutely not a problem, but what it should not do is access memory out of +bounds, use uninitialized memory or hit a false assert condition. + +## Automated Fuzzing with oss-fuzz + +libjxl fuzzing is integrated into [oss-fuzz](https://github.com/google/oss-fuzz) +as the project `libjxl`. oss-fuzz regularly runs the fuzzers on the `main` +branch and reports bugs into their bug tracker which remains private until the +bugs are fixed in main. + +## Fuzzer targets + +There are several fuzzer executable targets defined in the `tools/` directory +to fuzz different parts of the code. The main one is `djxl_fuzzer`, which uses +the public C decoder API to attempt to decode an image. 
The fuzzer input is not +directly the .jxl file; the last few bytes of the fuzzer input are used to +decide *how* the API will be used (if preview is requested, the pixel format +requested, if the .jxl input data is provided altogether, etc) and the rest of +the fuzzer input is provided as the .jxl file to the decoder. Some bugs might +reproduce only if the .jxl input is decoded in a certain way. + +The remaining fuzzer targets execute a specific portion of the codec that might be +easier to fuzz independently from the whole codec. + +## Reproducing fuzzer bugs + +A fuzzer target, like `djxl_fuzzer`, accepts as a parameter one or more files +that will be used as inputs. This runs the fuzzer program in test-only mode +where no new inputs are generated and only the provided files are tested. This +is the easiest way to reproduce a bug found by the fuzzer using the generated +test case from the bug report. + +oss-fuzz uses a specific compiler version and flags, and it is built using +Docker. Different compiler versions will have different support for detecting +certain actions as errors, so we want to reproduce the build from oss-fuzz as +closely as possible. To reproduce the build as generated by oss-fuzz there are a +few helper commands in `ci.sh` as explained below. + +### Generate the gcr.io/oss-fuzz/libjxl image + +First you need the ossfuzz libjxl builder image. This is the base oss-fuzz +builder image with a few dependencies installed. To generate it you need to +check out the oss-fuzz project and build it: + +```bash +git clone https://github.com/google/oss-fuzz.git ~/oss-fuzz +cd ~/oss-fuzz +sudo infra/helper.py build_image libjxl +``` + +This will create the `gcr.io/oss-fuzz/libjxl` docker image. You can check if it +was created by verifying that it is listed in the output of the `sudo docker image +ls` command. 
+ +### Build the fuzzer targets with oss-fuzz + +To build the fuzzer targets from the current libjxl source checkout, use the +`./ci.sh ossfuzz_msan` command for MSan, `./ci.sh ossfuzz_asan` command for ASan +or `./ci.sh ossfuzz_ubsan` command for UBSan. All the `JXL_ASSERT` and +`JXL_DASSERT` calls are enabled in all three modes. These ci.sh helpers will +reproduce the oss-fuzz docker call to build libjxl mounting the current source +directory into the Docker container. Ideally you will run this command in a +different build directory separated from your regular builds. + +For example, for MSan builds run: + +```bash +BUILD_DIR=build-fuzzmsan ./ci.sh ossfuzz_msan +``` + +After this, the fuzzer program will be generated in the build directory like +for other build modes: `build-fuzzmsan/tools/djxl_fuzzer`. + +### Iterating changes with oss-fuzz builds + +After modifying the source code to fix the fuzzer-found bug, or to include more +debug information, you can rebuild only a specific fuzzer target to save on +rebuilding time and immediately run the test case again. For example, for +rebuilding and testing only `djxl_fuzzer` in MSan mode we can run: + +```bash +BUILD_DIR=build-fuzzmsan ./ci.sh ossfuzz_msan djxl_fuzzer && build-fuzzmsan/tools/djxl_fuzzer path/to/testcase.bin +``` + +When MSan and ASan fuzzers fail they will print a stack trace at the point where +the error occurred, and some related information. To make these stack +traces useful we need to convert the addresses to function names and source file +names and lines, which is done with the "symbolizer". For UBSan to print a stack +trace we need to set the `UBSAN_OPTIONS` environment variable when running the +fuzzer. + +Set the following environment variables when testing the fuzzer binaries. 
Here +`clang` should match the compiler version used by the container; you can pass a +different compiler version in the following example by first installing the +clang package for that version outside the container and using `clang-NN` +(for example `clang-11`) instead of `clang` in the following commands: + +```bash +symbolizer=$($(realpath $(which clang)) -print-prog-name=llvm-symbolizer) +export MSAN_SYMBOLIZER_PATH="${symbolizer}" +export UBSAN_SYMBOLIZER_PATH="${symbolizer}" +export ASAN_SYMBOLIZER_PATH="${symbolizer}" +export ASAN_OPTIONS=detect_leaks=1 +export UBSAN_OPTIONS=print_stacktrace=1 +``` + +Note: The symbolizer binary must be a program called `llvm-symbolizer`; any +other file name will fail. There are normally symlinks already installed with +the right name which the `-print-prog-name` would print. + +## Running the fuzzers locally + +Running the fuzzer targets in fuzzing mode can be achieved by running them with +no parameters, or better with a parameter with the path to a *directory* +containing a seed of files to use as a starting point. Note that passing a +directory is considered a corpus to use for fuzzing while passing a file is +considered an input to evaluate. Multi-process fuzzing is also supported. For +details about all the fuzzing options run: + +```bash +build-fuzzmsan/tools/djxl_fuzzer -help=1 +``` + +## Writing fuzzer-friendly code + +Fuzzing by itself can't find programming bugs unless an input makes the program +perform an invalid operation (read/write out of bounds, perform an undefined +behavior operation, etc). You can help the fuzzer find invalid situations by +adding asserts: + + * `JXL_ASSERT()` is enabled in Release mode by default. It can be disabled + with `-DJXL_ENABLE_ASSERT=0` but the intention is that it will run for all + the users in released code. If performance of the check is not an issue (like + checks done once per image, once per channel, once per group, etc) a + JXL_ASSERT is appropriate. 
A failed assert is preferable to an out of bounds +write. + + * `JXL_DASSERT()` is only enabled in Debug builds, which includes all the ASan, + MSan and UBSan builds. Performance of these checks is not an issue if kept + within reasonable limits (automated msan/asan tests should finish within 1 + hour for example). Fuzzing is more effective when the given input runs + faster, so keep that in mind when adding a complex DASSERT that runs multiple + times per output pixel. + + * For MSan builds it is also possible to specify that certain values must be + initialized. This is automatic for values that are used to make decisions + (like when used in an `if` statement or in the ternary operator condition) + but those checks can be made explicit for image data using the + `JXL_CHECK_IMAGE_INITIALIZED(image, rect)` macro. This helps document and + check (only in MSan builds) that a given portion of the image is expected to + be initialized, allowing errors to be caught earlier in the process. + +## Dealing with use-of-uninitialized memory + +In MSan builds it is considered an error to *use* uninitialized memory. Using +the memory normally requires something like a decision / branch based on the +uninitialized value; just running `memcpy()` or simple arithmetic over +uninitialized memory is not a problem. Notably, computing `DemoteTo()`, +`NearestInt()` or similar expressions that create a branch based on the value of +the uninitialized memory will trigger an MSan error. + +In libjxl we often run vectorized operations over a series of values, rounding +up to the next multiple of a vector size, thus operating over uninitialized +values past the end of the requested region. These values are part of the image +padding but are not initialized. This behavior would not create an MSan error +unless the processing includes operations like `NearestInt()`. 
For such cases +the preferred solution is to use `msan::UnpoisonMemory` over the portion of +memory of the last SIMD vector before processing, and then running +`msan::PoisonMemory` over the corresponding value in the output side. A note +including why this is safe to do must be added, for example if the processing +doesn't involve any cross-lane computation. + +Initializing padding memory in MSan builds is discouraged because it may hide +bugs in functions that weren't supposed to read from the padding. Initializing +padding memory in all builds, including Release builds, would mitigate the +MSan potential security issue but it would hide the logic bug for a longer time +and potentially incur in a performance hit. diff --git a/third-party/libjxl/libjxl/doc/jxl.svg b/third-party/libjxl/libjxl/doc/jxl.svg new file mode 100644 index 0000000000..a80778b0b7 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/jxl.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/third-party/libjxl/libjxl/doc/man/cjxl.txt b/third-party/libjxl/libjxl/doc/man/cjxl.txt new file mode 100644 index 0000000000..261742a689 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/man/cjxl.txt @@ -0,0 +1,102 @@ +cjxl(1) +======= +:doctype: manpage + +Name +---- + +cjxl - compress images to JPEG XL + +Synopsis +-------- + +*cjxl* ['options'...] 'input' ['output.jxl'] + +Description +----------- + +`cjxl` compresses an image or animation to the JPEG XL format. It is intended to +spare users the trouble of determining a set of optimal parameters for each +individual image. Instead, for a given target quality, it should provide +consistent visual results across various kinds of images. The defaults have been +chosen to be sensible, so that the following commands should give satisfactory +results in most cases: + +---- +cjxl input.png output.jxl +cjxl input.jpg output.jxl +cjxl input.gif output.jxl +---- + +Options +------- + +-h:: +--help:: + Displays the options that `cjxl` supports. 
On its own, it will only show + basic options. It can be combined with `-v` or `-v -v` to show increasingly + advanced options as well. + +-v:: +--verbose:: + Increases verbosity. Can be repeated to increase it further, and also + applies to `--help`. + +-d 'distance':: +--distance='distance':: + The preferred way to specify quality. It is specified in multiples of a + just-noticeable difference. That is, `-d 0` is mathematically lossless, + `-d 1` should be visually lossless, and higher distances yield denser and + denser files with lower and lower fidelity. Lossy sources such as JPEG and + GIF files are compressed losslessly by default, and in the case of JPEG + files specifically, the original JPEG can then be reconstructed bit-for-bit. + For lossless sources, `-d 1` is the default. + +-q 'quality':: +--quality='quality':: + Alternative way to indicate the desired quality. 100 is lossless and lower + values yield smaller files. There is no lower bound to this quality + parameter, but positive values should approximately match the quality + setting of libjpeg. + +-e 'effort':: +--effort='effort':: + Controls the amount of effort that goes into producing an ``optimal'' file + in terms of quality/size. That is to say, all other parameters being equal, + a higher effort should yield a file that is at least as dense and possibly + denser, and with at least as high and possibly higher quality. ++ +Recognized effort settings, from fastest to slowest, are: ++ +- 1 or ``lightning'' +- 2 or ``thunder'' +- 3 or ``falcon'' +- 4 or ``cheetah'' +- 5 or ``hare'' +- 6 or ``wombat'' +- 7 or ``squirrel'' (default) +- 8 or ``kitten'' +- 9 or ``tortoise'' + +Examples +-------- + +---- +# Compress a PNG file to a high-quality JPEG XL version. +$ cjxl input.png output.jxl + +# Compress it at a slightly lower quality, appropriate for web use. +$ cjxl -d 2 input.png output.jxl + +# Compress it losslessly. These are equivalent. 
+$ cjxl -d 0 input.png lossless.jxl +$ cjxl -q 100 input.png lossless.jxl + +# Compress a JPEG file losslessly. +$ cjxl input.jpeg lossless-jpeg.jxl +---- + +See also +-------- + +*djxl*(1) diff --git a/third-party/libjxl/libjxl/doc/man/djxl.txt b/third-party/libjxl/libjxl/doc/man/djxl.txt new file mode 100644 index 0000000000..bd57b4420e --- /dev/null +++ b/third-party/libjxl/libjxl/doc/man/djxl.txt @@ -0,0 +1,61 @@ +djxl(1) +======= +:doctype: manpage + +Name +---- + +djxl - decompress JPEG XL images + +Synopsis +-------- + +*djxl* ['options'...] 'input.jxl' ['output'] + +Description +----------- + +`djxl` decompresses a JPEG XL image or animation. The output format is determined +by the extension of the output file, which can be `.png`, `.jpg`, `.ppm`, `.pfm`. +If the JPEG XL input file contains an animation, multiple output files will be +produced, with names of the form "'output'-*framenumber*.ext". + + +Options +------- + +-h:: +--help:: + Displays the options that `djxl` supports. + +-j:: +--pixels_to_jpeg:: + By default, if the input JPEG XL contains a recompressed JPEG file, + djxl reconstructs the exact original JPEG file if the output file has the + `.jpg` (or `.jpeg`) filename extension. + This flag causes the decoder to instead decode the image to pixels and + encode a new (lossy) JPEG in this case. + + +-q 'quality':: +--jpeg_quality='quality':: + When decoding to `.jpg`, use this output quality. This option implicitly + enables the --pixels_to_jpeg option. 
+ + +Examples +-------- + +---- +# Decompress a JPEG XL file to PNG +$ djxl input.jxl output.png + +# Reconstruct a losslessly-recompressed JPEG file +$ djxl lossless-jpeg.jxl reconstructed.jpeg +---- + + +See also +-------- + +*cjxl*(1) diff --git a/third-party/libjxl/libjxl/doc/release.md b/third-party/libjxl/libjxl/doc/release.md new file mode 100644 index 0000000000..5fb042eb7a --- /dev/null +++ b/third-party/libjxl/libjxl/doc/release.md @@ -0,0 +1,314 @@ +# libjxl release process + +This guide documents the release process for the libjxl project. + +libjxl follows the [semantic versioning](https://semver.org/spec/v2.0.0.html) +specification for released versions. Releases are distributed as tags in the git +repository with the semantic version prefixed by the letter "v". For example, +release version "0.3.7" will have a git tag "v0.3.7". + +The public API is explicitly defined as C headers in the `lib/include` +directory, normally installed in your include path. All other headers are +internal API and are not covered by the versioning rules. + +## Development and release workflow + +New code development is performed on the `main` branch of the git repository. +Pre-submit checks enforce minimum build and test requirements for new patches +that balance impact and test latency, but not all checks are performed before +pull requests are merged. Several slower checks only run *after* the code has +been merged to `main`, resulting in some errors being detected hours after the +code is merged or even days after in the case of fuzzer-detected bugs. + +Release tags are cut from *release branches*. Each MAJOR.MINOR version has its +own release branch, for example releases `0.7.0`, `0.7.1`, `0.7.2`, ... would +have tags `v0.7.0`, `v0.7.1`, `v0.7.2`, ... on commits from the `v0.7.x` branch. +`v0.7.x` is a branch name, not a tag name, and doesn't represent a released +version since semantic versioning requires that the PATCH is a non-negative +number. 
Released tags do not each have their own release branch; all releases +from the same MAJOR.MINOR version will share the same branch. The first commit +after the branch-off point between the main branch and the release branch +should be tagged with the suffix `-snapshot` and the name of the next +MAJOR.MINOR version, in order to get meaningful output for `git describe`. + +The main purpose of the release branch is to stabilize the code before a +release. This involves including fixes to existing bugs but **not** including +new features. New features often come with new bugs which take time to fix, so +having a release branch allows us to cherry-pick *bug fixes* from the `main` +branch into the release branch without including the new *features* from `main`. +For this reason it is important to make small commits in `main` and separate bug +fixes from new features. + +After the initial minor release (`MAJOR.MINOR.PATCH`, for example `0.5.0`) the +release branch is used to continue to cherry-pick fixes to be included in a +patch release, for example a version `0.5.1` release. Patch fixes are only meant +to fix security bugs or other critical bugs that can't wait until the next major +or minor release. + +Release branches *may* continue to be maintained even after the next minor or +major version has been released to support users that can't update to a newer +minor release. In that case, the same process applies to all the maintained +release branches. + +A release branch with specific cherry-picks from `main` means that the release +code is actually a version of the code that never existed in the `main` branch, +so it needs to be tested independently. Pre-submit and post-submit tests run on +release branches (branches matching `v*.*.x`) but extra manual checks should be +performed before a release, especially if multiple bug fixes interact with each +other. Take this into account when selecting which commits to include in a +release.
The objective is to have a stable version that can be used without +problems for months. Having the latest improvements at the time the release tag +is created is a non-goal. + +## Creating a release branch + +A new release branch is needed before creating a new major or minor release, +that is, a new release where the MAJOR or MINOR numbers are increased. Patch +releases, where only the PATCH number is increased, reuse the branch from the +previous release of the same MAJOR and MINOR numbers. + +The following instructions assume that you followed the recommended [libjxl git +setup](developing_in_github.md) where `origin` points to the upstream +libjxl/libjxl project, otherwise use the name of your upstream remote repository +instead of `origin`. + +The release branch is normally created from the latest work in `main` at the +time the branch is created, but it is possible to create the branch from an +older commit if the current `main` is particularly unstable or includes commits +that were not intended to be included in the release. The following example +creates the branch `v0.5.x` from the latest commit in main (`origin/main`), if a +different commit is to be used then replace `origin/main` with the SHA of that +commit. Change the `v0.5.x` branch name to the one you are creating. + +```bash +git fetch origin main +git push git@github.com:libjxl/libjxl.git origin/main:refs/heads/v0.5.x +``` + +Here we use the SSH URL explicitly since you are pushing to the `libjxl/libjxl` +project directly to a branch there. If you followed the guide `origin` will have +the HTTPS URL which wouldn't normally let you push since you wouldn't be +authenticated. The `v*.*.x` branches are [GitHub protected +branches](https://docs.github.com/en/github/administering-a-repository/defining-the-mergeability-of-pull-requests/about-protected-branches) +in our repository, however you can push to a protected branch when *creating* it +but you can't directly push to it after it is created. 
To include more changes +in the release branch see the "Cherry-picking fixes to a release" section below. + +## Creating a merge label + +We use GitHub labels in Pull Requests to keep track of the changes that should +be merged into a given release branch. For this purpose create a new label for +each new MAJOR.MINOR release branch called `merge-MAJOR.MINOR`, for example, +`merge-0.5`. + +In the [edit labels](https://github.com/libjxl/libjxl/issues/labels) page, click +on "New label" and create the label. Pick your favorite color. + +Labels are a GitHub-only concept and are not represented in git. You can add the +label to a Pull Request even after it was merged, whenever it is decided that +the Pull Request should be included in the given release branch. Adding the +label doesn't automatically merge it to the release branch. + +## Update the versioning number + +The version number (as returned by `JxlDecoderVersion`) in the source code in +`main` must match the semantic versioning of a release. After the release +branch is created the code in `main` will only be included in the next major +or minor release. Right after a release branch update the version targeting the +next release. Artifacts from `main` should include the new (unreleased) version, +so it is important to update it. For example, after the `v0.5.x` branch is +created from main, you should update the version on `main` to `0.6.0`. + +To help update it, run this helper command (in a Debian-based system): + +```bash +./ci.sh bump_version 0.6.0 +``` + +This will update the version in the following files: + + * `lib/CMakeLists.txt` + * `lib/lib.gni`, automatically updated with + `tools/scripts/build_cleaner.py --update`. + * `debian/changelog` to create the Debian package release with the new version. + Debian changelog shouldn't repeat the library changelog, instead it should + include changes to the packaging scripts. 
+ * `.github/workflows/conformance.yml` + +If there were incompatible API/ABI changes, make sure to also adapt the +corresponding section in +[CMakeLists.txt](https://github.com/libjxl/libjxl/blob/main/lib/CMakeLists.txt#L12). + +## Cherry-pick fixes to a release + +After a Pull Request that should be included in a release branch has been merged +to `main` it can be cherry-picked to the release branch. Before cherry-picking a +change to a release branch it is important to check that it doesn't introduce +more problems, in particular it should run for some time in `main` to make sure +post-submit tests and the fuzzers run on it. Waiting for a day is a good idea. + +Most of the testing is done on the `main` branch, so be careful with what +commits are cherry-picked to a branch. Refactoring code is often not a good +candidate to cherry-pick. + +To cherry-pick a single commit to a release branch (in this example to `v0.5.x`) +you can run: + +```bash +git fetch origin +git checkout origin/v0.5.x -b merge_to_release +git cherry-pick -x SHA_OF_MAIN_COMMIT +# -x will annotate the cherry-pick with the original SHA_OF_MAIN_COMMIT value. +# If not already mentioned in the original commit, add the original PR number to +# the commit, for example add "(cherry picked from PR #NNNN)". +git commit --amend +``` + +The `SHA_OF_MAIN_COMMIT` is the hash of the commit as it landed in main. Use +`git log origin/main` to list the recent main commits and their hashes. + +Making sure that the commit message on the cherry-picked commit contains a +reference to the original pull request (like `#NNNN`) is important. It creates +an automatic comment in the original pull request notifying that it was +mentioned in another commit, helping keep track of the merged pull requests. 
If +the original commit was merged with the "Squash and merge" policy it will +automatically contain the pull request number on the first line, if this is not +the case you can amend the commit message of the cherry-pick to include a +reference. + +Multiple commits can be cherry-picked and tested at once to save time. Continue +running `git cherry-pick` and `git commit --amend` multiple times for all the +commits you need to cherry-pick, ideally in the same order they were merged on +the `main` branch. At the end you will have a local branch with multiple commits +on top of the release branch. + +To update the version number, for example from v0.8.0 to v0.8.1 run this helper +command (in a Debian-based system): + +```bash +./ci.sh bump_version 0.8.1 +``` + +as described above and commit the changes. + +Finally, upload your changes to *your fork* like normal, except that when +creating a pull request select the desired release branch as a target: + +```bash +git push myfork merge_to_release +``` + +If you used the [guide](developing_in_github.md) `myfork` would be `origin` in +that example. Click on the URL displayed, which will be something like + + `https://github.com/mygithubusername/libjxl/pull/new/merge_to_release` + +In the "Open a pull request" page, change the drop-down base branch from +"base: main" (the default) to the release branch you are targeting. + +The pull request approval and pre-submit rules apply as with normal pull +requests to the `main` branch. + +**Important:** When merging multiple cherry-picks use "Rebase and merge" policy, +not the squash one since otherwise you would discard the individual commit +message references from the git history in the release branch. + +## Publishing a release + +Once a release tag is created it must not be modified, so you need to prepare +the changes before creating the release. 
Make sure you checked the following: + + * The semantic version number in the release branch (see `lib/CMakeLists.txt`) + matches the number you intend to release, all three MAJOR, MINOR and PATCH + should match. Otherwise send a pull request to the release branch to + update them. + + * The GitHub Actions checks pass on the release branch. Look for the green + tick next to the last commit on the release branch. This should be visible + on the branch page, for example: https://github.com/libjxl/libjxl/tree/v0.5.x + + * There are no open fuzzer-found bugs for the release branch. The most effective + way to check is to [run the fuzzer](fuzzing.md) on the release branch for a while. You + can seed the fuzzer with the corpus generated by oss-fuzz by [downloading + it](https://google.github.io/oss-fuzz/advanced-topics/corpora/#downloading-the-corpus), + for example `djxl_fuzzer` with libFuzzer will use: + gs://libjxl-corpus.clusterfuzz-external.appspot.com/libFuzzer/libjxl_djxl_fuzzer + + * Manually check that images encode/decode ok. + + * Manually check that downstream projects compile with our code. Sometimes + bugs in build scripts are only detected when other projects try to use our + library. For example, test compiling + [imagemagick](https://github.com/ImageMagick/ImageMagick) and Chrome. + +A [GitHub +"release"](https://docs.github.com/en/github/administering-a-repository/releasing-projects-on-github/about-releases) +consists of two different concepts: + + * a git "tag": this is a name (`v` plus the semantic version number) with a + commit hash associated, defined in the git repository. Most external projects + will use git tags or HTTP URLs to these tags to fetch the code. + + * a GitHub "release": this is a GitHub-only concept and is not represented in + git other than by having a git tag associated with the release.
A GitHub + release has a given source code commit SHA associated (through the tag) but + it *also* contains release notes and optional binary files attached to the + release. + +Releases from the older GitLab repository only have a git tag in GitHub, while +newer releases have both a git tag and a release entry in GitHub. + +To publish a release open the [New Release +page](https://github.com/libjxl/libjxl/releases/new) and follow these +instructions: + + * Set the "Tag version" as "v" plus the semantic version number. + + * Select the "Target" as your release branch. For example for a "v0.7.1" + release tag you should use the "v0.7.x" branch. + + * Use the version number as the release title. + + * Copy-paste the relevant section of the [CHANGELOG.md](../CHANGELOG.md) + into the release notes. Add any other information pertaining to + the release itself that is not included in the CHANGELOG.md, although prefer + to include those in the CHANGELOG.md file. You can switch to the Preview tab + to see the results. + + * Finally click "Publish release" and go celebrate with the team. 🎉 + + * Make sure to manually push the commit of the release also to https://gitlab.com/wg1/jpeg-xl. + +### How to build downstream projects + +```bash +docker run -it debian:bullseye /bin/bash + +apt update +apt install -y clang cmake git libbrotli-dev nasm pkg-config ninja-build +export CC=clang +export CXX=clang++ + +git clone --recurse-submodules --depth 1 -b v0.7.x \ + https://github.com/libjxl/libjxl.git +git clone --recurse-submodules --depth 1 \ + https://github.com/ImageMagick/ImageMagick.git +git clone --recurse-submodules --depth 1 \ + https://github.com/FFmpeg/FFmpeg.git + +cd ~/libjxl +git checkout v0.7.x +cmake -B build -G Ninja .
+cmake --build build +cmake --install build + +cd ~/ImageMagick +./configure --with-jxl=yes +# check for "JPEG XL --with-jxl=yes yes" +make -j 80 + +cd ~/FFmpeg +./configure --enable-libjxl +# check for libjxl decoder/encoder support +make -j 80 +``` diff --git a/third-party/libjxl/libjxl/doc/software_support.md b/third-party/libjxl/libjxl/doc/software_support.md new file mode 100644 index 0000000000..62e2a27c0a --- /dev/null +++ b/third-party/libjxl/libjxl/doc/software_support.md @@ -0,0 +1,75 @@ +# JPEG XL software support + +This document attempts to keep track of software that is using libjxl to support JPEG XL. +This list serves several purposes: + +- thank/acknowledge other projects for integrating jxl support +- point end-users to software that can read/write jxl +- keep track of the adoption status of jxl +- in case of a (security) bug in libjxl, it's easier to see who might be affected and check if they are updated (in case they use static linking) + +Please add missing software to this list. + +## Browsers + +- Chromium: behind a flag from version 91 to 109, [tracking bug](https://bugs.chromium.org/p/chromium/issues/detail?id=1178058) +- Firefox: behind a flag since version 90, [tracking bug](https://bugzilla.mozilla.org/show_bug.cgi?id=1539075) +- Safari: supported since version 17 beta [release notes](https://developer.apple.com/documentation/safari-release-notes/safari-17-release-notes), [tracking bug](https://bugs.webkit.org/show_bug.cgi?id=208235) +- Edge: behind a flag since version 91, start with `.\msedge.exe --enable-features=JXL` +- Opera: behind a flag since version 77. 
+- Basilisk: supported since version v2023.01.07, [release notes](https://www.basilisk-browser.org/releasenotes.shtml) +- Pale Moon: supported since version 31.4.0, [release notes](https://www.palemoon.org/releasenotes-archived.shtml#v31.4.0) +- Waterfox: [enabled by default](https://github.com/WaterfoxCo/Waterfox/pull/2936) + +For all browsers and to track browsers progress see [Can I Use](https://caniuse.com/jpegxl). + +## Image libraries + +- [ImageMagick](https://imagemagick.org/): supported since 7.0.10-54 +- [libvips](https://libvips.github.io/libvips/): supported since 8.11 +- [Imlib2](https://github.com/alistair7/imlib2-jxl) +- [FFmpeg](https://github.com/FFmpeg/FFmpeg/search?q=jpeg-xl&type=commits) +- [GDAL](https://gdal.org/drivers/raster/jpegxl.html): supported since 3.4.0 as a TIFF codec, and 3.6.0 as standalone format +- [GraphicsMagick](http://www.graphicsmagick.org/NEWS.html#march-26-2022): supported since 1.3.38 + +## OS-level support / UI frameworks / file browser plugins + +- Qt / KDE: [plugin available](https://github.com/novomesk/qt-jpegxl-image-plugin) +- GDK-pixbuf: plugin available in libjxl repo +- [gThumb](https://ubuntuhandbook.org/index.php/2021/04/gthumb-3-11-3-adds-jpeg-xl-support/) +- [MacOS viewer/QuickLook plugin](https://github.com/yllan/JXLook) +- [Windows Imaging Component](https://github.com/mirillis/jpegxl-wic) +- [Windows thumbnail handler](https://github.com/saschanaz/jxl-winthumb) +- [OpenMandriva Lx (since 4.3 RC)](https://www.openmandriva.org/en/news/article/openmandriva-lx-4-3-rc-available-for-testing) +- [KaOS (since 2021.06)](https://news.itsfoss.com/kaos-2021-06-release/) +- [EFL (since 1.27, no external plugin needed)](https://www.enlightenment.org) + +## Image editors + +- [Adobe Camera Raw (since version 15)](https://helpx.adobe.com/camera-raw/using/hdr-output.html) +- [Affinity (since V2)](https://affinity.serif.com/en-gb/whats-new/) +- [darktable (since 
4.2)](https://github.com/darktable-org/darktable/releases/tag/release-4.2.0) +- [GIMP (since 2.99.8)](https://www.gimp.org/news/2021/10/20/gimp-2-99-8-released/); plugin for older versions available in libjxl repo +- [Graphic Converter (since 11.5)](https://www.lemkesoft.de/en/products/graphicconverter/) +- [Krita](https://invent.kde.org/graphics/krita/-/commit/13e5d2e5b9f0eac5c8064b7767f0b62264a0797b) +- [Paint.NET](https://www.getpaint.net/index.html); supported since 4.3.12 - requires a [plugin](https://github.com/0xC0000054/pdn-jpegxl) to be downloaded and installed. +- Photoshop: no plugin available yet, no official support yet + +## Image viewers + +- [XnView](https://www.xnview.com/en/) +- [ImageGlass](https://imageglass.org/) +- [IrfanView](https://www.irfanview.com/); supported since 4.59 - requires a [plugin](https://www.irfanview.com/plugins.htm) to be downloaded and enabled. +- [Tachiyomi](https://github.com/tachiyomiorg/tachiyomi/releases/tag/v0.12.1) +- Any viewer based on Qt, KDE, GDK-pixbuf, EFL, ImageMagick, libvips or imlib2 (see above) + - Qt viewers: gwenview, digiKam, KolourPaint, KPhotoAlbum, LXImage-Qt, qimgv, qView, nomacs, VookiImageViewer, PhotoQt + - GTK viewers: Eye of Gnome (eog), gThumb, Geeqie + - EFL viewers: entice, ephoto +- [Swayimg](https://github.com/artemsen/swayimg) + +## Online tools + +- [Squoosh](https://squoosh.app/) +- [Cloudinary](https://cloudinary.com/blog/cloudinary_supports_jpeg_xl) +- [MConverter](https://mconverter.eu/) +- [jpegxl.io](https://jpegxl.io/) diff --git a/third-party/libjxl/libjxl/doc/sphinx/api.rst b/third-party/libjxl/libjxl/doc/sphinx/api.rst new file mode 100644 index 0000000000..56fca09e25 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/api.rst @@ -0,0 +1,15 @@ +API reference +============= + +``libjxl`` exposes a C API for encoding and decoding JPEG XL files with some +C++ header-only helpers for C++ users. + +.. 
toctree:: + :caption: API REFERENCE + :maxdepth: 2 + + api_decoder + api_encoder + api_common + api_butteraugli + api_threads diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_butteraugli.rst b/third-party/libjxl/libjxl/doc/sphinx/api_butteraugli.rst new file mode 100644 index 0000000000..4aae44a991 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/api_butteraugli.rst @@ -0,0 +1,6 @@ +Butteraugli API - ``jxl/butteraugli.h`` +======================================= + +.. doxygengroup:: libjxl_butteraugli + :members: + :private-members: diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_common.rst b/third-party/libjxl/libjxl/doc/sphinx/api_common.rst new file mode 100644 index 0000000000..7114b51cd5 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/api_common.rst @@ -0,0 +1,6 @@ +Common API concepts +=================== + +.. doxygengroup:: libjxl_common + :members: + :private-members: diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_decoder.rst b/third-party/libjxl/libjxl/doc/sphinx/api_decoder.rst new file mode 100644 index 0000000000..3f8db228d4 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/api_decoder.rst @@ -0,0 +1,6 @@ +Decoder API - ``jxl/decode.h`` +============================== + +.. doxygengroup:: libjxl_decoder + :members: + :private-members: diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_encoder.rst b/third-party/libjxl/libjxl/doc/sphinx/api_encoder.rst new file mode 100644 index 0000000000..0c76cc8891 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/api_encoder.rst @@ -0,0 +1,6 @@ +Encoder API - ``jxl/encode.h`` +============================== + +.. 
doxygengroup:: libjxl_encoder + :members: + :private-members: diff --git a/third-party/libjxl/libjxl/doc/sphinx/api_threads.rst b/third-party/libjxl/libjxl/doc/sphinx/api_threads.rst new file mode 100644 index 0000000000..78dba657df --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/api_threads.rst @@ -0,0 +1,6 @@ +Multi-threaded Encoder/Decoder +============================== + +.. doxygengroup:: libjxl_threads + :members: + :private-members: diff --git a/third-party/libjxl/libjxl/doc/sphinx/conf.py b/third-party/libjxl/libjxl/doc/sphinx/conf.py new file mode 100644 index 0000000000..1591aefc70 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/conf.py @@ -0,0 +1,110 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# Configuration file for the Sphinx documentation builder. +# +# See https://www.sphinx-doc.org/en/master/usage/configuration.html + +import os +import re +import subprocess + +def GetVersion(): + """Function to get the version of the current code.""" + with open(os.path.join( + os.path.dirname(__file__), '../../lib/CMakeLists.txt'), 'r') as f: + cmakevars = {} + for line in f: + m = re.match(r'set\(JPEGXL_([A-Z]+)_VERSION ([^\)]+)\)', line) + if m: + cmakevars[m.group(1)] = m.group(2) + return '%s.%s.%s' % (cmakevars['MAJOR'], cmakevars['MINOR'], cmakevars['PATCH']) + +def ConfigProject(app, config): + # Configure the doxygen xml directory as the "xml" directory next to the + # sphinx output directory. Doxygen generates by default the xml files in a + # "xml" sub-directory of the OUTPUT_DIRECTORY. + build_dir = os.path.dirname(app.outdir) + xml_dir = os.path.join(build_dir, 'xml') + config.breathe_projects['libjxl'] = xml_dir + + # Read the docs build environment doesn't run our cmake script so instead we + # need to run doxygen manually here. 
+ if os.environ.get('READTHEDOCS', None) != 'True': + return + root_dir = os.path.realpath(os.path.join(app.srcdir, '../../')) + doxyfile = os.path.join(build_dir, 'Doxyfile-rtd.doc') + with open(doxyfile, 'w') as f: + f.write(f""" +FILE_PATTERNS = *.c *.h +GENERATE_HTML = NO +GENERATE_LATEX = NO +GENERATE_XML = YES +INPUT = lib/include doc/api.txt +OUTPUT_DIRECTORY = {build_dir} +PROJECT_NAME = LIBJXL +QUIET = YES +RECURSIVE = YES +STRIP_FROM_PATH = lib/include +WARN_AS_ERROR = YES +""") + subprocess.check_call(['doxygen', doxyfile], cwd=root_dir) + +def setup(app): + # Generate doxygen XML on init when running from Read the docs. + app.connect("config-inited", ConfigProject) + +### Project information + +project = 'libjxl' +project_copyright = 'JPEG XL Project Authors' +author = 'JPEG XL Project Authors' +version = GetVersion() + +### General configuration + +extensions = [ + # For integration with doxygen documentation. + 'breathe', + # sphinx readthedocs theme. + 'sphinx_rtd_theme', + # Do we use it? + 'sphinx.ext.graphviz', +] + +breathe_default_project = 'libjxl' +breathe_projects = {} + + +# All the API is in C, except those files that end with cxx.h. +breathe_domain_by_extension = {'h': 'cpp'} +breathe_domain_by_file_pattern = { + '*cxx.h': 'cpp', +} +breathe_implementation_filename_extensions = ['.cc'] + +# These are defined at build time by cmake. +c_id_attributes = [ + 'JXL_EXPORT', + 'JXL_DEPRECATED', + 'JXL_THREADS_EXPORT', +] +cpp_id_attributes = c_id_attributes + + +breathe_projects_source = { + 'libjxl' : ('../../', [ + 'doc/api.txt', + 'lib/include/jxl', + ]) +} + +# Recognized suffixes. +source_suffix = ['.rst', '.md'] + +### Options for HTML output + +# Use the readthedocs.io theme when generating the HTML output. 
+html_theme = 'sphinx_rtd_theme' diff --git a/third-party/libjxl/libjxl/doc/sphinx/index.rst b/third-party/libjxl/libjxl/doc/sphinx/index.rst new file mode 100644 index 0000000000..9a57074b0b --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/index.rst @@ -0,0 +1,18 @@ +.. libjxl sphinx documentation entrypoint + +JPEG XL image format reference implementation +============================================= + +.. toctree:: + :maxdepth: 3 + :caption: Contents: + + api + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` + diff --git a/third-party/libjxl/libjxl/doc/sphinx/requirements.txt b/third-party/libjxl/libjxl/doc/sphinx/requirements.txt new file mode 100644 index 0000000000..28179eafa2 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/sphinx/requirements.txt @@ -0,0 +1,3 @@ +breathe +sphinx +sphinx-rtd-theme diff --git a/third-party/libjxl/libjxl/doc/vuln_playbook.md b/third-party/libjxl/libjxl/doc/vuln_playbook.md new file mode 100644 index 0000000000..1326d70a9e --- /dev/null +++ b/third-party/libjxl/libjxl/doc/vuln_playbook.md @@ -0,0 +1,245 @@ +# Security Vulnerabilities Playbook + +## Reporting security bugs + +Report security bugs by emailing libjxl-security@google.com. + +Don't open a GitHub issue, don't discuss it in public forums like Discord and don't +send a Pull Request if you think you have found a security bug. + +## Overview + +This document outlines the guidelines followed by the project when handling +security bugs, their fixes, disclosure and coordination with security +researchers. For more context about this guide, read the [coordinated +vulnerability disclosure +guidelines](https://github.com/google/oss-vulnerability-guide/blob/main/guide.md) +from Google Open Source Programs Office. + +The main target audience of this guide is the coordinator from the libjxl +Vulnerability Management Team (VMT) handling the requests, however it is useful +for other people to understand what to expect from this process.
+ +Members of the VMT monitor the reports received by email and will coordinate +for these to be addressed. This doesn't mean that said member would fix the bug, +but their responsibility is to make sure it is handled properly according to +this guide. + +## Life of security bug + +The Coordinator from VMT will make sure that the following steps are taken. + +1. Acknowledge the bug report. + +Our policy mandates a maximum of **3 business days** to respond to bug reports +in the given email, but you should respond as soon as possible and keep a fluid +communication with the reporter, who has spent some time looking at the issue. + +2. Determine if the bug is a security bug covered by our policy. + +Not all bugs are security bugs, and not all security bugs are covered by this +vulnerability disclosure policy. See the [What's a Security bug] section below. + +3. Determine the affected versions. + +Often new bugs on stable projects are found on new features or because of those +new features, so only the most recent versions are affected. It is important to +determine both what older versions are affected, so users running those older +versions can patch or update the software, and also what older versions are +*not* affected. It is possible that stable distributions ship older versions +that didn't contain the bug and therefore don't need to patch the code. Often +maintainers of package distributions need to patch older versions instead of +updating due to incompatibilities with newer ones and they need to understand +what's the vulnerable code. + +Security bugs that have already been fixed in `main` or in already released code +but not disclosed as a vulnerability, for example if fixed as a result of a +refactor, should be treated like any other security bug in this policy and +disclosed indicating the range of older affected versions (except for versions +before 0.5, see below).
In such a case a new release would likely not be needed if +one already exists, but stable distributions may still be using those versions +and need to be aware of the issue and fix. + +If no released version is affected by the bug, for example because it was only +introduced in the `main` branch but not yet released, then no vulnerability +disclosure is needed. + +Note: Versions before 0.5 are not covered by the security policy. Those versions +have multiple security issues and should not be used anyway. + +4. Communicate with the reporter + +Communicate the decision to the reporter. + +If the bug was not considered a security bug or not covered by this policy, +explain why and direct the reporter to open a public [issue in +GitHub](https://github.com/libjxl/libjxl/issues) or open one on their behalf. +You don't need to follow the rest of the guide in this case. + +If the bug *is* a covered security bug then follow the rest of this guide. + +Ask the reporter how they want to be credited in the disclosure: name and +company affiliation if any. Security researchers often value this recognition +and it helps them dedicate their time to finding security bugs in our project. + +There's no bug bounty (monetary compensation for security bugs) available for +libjxl. + +5. Create a Security Advisory draft in GitHub + +At this point it was established that the bug is a security issue that requires +a vulnerability disclosure. Start by creating a Security Advisory draft in the +[Security Advisories](https://github.com/libjxl/libjxl/security/advisories) page +in GitHub. + +Add a short description of the bug explaining what's the issue and what's the +impact of the issue. Being 'hard' or 'complex' to exploit is not a reason to +discard the potential impact. You can update this description later, save it as +a draft in GitHub. + +Add the reporter to the security advisory draft if they have a GitHub account, +and add the project members that will be working on a fix for the bug.
+ +Establish the severity of the issue according to the impact and tag the +appropriate Common Weakness Enumeration (CWE) values. This helps classify the +security issues according to their nature. + +6. Work on a fix in a private branch + +Coordinators can work on the fix themselves, use a proposed fix from the +reporter if there is one, or work with other project members to create one. + +Work on a fix for the bug in *private*. Don't publish a Pull Request with the +fix like you normally do, and don't upload the fix to your libjxl fork. If you +ask another project member to work on it, explain to them that they should follow +this guide. + +7. Request a CVE number + +The Common Vulnerabilities and Exposures (CVE) is the system used to disclose +vulnerabilities in software. A CVE number, like CVE-2021-NNNNNN, is a unique +identifier for a given vulnerability. These numbers are assigned by a CVE +Numbering Authority (CNA) with scope on the given project that has the +vulnerability. For libjxl, we use Google's Generic CNA. + +For VMT coordinators at Google, file a bug at +[go/cve-request](https://goto.google.com/cve-request) to request a CVE. See +go/vcp-cna for context. + +When requesting the CVE include: + + * A description of the problem (example: bug when parsing this field) + * A description of the impact of the bug (example: OOB read, remote code + execution, etc) + * The proposed CWE id(s) determined earlier. + * List of affected versions. + * Reporter of the bug and their preferred name/company to include in the + disclosure. + * Links to the issues/fixes (if already public), these can be added later, even + after the CVE is public. + * The CPE prefix of the affected project (`cpe:2.3:a:libjxl_project:libjxl`) + +When in doubt, you can discuss these with the security team while requesting it. + +8. File a Security bug in Chromium (if affected).
+ +libjxl project is in charge of updating and maintaining Chromium's libjxl +integration code, this includes updating the libjxl library when needed. While +the regular CVE disclosure process will eventually create a bug to update +Chromium, filing one at this stage speeds up the process. + +[go/crbug](https://goto.google.com/crbug), select the "Security Bug" template +and complete the details. This bug will be used to keep track of what versions +of Chromium need backporting. The new bug in Chromium will not be public +initially, but will be made public some time after the issue is fixed. + +9. Test the fixes on the intended releases + +When disclosing a vulnerability normally two ways to fix it are offered: + + * A patch or set of patches that fix the issue on `main` branch, and + * A new release that contains the security fix for the user to update to. + +New releases that fix the vulnerability should be PATCH releases, that is, a +previous release (like 1.2.3) plus the patches that fix the vulnerability, +becoming a new version (like 1.2.4). See the [release process](release.md) for +details. At least the latest MINOR release branch should have a PATCH release +with the fix, however it might make sense to also backport the fix to older +minor branch releases, depending on long-term support schedule for certain +releases. For example, if many users are still using a particular older version +of the library and updating to a new version requires significant changes (due +to a redesigned API or new unavailable dependencies) it is helpful to provide a +PATCH release there too. + +In either case, make sure that you test the fix in all the branches that you +intend to release it to. + +The Continuous Integration pipelines don't work on the private forks created by +the Security Advisory, so manual testing of the fix is needed there before +making it public. Don't upload it to your public fork for testing. + +10. 
Coordinate a date for release of the vulnerability disclosure. + +Agree with the reporter and security folks from the CNA on a release date. There +is a maximum 90-day disclosure timeline from the day the bug was reported. + +On the disclosure date publish the fixes and tag the new PATCH release with the +fix. You can prepare private drafts of the release for review beforehand to +reduce the workload. + +Update Chromium to the new release version (if affected) and work with Chrome +engineers on the required backports. + +## What's a Security bug + +A security bug is a bug that can potentially be exploited to let an attacker +gain unauthorized access or privileges. For example, gaining code execution in +libjxl decoder by decoding a malicious .jxl file is a security bug, but hitting a +`JXL_ASSERT()` is not necessarily one. + +The supported use cases to consider in the context of security bugs that require +a vulnerability disclosure are "release" builds. The disclosure is intended for +users of the project, to let them know that there is a security issue and that +they should update or patch it. + +Unreleased versions are not relevant in this context. A bug introduced in the +`main` branch that is not yet in any release is not covered by this guide even +if the bug allows a remote code execution. CVEs should have a non-empty list of +affected released versions. + +"Developer only" code is also not covered by this policy. In particular, tools +that are not installed by the build, or not installed when packaging `libjxl` +are not covered. For example, a bug in `tone_map` would not affect users since +it is a developer-only tool. The rationale behind this is that users of the +released software will not have the developer code. This developer code is in +the same libjxl repository for convenience. + +When considering the impact of a bug, "release" mode should be assumed. In +release mode `JXL_ASSERT()` and `JXL_CHECK()` are enabled, but `JXL_DASSERT()` +are not.
This means that if a `JXL_DASSERT()` protects an out-of-bounds (OOB) +write, then the impact of a bug hitting the `JXL_DASSERT()` is at least an +OOB write. On the other hand, if a bug ends up hitting a `JXL_CHECK()` instead +of continuing, the only impact is the process abort instead of whatever else is +possible after the `JXL_CHECK()`. + +Asserts in `libjxl` *tools* cause the tool process to abort, but don't affect +the caller. Either crashing or returning an error (non-zero exit code) would +have the same effect, so `JXL_ASSERT()` failures in the tools have no security +or functional impact. + +Asserts in `libjxl` libraries, meant to be linked into other processes, cause +the caller process to abort, potentially causing a Denial of Service, however, +Denial of Service issues are *not* considered security bugs by this policy. +These are still issues and should be fixed, but they are not security issues. + +Out-of-bounds (OOB) reads in process memory are considered security +vulnerabilities. OOB reads may allow an attacker to read other buffers from the +same process that it shouldn't have access to, even a small OOB read can +allow the attacker to read an address in the stack or in the heap, defeating +address space randomization techniques. In combination with other bugs these +can enable or simplify attacks to the process using libjxl. OOB reads don't need +to require a segmentation fault to be a problem, leaking process information in +decoded RGB pixels could be used as part of an exploit in some scenarios. + +OOB writes and remote code execution (RCE) are security bugs of at least high +security impact. 
diff --git a/third-party/libjxl/libjxl/doc/xl_overview.md b/third-party/libjxl/libjxl/doc/xl_overview.md new file mode 100644 index 0000000000..b1c0f913b4 --- /dev/null +++ b/third-party/libjxl/libjxl/doc/xl_overview.md @@ -0,0 +1,181 @@ +# XL Overview + +## Requirements + +JPEG XL was designed for two main requirements: + +* high quality: visually lossless at reasonable bitrates; +* decoding speed: multithreaded decoding should be able to reach around + 400 Megapixel/s on large images. + +These goals apply to various types of images, including HDR content, whose +support is made possible by full-precision (float32) computations and extensive +support of color spaces and transfer functions. + +High performance is achieved by designing the format with careful consideration +of memory bandwidth usage and ease of SIMD/GPU implementation. + +The full requirements for JPEG XL are listed in document wg1m82079. + +## General architecture + +The architecture follows the traditional block transform model with improvements +in the individual components. For a quick overview, we sketch a "block diagram" +of the lossy format decoder in the form of module names in **bold** followed by +a brief description. Note that post-processing modules in [brackets] are +optional - they are unnecessary or even counterproductive at very high quality +settings. + +**Header**: decode metadata (e.g. image dimensions) from compressed fields +(smaller than Exp-Golomb thanks to per-field encodings). The compression and +small number of required fields enables very compact headers - much smaller than +JFIF and HEVC. The container supports multiple images (e.g. animations/bursts) +and passes (progressive). 
+ +**Bitstream**: decode transform coefficient residuals using rANS-encoded +<#bits,bits> symbols + +**Dequantize**: from adaptive quant map side information, plus chroma from luma + +**DC prediction**: expand DC residuals using adaptive (history-based) predictors + +**Chroma from luma**: restore predicted X and B from Y + +**IDCT:** 2x2..32x32, floating-point + +**[Gaborish]**: additional deblocking convolution with 3x3 kernel + +**[Edge preserving filter]**: nonlinear adaptive smoothing controlled by side +information + +**[Noise injection]**: add perceptually pleasing noise according to a per-image +noise model + +**Color space conversion**: from perceptual opsin XYB to linear RGB + +**[Converting to other color spaces via ICC]** + +The encoder is basically the reverse: + +**Color space conversion**: from linear RGB to perceptual opsin XYB + +**[Noise estimation]**: compute a noise model for the image + +**[Gaborish]**: sharpening to counteract the blurring on the decoder side + +**DCT**: transform sizes communicated via per-block side information + +**Chroma from luma**: find the best multipliers of Y for X and B channels of +the entire image + +**Adaptive quantization**: iterative search for quant map that yields the best +perceived restoration + +**Quantize**: store 16-bit prediction residuals + +**DC prediction**: store residuals (prediction happens in quantized space) + +**Entropy coding**: rANS and context modeling with clustering + + +# File Structure + +A codestream begins with a `FileHeader` followed by one or more "passes" +(= scans: e.g. DC or AC_LF) which are then added together (summing the +respective color components in Opsin space) to form the final image. There is no +limit to the number of passes, so an encoder could choose to send salient parts +first, followed by arbitrary decompositions of the final image (in terms of +resolution, bit depth, quality or spatial location). + +Each pass contains groups of AC and DC data.
A group is a subset of pixels that +can be decoded in parallel. DC groups contain 256x256 DCs (from 2048x2048 input +pixels), AC groups cover 256x256 input pixels. + +Each pass starts with a table of contents (sizes of each of their DC+AC +groups), which enables parallel decoding and/or the decoding of a subset. +However, there is no higher-level TOC of passes, as that would prevent +appending additional images and could be too constraining for the encoder. + + +## Lossless + +JPEG XL supports tools for lossless coding designed by Alexander Rhatushnyak and +Jon Sneyers. They are about 60-75% of the size of PNG, and smaller than WebP +lossless for photos. + +An adaptive predictor computes 4 predictions from the NW, N, NE and W pixels and combines +them with weights based on previous errors. The error value is encoded in a +bucket chosen based on a heuristic max error. The result is entropy-coded using +the ANS encoder. + +## Current Reference Implementation + +### Conventions + +The software is written in C++ and built using CMake 3.6 or later. + +Error handling is done by having functions return values of type `jxl::Status` +(a thin wrapper around bool which checks that it is not ignored). A convenience +macro named `JXL_RETURN_IF_ERROR` makes this more convenient by automatically +forwarding errors, and another macro named `JXL_FAILURE` exits with an error +message if reached, with no effect in optimized builds. + +To diagnose the cause of encoder/decoder failures (which often only result in a +generic "decode failed" message), build using the following command: + +```bash +CMAKE_FLAGS="-DJXL_CRASH_ON_ERROR" ./ci.sh opt +``` + +In such builds, the first JXL_FAILURE will print a message identifying where the +problem is and the program will exit immediately afterwards. + +### Architecture + +Getting back to the earlier block diagram: + +**Header** handling is implemented in `headers.h` and `field*`. + +**Bitstream**: `entropy_coder.h`, `dec_ans_*`.
+ +**(De)quantize**: `quantizer.h`. + +**DC prediction**: `predictor.h`. + +**Chroma from luma**: `chroma_from_luma.h` + +**(I)DCT**: `dct*.h`. Instead of operating directly on blocks of memory, the +functions operate on thin wrappers which can handle blocks spread across +multiple image lines. + +**DCT size selection**: `ac_strategy.cc` + +**[Gaborish]**: `enc_gaborish.h`. + +**[Edge preserving filter]**: `epf.h` + +**[Noise injection]**: `noise*` (currently disabled) + +**Color space conversion**: `color_*`, `dec_xyb.h`. + +## Decoder overview + +After decoding headers, the decoder begins processing frames (`dec_frame.cc`). + +For each pass, it will read the DC group table of contents (TOC) and start +decoding, dequantizing and restoring color correlation of each DC group +(covering 2048x2048 pixels in the input image) in parallel +(`compressed_dc.cc`). The DC is split into parts corresponding to each AC group +(with 1px of extra border); the AC group TOC is read and each AC group (256x256 +pixels) is processed in parallel (`dec_group.cc`). + +In each AC group, the decoder reads per-block side information indicating the +kind of DCT transform; this is followed by the quantization field. Then, AC +coefficients are read, dequantized and have color correlation restored on a +tile per tile basis for better locality. + +After all the groups are read, postprocessing is applied: Gaborish smoothing +and edge preserving filter, to reduce blocking and other artifacts. + +Finally, the image is converted back from the XYB color space +(`dec_xyb.cc`) and saved to the output image (`codec_*.cc`). diff --git a/third-party/libjxl/libjxl/examples/CMakeLists.txt b/third-party/libjxl/libjxl/examples/CMakeLists.txt new file mode 100644 index 0000000000..88dc27c49f --- /dev/null +++ b/third-party/libjxl/libjxl/examples/CMakeLists.txt @@ -0,0 +1,56 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# Example project using libjxl. + +cmake_minimum_required(VERSION 3.10) + +project(SAMPLE_LIBJXL LANGUAGES C CXX) + +# Use pkg-config to find libjxl. +find_package(PkgConfig) +pkg_check_modules(Jxl REQUIRED IMPORTED_TARGET libjxl) +pkg_check_modules(JxlThreads REQUIRED IMPORTED_TARGET libjxl_threads) + +# Build the example encoder/decoder binaries using the default shared libraries +# installed. +add_executable(decode_oneshot decode_oneshot.cc) +target_link_libraries(decode_oneshot PkgConfig::Jxl PkgConfig::JxlThreads) + +add_executable(decode_progressive decode_progressive.cc) +target_link_libraries(decode_progressive PkgConfig::Jxl PkgConfig::JxlThreads) + +add_executable(encode_oneshot encode_oneshot.cc) +target_link_libraries(encode_oneshot PkgConfig::Jxl PkgConfig::JxlThreads) + + +# Building a static binary with the static libjxl dependencies. How to load +# static library configs from pkg-config and how to build static binaries +# depends on the platform, and building static binaries in general has problems. +# If you don't need static binaries you can remove this section. +add_library(StaticJxl INTERFACE IMPORTED GLOBAL) +set_target_properties(StaticJxl PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${Jxl_STATIC_INCLUDE_DIR}" + INTERFACE_COMPILE_OPTIONS "${Jxl_STATIC_CFLAGS_OTHER}" + INTERFACE_LINK_LIBRARIES "${Jxl_STATIC_LDFLAGS}" +) +add_library(StaticJxlThreads INTERFACE IMPORTED GLOBAL) +set_target_properties(StaticJxlThreads PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${JxlThreads_STATIC_INCLUDE_DIR}" + INTERFACE_COMPILE_OPTIONS "${JxlThreads_STATIC_CFLAGS_OTHER}" + # libgcc uses weak symbols for pthread which means that -lpthread is not + # linked when compiling a static binary. This is a platform-specific fix for + # that. 
+ INTERFACE_LINK_LIBRARIES + "${JxlThreads_STATIC_LDFLAGS} -Wl,--whole-archive -lpthread -Wl,--no-whole-archive" +) + +add_executable(decode_oneshot_static decode_oneshot.cc) +target_link_libraries(decode_oneshot_static + -static StaticJxl StaticJxlThreads) + +add_executable(encode_oneshot_static encode_oneshot.cc) +target_link_libraries(encode_oneshot_static + -static StaticJxl StaticJxlThreads) diff --git a/third-party/libjxl/libjxl/examples/decode_exif_metadata.cc b/third-party/libjxl/libjxl/examples/decode_exif_metadata.cc new file mode 100644 index 0000000000..97b0e52703 --- /dev/null +++ b/third-party/libjxl/libjxl/examples/decode_exif_metadata.cc @@ -0,0 +1,172 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This C++ example decodes a JPEG XL image in one shot (all input bytes +// available at once). The example extracts the Exif metadata box, if present, +// and writes its raw contents to a file on disk. + +#include +#include +#include +#include +#include +#include + +#include + +bool DecodeJpegXlExif(const uint8_t* jxl, size_t size, + std::vector* exif) { + auto dec = JxlDecoderMake(nullptr); + + // We're only interested in the Exif boxes in this example, so don't + // subscribe to events related to pixel data.
+ if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BOX)) { + fprintf(stderr, "JxlDecoderSubscribeEvents failed\n"); + return false; + } + bool support_decompression = true; + if (JXL_DEC_SUCCESS != JxlDecoderSetDecompressBoxes(dec.get(), JXL_TRUE)) { + fprintf(stderr, + "NOTE: decompressing brob boxes not supported with the currently " + "used jxl library.\n"); + support_decompression = false; + } + + JxlDecoderSetInput(dec.get(), jxl, size); + JxlDecoderCloseInput(dec.get()); + + const constexpr size_t kChunkSize = 65536; + size_t output_pos = 0; + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + if (status == JXL_DEC_ERROR) { + fprintf(stderr, "Decoder error\n"); + return false; + } else if (status == JXL_DEC_NEED_MORE_INPUT) { + fprintf(stderr, "Error, already provided all input\n"); + return false; + } else if (status == JXL_DEC_BOX) { + if (!exif->empty()) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get()); + exif->resize(exif->size() - remaining); + // No need to wait for JXL_DEC_SUCCESS or decode other boxes. 
+ return true; + } + JxlBoxType type; + if (JXL_DEC_SUCCESS != + JxlDecoderGetBoxType(dec.get(), type, support_decompression)) { + fprintf(stderr, "Error, failed to get box type\n"); + return false; + } + if (!memcmp(type, "Exif", 4)) { + exif->resize(kChunkSize); + JxlDecoderSetBoxBuffer(dec.get(), exif->data(), exif->size()); + } + } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get()); + output_pos += kChunkSize - remaining; + exif->resize(exif->size() + kChunkSize); + JxlDecoderSetBoxBuffer(dec.get(), exif->data() + output_pos, + exif->size() - output_pos); + } else if (status == JXL_DEC_SUCCESS) { + if (!exif->empty()) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec.get()); + exif->resize(exif->size() - remaining); + return true; + } + return true; + } else { + fprintf(stderr, "Unknown decoder status\n"); + return false; + } + } +} + +bool LoadFile(const char* filename, std::vector* out) { + FILE* file = fopen(filename, "rb"); + if (!file) { + return false; + } + + if (fseek(file, 0, SEEK_END) != 0) { + fclose(file); + return false; + } + + long size = ftell(file); + // Avoid invalid file or directory. 
+ if (size >= LONG_MAX || size < 0) { + fclose(file); + return false; + } + + if (fseek(file, 0, SEEK_SET) != 0) { + fclose(file); + return false; + } + + out->resize(size); + size_t readsize = fread(out->data(), 1, size, file); + if (fclose(file) != 0) { + return false; + } + + return readsize == static_cast(size); +} + +bool WriteFile(const char* filename, const uint8_t* data, size_t size) { + FILE* file = fopen(filename, "wb"); + if (!file) { + fprintf(stderr, "Could not open %s for writing", filename); + return false; + } + fwrite(data, 1, size, file); + if (fclose(file) != 0) { + return false; + } + return true; +} + +int main(int argc, char* argv[]) { + if (argc != 3) { + fprintf(stderr, + "Usage: %s \n" + "Where:\n" + " jxl = input JPEG XL image filename\n" + " exif = output exif filename\n" + "Output files will be overwritten.\n", + argv[0]); + return 1; + } + + const char* jxl_filename = argv[1]; + const char* exif_filename = argv[2]; + + std::vector jxl; + if (!LoadFile(jxl_filename, &jxl)) { + fprintf(stderr, "couldn't load %s\n", jxl_filename); + return 1; + } + + std::vector exif; + if (!DecodeJpegXlExif(jxl.data(), jxl.size(), &exif)) { + fprintf(stderr, "Error while decoding the jxl file\n"); + return 1; + } + if (exif.empty()) { + printf("No exif data present in this image\n"); + } else { + // TODO(lode): the exif box data contains the 4-byte TIFF header at the + // beginning, check whether this is desired to be part of the output, or + // should be removed. 
+ if (!WriteFile(exif_filename, exif.data(), exif.size())) { + fprintf(stderr, "Error while writing the exif file\n"); + return 1; + } + printf("Successfully wrote %s\n", exif_filename); + } + return 0; +} diff --git a/third-party/libjxl/libjxl/examples/decode_oneshot.cc b/third-party/libjxl/libjxl/examples/decode_oneshot.cc new file mode 100644 index 0000000000..07720954f3 --- /dev/null +++ b/third-party/libjxl/libjxl/examples/decode_oneshot.cc @@ -0,0 +1,250 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This C++ example decodes a JPEG XL image in one shot (all input bytes +// available at once). The example outputs the pixels and color information to a +// floating point image and an ICC profile on disk. + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/** Decodes JPEG XL image to floating point pixels and ICC Profile. Pixel are + * stored as floating point, as interleaved RGBA (4 floating point values per + * pixel), line per line from top to bottom. Pixel values have nominal range + * 0..1 but may go beyond this range for HDR or wide gamut. The ICC profile + * describes the color format of the pixel data. + */ +bool DecodeJpegXlOneShot(const uint8_t* jxl, size_t size, + std::vector* pixels, size_t* xsize, + size_t* ysize, std::vector* icc_profile) { + // Multi-threaded parallel runner. 
+ auto runner = JxlResizableParallelRunnerMake(nullptr); + + auto dec = JxlDecoderMake(nullptr); + if (JXL_DEC_SUCCESS != + JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO | + JXL_DEC_COLOR_ENCODING | + JXL_DEC_FULL_IMAGE)) { + fprintf(stderr, "JxlDecoderSubscribeEvents failed\n"); + return false; + } + + if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(), + JxlResizableParallelRunner, + runner.get())) { + fprintf(stderr, "JxlDecoderSetParallelRunner failed\n"); + return false; + } + + JxlBasicInfo info; + JxlPixelFormat format = {4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0}; + + JxlDecoderSetInput(dec.get(), jxl, size); + JxlDecoderCloseInput(dec.get()); + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + + if (status == JXL_DEC_ERROR) { + fprintf(stderr, "Decoder error\n"); + return false; + } else if (status == JXL_DEC_NEED_MORE_INPUT) { + fprintf(stderr, "Error, already provided all input\n"); + return false; + } else if (status == JXL_DEC_BASIC_INFO) { + if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) { + fprintf(stderr, "JxlDecoderGetBasicInfo failed\n"); + return false; + } + *xsize = info.xsize; + *ysize = info.ysize; + JxlResizableParallelRunnerSetThreads( + runner.get(), + JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize)); + } else if (status == JXL_DEC_COLOR_ENCODING) { + // Get the ICC color profile of the pixel data + size_t icc_size; + if (JXL_DEC_SUCCESS != + JxlDecoderGetICCProfileSize(dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, + &icc_size)) { + fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n"); + return false; + } + icc_profile->resize(icc_size); + if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile( + dec.get(), JXL_COLOR_PROFILE_TARGET_DATA, + icc_profile->data(), icc_profile->size())) { + fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n"); + return false; + } + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + size_t buffer_size; + if (JXL_DEC_SUCCESS 
!= + JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) { + fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n"); + return false; + } + if (buffer_size != *xsize * *ysize * 16) { + fprintf(stderr, "Invalid out buffer size %" PRIu64 " %" PRIu64 "\n", + static_cast(buffer_size), + static_cast(*xsize * *ysize * 16)); + return false; + } + pixels->resize(*xsize * *ysize * 4); + void* pixels_buffer = (void*)pixels->data(); + size_t pixels_buffer_size = pixels->size() * sizeof(float); + if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec.get(), &format, + pixels_buffer, + pixels_buffer_size)) { + fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n"); + return false; + } + } else if (status == JXL_DEC_FULL_IMAGE) { + // Nothing to do. Do not yet return. If the image is an animation, more + // full frames may be decoded. This example only keeps the last one. + } else if (status == JXL_DEC_SUCCESS) { + // All decoding successfully finished. + // It's not required to call JxlDecoderReleaseInput(dec.get()) here since + // the decoder will be destroyed. + return true; + } else { + fprintf(stderr, "Unknown decoder status\n"); + return false; + } + } +} + +/** Writes to .pfm file (Portable FloatMap). Gimp, tev viewer and ImageMagick + * support viewing this format. + * The input pixels are given as 32-bit floating point with 4-channel RGBA. + * The alpha channel will not be written since .pfm does not support it. + */ +bool WritePFM(const char* filename, const float* pixels, size_t xsize, + size_t ysize) { + FILE* file = fopen(filename, "wb"); + if (!file) { + fprintf(stderr, "Could not open %s for writing", filename); + return false; + } + uint32_t endian_test = 1; + uint8_t little_endian[4]; + memcpy(little_endian, &endian_test, 4); + + fprintf(file, "PF\n%d %d\n%s\n", (int)xsize, (int)ysize, + little_endian[0] ? 
"-1.0" : "1.0"); + for (int y = ysize - 1; y >= 0; y--) { + for (size_t x = 0; x < xsize; x++) { + for (size_t c = 0; c < 3; c++) { + const float* f = &pixels[(y * xsize + x) * 4 + c]; + fwrite(f, 4, 1, file); + } + } + } + if (fclose(file) != 0) { + return false; + } + return true; +} + +bool LoadFile(const char* filename, std::vector* out) { + FILE* file = fopen(filename, "rb"); + if (!file) { + return false; + } + + if (fseek(file, 0, SEEK_END) != 0) { + fclose(file); + return false; + } + + long size = ftell(file); + // Avoid invalid file or directory. + if (size >= LONG_MAX || size < 0) { + fclose(file); + return false; + } + + if (fseek(file, 0, SEEK_SET) != 0) { + fclose(file); + return false; + } + + out->resize(size); + size_t readsize = fread(out->data(), 1, size, file); + if (fclose(file) != 0) { + return false; + } + + return readsize == static_cast(size); +} + +bool WriteFile(const char* filename, const uint8_t* data, size_t size) { + FILE* file = fopen(filename, "wb"); + if (!file) { + fprintf(stderr, "Could not open %s for writing", filename); + return false; + } + fwrite(data, 1, size, file); + if (fclose(file) != 0) { + return false; + } + return true; +} + +int main(int argc, char* argv[]) { + if (argc != 4) { + fprintf(stderr, + "Usage: %s \n" + "Where:\n" + " jxl = input JPEG XL image filename\n" + " pfm = output Portable FloatMap image filename\n" + " icc = output ICC color profile filename\n" + "Output files will be overwritten.\n", + argv[0]); + return 1; + } + + const char* jxl_filename = argv[1]; + const char* pfm_filename = argv[2]; + const char* icc_filename = argv[3]; + + std::vector jxl; + if (!LoadFile(jxl_filename, &jxl)) { + fprintf(stderr, "couldn't load %s\n", jxl_filename); + return 1; + } + + std::vector pixels; + std::vector icc_profile; + size_t xsize = 0, ysize = 0; + if (!DecodeJpegXlOneShot(jxl.data(), jxl.size(), &pixels, &xsize, &ysize, + &icc_profile)) { + fprintf(stderr, "Error while decoding the jxl file\n"); + return 
1; + } + if (!WritePFM(pfm_filename, pixels.data(), xsize, ysize)) { + fprintf(stderr, "Error while writing the PFM image file\n"); + return 1; + } + if (!WriteFile(icc_filename, icc_profile.data(), icc_profile.size())) { + fprintf(stderr, "Error while writing the ICC profile file\n"); + return 1; + } + printf("Successfully wrote %s and %s\n", pfm_filename, icc_filename); + return 0; +} diff --git a/third-party/libjxl/libjxl/examples/decode_progressive.cc b/third-party/libjxl/libjxl/examples/decode_progressive.cc new file mode 100644 index 0000000000..a094cbeb4f --- /dev/null +++ b/third-party/libjxl/libjxl/examples/decode_progressive.cc @@ -0,0 +1,241 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This C++ example decodes a JPEG XL image progressively (input bytes are +// passed in chunks). The example outputs the intermediate steps to PAM files. + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +bool WritePAM(const char* filename, const uint8_t* buffer, size_t w, size_t h) { + FILE* fp = fopen(filename, "wb"); + if (!fp) { + fprintf(stderr, "Could not open %s for writing", filename); + return false; + } + fprintf(fp, + "P7\nWIDTH %" PRIu64 "\nHEIGHT %" PRIu64 + "\nDEPTH 4\nMAXVAL 255\nTUPLTYPE " + "RGB_ALPHA\nENDHDR\n", + static_cast(w), static_cast(h)); + size_t num_bytes = w * h * 4; + if (fwrite(buffer, 1, num_bytes, fp) != num_bytes) { + fclose(fp); + return false; + }; + if (fclose(fp) != 0) { + return false; + } + return true; +} + +/** Decodes JPEG XL image to 8-bit integer RGBA pixels and an ICC Profile, in a + * progressive way, saving the intermediate steps. 
+ */ +bool DecodeJpegXlProgressive(const uint8_t* jxl, size_t size, + const char* filename, size_t chunksize) { + std::vector pixels; + std::vector icc_profile; + size_t xsize = 0, ysize = 0; + + // Multi-threaded parallel runner. + auto runner = JxlResizableParallelRunnerMake(nullptr); + + auto dec = JxlDecoderMake(nullptr); + if (JXL_DEC_SUCCESS != + JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO | + JXL_DEC_COLOR_ENCODING | + JXL_DEC_FULL_IMAGE)) { + fprintf(stderr, "JxlDecoderSubscribeEvents failed\n"); + return false; + } + + if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(), + JxlResizableParallelRunner, + runner.get())) { + fprintf(stderr, "JxlDecoderSetParallelRunner failed\n"); + return false; + } + + JxlBasicInfo info; + JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0}; + + size_t seen = 0; + JxlDecoderSetInput(dec.get(), jxl, chunksize); + size_t remaining = chunksize; + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + + if (status == JXL_DEC_ERROR) { + fprintf(stderr, "Decoder error\n"); + return false; + } else if (status == JXL_DEC_NEED_MORE_INPUT || status == JXL_DEC_SUCCESS || + status == JXL_DEC_FULL_IMAGE) { + seen += remaining - JxlDecoderReleaseInput(dec.get()); + printf("Flushing after %" PRIu64 " bytes\n", static_cast(seen)); + if (status == JXL_DEC_NEED_MORE_INPUT && + JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec.get())) { + printf("flush error (no preview yet)\n"); + } else { + char fname[1024]; + if (snprintf(fname, 1024, "%s-%" PRIu64 ".pam", filename, + static_cast(seen)) >= 1024) { + fprintf(stderr, "Filename too long\n"); + return false; + }; + if (!WritePAM(fname, pixels.data(), xsize, ysize)) { + fprintf(stderr, "Error writing progressive output\n"); + } + } + remaining = size - seen; + if (remaining > chunksize) remaining = chunksize; + if (remaining == 0) { + if (status == JXL_DEC_NEED_MORE_INPUT) { + fprintf(stderr, "Error, already provided all input\n"); + return 
false; + } else { + return true; + } + } + JxlDecoderSetInput(dec.get(), jxl + seen, remaining); + } else if (status == JXL_DEC_BASIC_INFO) { + if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) { + fprintf(stderr, "JxlDecoderGetBasicInfo failed\n"); + return false; + } + xsize = info.xsize; + ysize = info.ysize; + JxlResizableParallelRunnerSetThreads( + runner.get(), + JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize)); + } else if (status == JXL_DEC_COLOR_ENCODING) { + // Get the ICC color profile of the pixel data + size_t icc_size; + if (JXL_DEC_SUCCESS != + JxlDecoderGetICCProfileSize( + dec.get(), JXL_COLOR_PROFILE_TARGET_ORIGINAL, &icc_size)) { + fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n"); + return false; + } + icc_profile.resize(icc_size); + if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile( + dec.get(), JXL_COLOR_PROFILE_TARGET_ORIGINAL, + icc_profile.data(), icc_profile.size())) { + fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n"); + return false; + } + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + size_t buffer_size; + if (JXL_DEC_SUCCESS != + JxlDecoderImageOutBufferSize(dec.get(), &format, &buffer_size)) { + fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n"); + return false; + } + if (buffer_size != xsize * ysize * 4) { + fprintf(stderr, "Invalid out buffer size %" PRIu64 " != %" PRIu64 "\n", + static_cast(buffer_size), + static_cast(xsize * ysize * 4)); + return false; + } + pixels.resize(xsize * ysize * 4); + if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec.get(), &format, + pixels.data(), + pixels.size())) { + fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n"); + return false; + } + } else { + fprintf(stderr, "Unknown decoder status\n"); + return false; + } + } +} + +bool LoadFile(const char* filename, std::vector* out) { + FILE* file = fopen(filename, "rb"); + if (!file) { + return false; + } + + if (fseek(file, 0, SEEK_END) != 0) { + fclose(file); + return false; + 
} + + long size = ftell(file); + // Avoid invalid file or directory. + if (size >= LONG_MAX || size < 0) { + fclose(file); + return false; + } + + if (fseek(file, 0, SEEK_SET) != 0) { + fclose(file); + return false; + } + + out->resize(size); + size_t readsize = fread(out->data(), 1, size, file); + if (fclose(file) != 0) { + return false; + } + + return readsize == static_cast(size); +} + +int main(int argc, char* argv[]) { + if (argc < 3) { + fprintf( + stderr, + "Usage: %s [chunksize]\n" + "Where:\n" + " jxl = input JPEG XL image filename\n" + " basename = prefix of output filenames\n" + " chunksize = loads chunksize bytes at a time and writes\n" + " intermediate results to basename-[bytes loaded].pam\n" + "Output files will be overwritten.\n", + argv[0]); + return 1; + } + + const char* jxl_filename = argv[1]; + const char* png_filename = argv[2]; + + std::vector jxl; + if (!LoadFile(jxl_filename, &jxl)) { + fprintf(stderr, "couldn't load %s\n", jxl_filename); + return 1; + } + size_t chunksize = jxl.size(); + if (argc > 3) { + long cs = atol(argv[3]); + if (cs < 100) { + fprintf(stderr, "Chunk size is too low, try at least 100 bytes\n"); + return 1; + } + chunksize = cs; + } + + if (!DecodeJpegXlProgressive(jxl.data(), jxl.size(), png_filename, + chunksize)) { + fprintf(stderr, "Error while decoding the jxl file\n"); + return 1; + } + return 0; +} diff --git a/third-party/libjxl/libjxl/examples/encode_oneshot.cc b/third-party/libjxl/libjxl/examples/encode_oneshot.cc new file mode 100644 index 0000000000..49b360ce3b --- /dev/null +++ b/third-party/libjxl/libjxl/examples/encode_oneshot.cc @@ -0,0 +1,276 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This example encodes a file containing a floating point image to another +// file containing JPEG XL image with a single frame. 
/**
 * Parses one image dimension from a PFM header token.
 *
 * Accepts optional leading whitespace and '+', then decimal digits; anything
 * after the digits is ignored. Rejects tokens without digits, negative values
 * and values above INT_MAX.
 */
static bool ParsePfmDimension(const std::string& token, uint32_t* value) {
  size_t pos = 0;
  while (pos < token.size() &&
         (token[pos] == ' ' || (token[pos] >= '\t' && token[pos] <= '\r'))) {
    pos++;
  }
  if (pos < token.size() && token[pos] == '+') pos++;
  if (pos >= token.size() || token[pos] < '0' || token[pos] > '9') {
    return false;
  }
  uint64_t parsed = 0;
  for (; pos < token.size() && token[pos] >= '0' && token[pos] <= '9'; pos++) {
    parsed = parsed * 10 + static_cast<uint64_t>(token[pos] - '0');
    if (parsed > static_cast<uint64_t>(INT_MAX)) return false;
  }
  *value = static_cast<uint32_t>(parsed);
  return true;
}

/**
 * Reads from .pfm file (Portable FloatMap)
 *
 * Only 3-channel ("PF") files whose endianness matches the host are accepted.
 * The rows are stored in reverse order in the file; the first row in the file
 * ends up as the last row of `pixels`.
 *
 * @param filename name of the file to read
 * @param pixels vector to fill with loaded pixels as 32-bit floating point with
 * 3-channel RGB
 * @param xsize set to width of loaded image
 * @param ysize set to height of loaded image
 * @return true if the file was read and parsed completely, false otherwise
 */
bool ReadPFM(const char* filename, std::vector<float>* pixels, uint32_t* xsize,
             uint32_t* ysize) {
  FILE* file = fopen(filename, "rb");
  if (!file) {
    fprintf(stderr, "Could not open %s for reading.\n", filename);
    return false;
  }
  // First byte of the integer 1 is non-zero exactly on little-endian hosts.
  uint32_t endian_test = 1;
  uint8_t little_endian[4];
  memcpy(little_endian, &endian_test, 4);

  if (fseek(file, 0, SEEK_END) != 0) {
    fclose(file);
    return false;
  }

  long size = ftell(file);
  // Avoid invalid file or directory.
  if (size >= LONG_MAX || size < 0) {
    fclose(file);
    return false;
  }

  if (fseek(file, 0, SEEK_SET) != 0) {
    fclose(file);
    return false;
  }

  std::vector<char> data;
  data.resize(size);

  size_t readsize = fread(data.data(), 1, size, file);
  if ((long)readsize != size) {
    // Do not leak the stream on a short read.
    fclose(file);
    return false;
  }
  if (fclose(file) != 0) {
    return false;
  }

  std::stringstream datastream;
  std::string datastream_content(data.data(), data.size());
  datastream.str(datastream_content);

  std::string pf_token;
  getline(datastream, pf_token, '\n');
  if (pf_token != "PF") {
    fprintf(stderr,
            "%s doesn't seem to be a 3 channel Portable FloatMap file (missing "
            "'PF\\n' "
            "bytes).\n",
            filename);
    return false;
  }

  std::string xsize_token;
  getline(datastream, xsize_token, ' ');
  std::string ysize_token;
  getline(datastream, ysize_token, '\n');
  // Parse without exceptions so a malformed header is a normal failure.
  if (!ParsePfmDimension(xsize_token, xsize) ||
      !ParsePfmDimension(ysize_token, ysize)) {
    fprintf(stderr,
            "%s doesn't seem to be a Portable FloatMap file (invalid image "
            "dimensions).\n",
            filename);
    return false;
  }

  std::string endianness_token;
  getline(datastream, endianness_token, '\n');
  bool input_little_endian;
  if (endianness_token == "1.0") {
    input_little_endian = false;
  } else if (endianness_token == "-1.0") {
    input_little_endian = true;
  } else {
    fprintf(stderr,
            "%s doesn't seem to be a Portable FloatMap file (endianness token "
            "isn't '1.0' or '-1.0').\n",
            filename);
    return false;
  }

  // Header length: each token plus the single separator that followed it.
  size_t offset = pf_token.size() + 1 + xsize_token.size() + 1 +
                  ysize_token.size() + 1 + endianness_token.size() + 1;

  // Validate the payload size in 64-bit arithmetic. The dimensions come from
  // the file, and a 32-bit product can wrap around and let a truncated file
  // pass the check, which would make the copy below run out of bounds.
  const uint64_t bytes_per_pixel = 3 * sizeof(float);
  const uint64_t num_pixels = static_cast<uint64_t>(*xsize) * *ysize;
  const bool size_matches =
      data.size() >= offset &&
      num_pixels <= (data.size() - offset) / bytes_per_pixel &&
      num_pixels * bytes_per_pixel == data.size() - offset;
  if (!size_matches) {
    fprintf(stderr,
            "%s doesn't seem to be a Portable FloatMap file (pixel data bytes "
            "are %d, but expected %d * %d * 3 * 4 + %d (%d).\n",
            filename, (int)data.size(), (int)*ysize, (int)*xsize, (int)offset,
            (int)(*ysize * *xsize * 3 * 4 + offset));
    return false;
  }

  if (!!little_endian[0] != input_little_endian) {
    fprintf(stderr,
            "%s has a different endianness than we do, conversion is not "
            "supported.\n",
            filename);
    return false;
  }

  pixels->resize(static_cast<size_t>(num_pixels) * 3);

  if (num_pixels != 0) {
    // Rows are consumed from the file in order and stored starting with the
    // last row of the output, i.e. the image is flipped vertically.
    const size_t floats_per_row = static_cast<size_t>(*xsize) * 3;
    const size_t bytes_per_row = floats_per_row * sizeof(float);
    for (size_t row = *ysize; row-- > 0;) {
      memcpy(pixels->data() + row * floats_per_row, data.data() + offset,
             bytes_per_row);
      offset += bytes_per_row;
    }
  }

  return true;
}
+ * + * @param pixels input pixels + * @param xsize width of the input image + * @param ysize height of the input image + * @param compressed will be populated with the compressed bytes + */ +bool EncodeJxlOneshot(const std::vector& pixels, const uint32_t xsize, + const uint32_t ysize, std::vector* compressed) { + auto enc = JxlEncoderMake(/*memory_manager=*/nullptr); + auto runner = JxlThreadParallelRunnerMake( + /*memory_manager=*/nullptr, + JxlThreadParallelRunnerDefaultNumWorkerThreads()); + if (JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc.get(), + JxlThreadParallelRunner, + runner.get())) { + fprintf(stderr, "JxlEncoderSetParallelRunner failed\n"); + return false; + } + + JxlPixelFormat pixel_format = {3, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0}; + + JxlBasicInfo basic_info; + JxlEncoderInitBasicInfo(&basic_info); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.bits_per_sample = 32; + basic_info.exponent_bits_per_sample = 8; + basic_info.uses_original_profile = JXL_FALSE; + if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc.get(), &basic_info)) { + fprintf(stderr, "JxlEncoderSetBasicInfo failed\n"); + return false; + } + + JxlColorEncoding color_encoding = {}; + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/pixel_format.num_channels < 3); + if (JXL_ENC_SUCCESS != + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)) { + fprintf(stderr, "JxlEncoderSetColorEncoding failed\n"); + return false; + } + + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), nullptr); + + if (JXL_ENC_SUCCESS != + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + (void*)pixels.data(), + sizeof(float) * pixels.size())) { + fprintf(stderr, "JxlEncoderAddImageFrame failed\n"); + return false; + } + JxlEncoderCloseInput(enc.get()); + + compressed->resize(64); + uint8_t* next_out = compressed->data(); + size_t avail_out = compressed->size() - (next_out - compressed->data()); + JxlEncoderStatus process_result = 
JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed->data(); + compressed->resize(compressed->size() * 2); + next_out = compressed->data() + offset; + avail_out = compressed->size() - offset; + } + } + compressed->resize(next_out - compressed->data()); + if (JXL_ENC_SUCCESS != process_result) { + fprintf(stderr, "JxlEncoderProcessOutput failed\n"); + return false; + } + + return true; +} + +/** + * Writes bytes to file. + */ +bool WriteFile(const std::vector& bytes, const char* filename) { + FILE* file = fopen(filename, "wb"); + if (!file) { + fprintf(stderr, "Could not open %s for writing\n", filename); + return false; + } + if (fwrite(bytes.data(), sizeof(uint8_t), bytes.size(), file) != + bytes.size()) { + fprintf(stderr, "Could not write bytes to %s\n", filename); + fclose(file); + return false; + } + if (fclose(file) != 0) { + fprintf(stderr, "Could not close %s\n", filename); + return false; + } + return true; +} + +int main(int argc, char* argv[]) { + if (argc != 3) { + fprintf(stderr, + "Usage: %s \n" + "Where:\n" + " pfm = input Portable FloatMap image filename\n" + " jxl = output JPEG XL image filename\n" + "Output files will be overwritten.\n", + argv[0]); + return 1; + } + + const char* pfm_filename = argv[1]; + const char* jxl_filename = argv[2]; + + std::vector pixels; + uint32_t xsize; + uint32_t ysize; + if (!ReadPFM(pfm_filename, &pixels, &xsize, &ysize)) { + fprintf(stderr, "Couldn't load %s\n", pfm_filename); + return 2; + } + + std::vector compressed; + if (!EncodeJxlOneshot(pixels, xsize, ysize, &compressed)) { + fprintf(stderr, "Couldn't encode jxl\n"); + return 3; + } + + if (!WriteFile(compressed, jxl_filename)) { + fprintf(stderr, "Couldn't write jxl file\n"); + return 4; + } + + return 0; +} diff --git 
a/third-party/libjxl/libjxl/examples/examples.cmake b/third-party/libjxl/libjxl/examples/examples.cmake new file mode 100644 index 0000000000..fd159578bc --- /dev/null +++ b/third-party/libjxl/libjxl/examples/examples.cmake @@ -0,0 +1,11 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +add_executable(decode_oneshot ${CMAKE_CURRENT_LIST_DIR}/decode_oneshot.cc) +target_link_libraries(decode_oneshot jxl_dec jxl_threads) +add_executable(decode_progressive ${CMAKE_CURRENT_LIST_DIR}/decode_progressive.cc) +target_link_libraries(decode_progressive jxl_dec jxl_threads) +add_executable(encode_oneshot ${CMAKE_CURRENT_LIST_DIR}/encode_oneshot.cc) +target_link_libraries(encode_oneshot jxl jxl_threads) diff --git a/third-party/libjxl/libjxl/lib/BUILD b/third-party/libjxl/libjxl/lib/BUILD new file mode 100644 index 0000000000..8aa803dc7f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/BUILD @@ -0,0 +1,298 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# Load sources/headers/tests lists. 
+load( + "jxl_lists.bzl", + "libjxl_base_sources", + "libjxl_codec_apng_sources", + "libjxl_codec_exr_sources", + "libjxl_codec_gif_sources", + "libjxl_codec_jpegli_sources", + "libjxl_codec_jpg_sources", + "libjxl_codec_jxl_sources", + "libjxl_codec_npy_sources", + "libjxl_codec_pgx_sources", + "libjxl_codec_pnm_sources", + "libjxl_dec_box_sources", + "libjxl_dec_jpeg_sources", + "libjxl_dec_sources", + "libjxl_enc_sources", + "libjxl_extras_for_tools_sources", + "libjxl_extras_sources", + #'libjxl_gbench_sources', + "libjxl_jpegli_lib_version", + "libjxl_jpegli_libjpeg_helper_files", + "libjxl_jpegli_sources", + "libjxl_jpegli_testlib_files", + "libjxl_jpegli_tests", + "libjxl_major_version", + "libjxl_minor_version", + "libjxl_patch_version", + "libjxl_public_headers", + "libjxl_testlib_files", + "libjxl_tests", + "libjxl_threads_public_headers", + "libjxl_threads_sources", +) +load( + "jxl_vars.bzl", + "libjxl_deps_brotli", + "libjxl_deps_exr", + "libjxl_deps_gif", + "libjxl_deps_gtest", + "libjxl_deps_hwy", + "libjxl_deps_hwy_nanobenchmark", + "libjxl_deps_hwy_test_util", + "libjxl_deps_jpeg", + "libjxl_deps_jxl_box", + "libjxl_deps_png", + "libjxl_deps_runfiles", + "libjxl_deps_skcms", + "libjxl_deps_testdata", + "libjxl_root_package", + "libjxl_test_shards", + "libjxl_test_timeouts", +) +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("@bazel_skylib//rules:copy_file.bzl", "copy_file") + +DEFAULT_VISIBILITY = ["//:__subpackages__"] + +DEFAULT_COMPATIBILITY = [] + +INCLUDES_DIR = "include" + +package( + default_visibility = ["//:__subpackages__"], +) + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +EXPORT_TEMPLATE = """ +#ifndef @_EXPORT_H +#define @_EXPORT_H + +#define @_EXPORT +#define @_NO_EXPORT + +#ifndef @_DEPRECATED +# define @_DEPRECATED __attribute__ ((__deprecated__)) +#endif + +#endif +""" + +JXL_EXPORT_H = INCLUDES_DIR + "/jxl/jxl_export.h" + +genrule( + name = "create_jxl_export", + outs = [JXL_EXPORT_H], + cmd = 
"echo '" + EXPORT_TEMPLATE.replace("@", "JXL") + "' > $@", + compatible_with = DEFAULT_COMPATIBILITY, +) + +JXL_THREADS_EXPORT_H = INCLUDES_DIR + "/jxl/jxl_threads_export.h" + +genrule( + name = "create_jxl_threads_export", + outs = [JXL_THREADS_EXPORT_H], + cmd = "echo '" + EXPORT_TEMPLATE.replace("@", "JXL_THREADS") + "' > $@", + compatible_with = DEFAULT_COMPATIBILITY, +) + +JXL_VERSION_H = INCLUDES_DIR + "/jxl/version.h" + +expand_template( + name = "expand_jxl_version", + out = JXL_VERSION_H, + compatible_with = DEFAULT_COMPATIBILITY, + substitutions = { + "@JPEGXL_MAJOR_VERSION@": str(libjxl_major_version), + "@JPEGXL_MINOR_VERSION@": str(libjxl_minor_version), + "@JPEGXL_PATCH_VERSION@": str(libjxl_patch_version), + }, + template = "jxl/version.h.in", +) + +cc_library( + name = "jxl_version", + hdrs = [JXL_VERSION_H], + compatible_with = DEFAULT_COMPATIBILITY, + strip_include_prefix = INCLUDES_DIR, +) + +JPEGLI_JCONFIG_H = INCLUDES_DIR + "/jpegli/jconfig.h" + +JPEGLI_JMORECFG_H = INCLUDES_DIR + "/jpegli/jmorecfg.h" + +JPEGLI_JPEGLIB_H = INCLUDES_DIR + "/jpegli/jpeglib.h" + +copy_file( + name = "expand_jconfig", + src = "@libjpeg_turbo//:jconfig.h", + out = JPEGLI_JCONFIG_H, + compatible_with = DEFAULT_COMPATIBILITY, +) + +copy_file( + name = "copy_jmorecfg", + src = "@libjpeg_turbo//:jmorecfg.h", + out = JPEGLI_JMORECFG_H, + compatible_with = DEFAULT_COMPATIBILITY, +) + +copy_file( + name = "copy_jpeglib", + src = "@libjpeg_turbo//:jpeglib.h", + out = JPEGLI_JPEGLIB_H, + compatible_with = DEFAULT_COMPATIBILITY, +) + +cc_library( + name = "includes", + hdrs = libjxl_public_headers + [JXL_EXPORT_H], + compatible_with = DEFAULT_COMPATIBILITY, + strip_include_prefix = INCLUDES_DIR, + deps = [":jxl_version"], +) + +cc_library( + name = "libjpeg_includes", + hdrs = [ + JPEGLI_JCONFIG_H, + JPEGLI_JMORECFG_H, + JPEGLI_JPEGLIB_H, + ], + compatible_with = DEFAULT_COMPATIBILITY, + strip_include_prefix = INCLUDES_DIR + "/jpegli", +) + +cc_library( + name = "base", + 
srcs = [path for path in libjxl_base_sources if path.endswith(".cc")], + hdrs = [path for path in libjxl_base_sources if path.endswith(".h")], + compatible_with = DEFAULT_COMPATIBILITY, + deps = [ + ":includes", + ] + libjxl_deps_hwy, +) + +cc_library( + name = "jpegxl", + srcs = libjxl_dec_sources + libjxl_dec_box_sources + libjxl_dec_jpeg_sources + libjxl_enc_sources, + compatible_with = DEFAULT_COMPATIBILITY, + defines = ["JPEGXL_ENABLE_SKCMS=1"], + deps = [ + ":base", + ":includes", + ] + libjxl_deps_brotli + libjxl_deps_hwy + libjxl_deps_skcms, +) + +cc_library( + name = "jpegxl_private", + hdrs = [ + path + for path in libjxl_dec_sources + libjxl_dec_box_sources + libjxl_dec_jpeg_sources + libjxl_enc_sources + if path.endswith(".h") and not path.endswith("-inl.h") + ], + compatible_with = DEFAULT_COMPATIBILITY, + deps = [":jpegxl"], +) + +cc_library( + name = "jpegxl_threads", + srcs = libjxl_threads_sources, + hdrs = libjxl_threads_public_headers + [JXL_THREADS_EXPORT_H], + compatible_with = DEFAULT_COMPATIBILITY, + strip_include_prefix = INCLUDES_DIR, + deps = [ + ":base", + ":includes", + ], +) + +CODEC_FILES = libjxl_codec_apng_sources + libjxl_codec_exr_sources + libjxl_codec_gif_sources + libjxl_codec_jpegli_sources + libjxl_codec_jpg_sources + libjxl_codec_jxl_sources + libjxl_codec_npy_sources + libjxl_codec_pgx_sources + libjxl_codec_pnm_sources + +CODEC_SRCS = [path for path in CODEC_FILES if path.endswith(".cc")] + +CODEC_HDRS = [path for path in CODEC_FILES if path.endswith(".h")] + +cc_library( + name = "jpegli", + srcs = libjxl_jpegli_sources, + hdrs = [ + "jpegli/common_internal.h", # TODO(eustas): should not be here + ], + compatible_with = DEFAULT_COMPATIBILITY, + deps = [ + ":jpegxl_private", + ":libjpeg_includes", + ] + libjxl_deps_hwy, +) + +# TODO(eustas): build codecs separately? 
+cc_library( + name = "jpegxl_extras", + srcs = libjxl_extras_sources + libjxl_extras_for_tools_sources + CODEC_SRCS, + hdrs = CODEC_HDRS, + compatible_with = DEFAULT_COMPATIBILITY, + defines = [ + "JPEGXL_ENABLE_APNG=1", + "JPEGXL_ENABLE_EXR=1", + "JPEGXL_ENABLE_GIF=1", + "JPEGXL_ENABLE_JPEG=1", + "JPEGXL_ENABLE_JPEGLI=1", + ], + deps = [ + ":jpegli", + ":jpegxl_private", + ":jpegxl_threads", + ":jxl_version", + ] + libjxl_deps_exr + libjxl_deps_gif + libjxl_deps_jpeg + libjxl_deps_png, +) + +TESTLIB_FILES = libjxl_testlib_files + libjxl_jpegli_testlib_files + libjxl_jpegli_libjpeg_helper_files + +cc_library( + name = "test_utils", + testonly = 1, + srcs = [path for path in TESTLIB_FILES if not path.endswith(".h")], + hdrs = [path for path in TESTLIB_FILES if path.endswith(".h")], + compatible_with = DEFAULT_COMPATIBILITY, + defines = [ + 'JPEGXL_ROOT_PACKAGE=\'"' + libjxl_root_package + '"\'', + ], + deps = [ + ":jpegli", + ":jpegxl_extras", + ":jpegxl_private", + ] + libjxl_deps_runfiles, +) + +TESTS = [path.partition(".")[0] for path in libjxl_tests + libjxl_jpegli_tests] + +[ + cc_test( + name = test, + timeout = libjxl_test_timeouts.get(test, "moderate"), + srcs = [ + test + ".cc", + "jpegli/testing.h", + "jxl/testing.h", + ], + data = ["//:testdata"], + shard_count = libjxl_test_shards.get(test, 1), + deps = [ + ":jpegxl_extras", + ":jpegxl_private", + ":jpegxl_threads", + ":test_utils", + ] + libjxl_deps_gtest + libjxl_deps_hwy_test_util + libjxl_deps_hwy_nanobenchmark + libjxl_deps_jxl_box, + ) + for test in TESTS +] diff --git a/third-party/libjxl/libjxl/lib/CMakeLists.txt b/third-party/libjxl/libjxl/lib/CMakeLists.txt new file mode 100644 index 0000000000..24961db5e5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/CMakeLists.txt @@ -0,0 +1,167 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
+ +set(JPEGXL_MAJOR_VERSION 0) +set(JPEGXL_MINOR_VERSION 9) +set(JPEGXL_PATCH_VERSION 0) +set(JPEGXL_LIBRARY_VERSION + "${JPEGXL_MAJOR_VERSION}.${JPEGXL_MINOR_VERSION}.${JPEGXL_PATCH_VERSION}") + +# This is the library API/ABI compatibility version. Changing this value makes +# the shared library incompatible with previous version. A program linked +# against this shared library SOVERSION will not run with an older SOVERSION. +# It is important to update this value when making incompatible API/ABI changes +# so that programs that depend on libjxl can update their dependencies. Semantic +# versioning allows 0.y.z to have incompatible changes in minor versions. +set(JPEGXL_SO_MINOR_VERSION 9) +if (JPEGXL_MAJOR_VERSION EQUAL 0) + set(JPEGXL_LIBRARY_SOVERSION + "${JPEGXL_MAJOR_VERSION}.${JPEGXL_SO_MINOR_VERSION}") +else() + set(JPEGXL_LIBRARY_SOVERSION "${JPEGXL_MAJOR_VERSION}") +endif() + + +# List of warning and feature flags for our library and tests. +if (MSVC) + set(JPEGXL_INTERNAL_FLAGS + # TODO(janwas): add flags + ) +else () + set(JPEGXL_INTERNAL_FLAGS + # F_FLAGS + -fmerge-all-constants + -fno-builtin-fwrite + -fno-builtin-fread + + # WARN_FLAGS + -Wall + -Wextra + -Wc++11-compat + -Warray-bounds + -Wformat-security + -Wimplicit-fallthrough + -Wno-register # Needed by public headers in lcms + -Wno-unused-function + -Wno-unused-parameter + -Wnon-virtual-dtor + -Woverloaded-virtual + -Wvla + ) + + # Warning flags supported by clang. + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND JPEGXL_INTERNAL_FLAGS + -Wdeprecated-increment-bool + # TODO(deymo): Add -Wextra-semi once we update third_party/highway. 
+ # -Wextra-semi + -Wfloat-overflow-conversion + -Wfloat-zero-conversion + -Wfor-loop-analysis + -Wgnu-redeclared-enum + -Winfinite-recursion + -Wliteral-conversion + -Wno-c++98-compat + -Wno-unused-command-line-argument + -Wprivate-header + -Wself-assign + -Wstring-conversion + -Wtautological-overlap-compare + -Wthread-safety-analysis + -Wundefined-func-template + -Wunreachable-code + -Wunused-comparison + ) + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.0) + list(APPEND HWY_FLAGS -Wc++2a-extensions) + endif() + endif() # Clang + + if (WIN32) + list(APPEND JPEGXL_INTERNAL_FLAGS + -Wno-cast-align + -Wno-double-promotion + -Wno-float-equal + -Wno-format-nonliteral + -Wno-shadow + -Wno-sign-conversion + -Wno-zero-as-null-pointer-constant + ) + + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND JPEGXL_INTERNAL_FLAGS + -Wno-used-but-marked-unused + -Wno-unused-template + -Wno-unused-member-function + -Wno-shadow-field-in-constructor + -Wno-language-extension-token + -Wno-global-constructors + -Wno-c++98-compat-pedantic + ) + endif() # Clang + else() # WIN32 + list(APPEND JPEGXL_INTERNAL_FLAGS + -fsized-deallocation + -fno-exceptions + + # Language flags + -fmath-errno + ) + + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + list(APPEND JPEGXL_INTERNAL_FLAGS + -fnew-alignment=8 + -fno-cxx-exceptions + -fno-slp-vectorize + -fno-vectorize + + -disable-free + -disable-llvm-verifier + ) + endif() # Clang + endif() # WIN32 +endif() #!MSVC + +# strips the -static suffix from all the elements in LIST +function(strip_static OUTPUT_VAR LIB_LIST) + foreach(lib IN LISTS ${LIB_LIST}) + string(REGEX REPLACE "-static$" "" lib "${lib}") + list(APPEND out_list "${lib}") + endforeach() + set(${OUTPUT_VAR} ${out_list} PARENT_SCOPE) +endfunction() + +# The jxl library definition. +include(jxl.cmake) + +# Other libraries outside the core jxl library. 
+if(JPEGXL_ENABLE_TOOLS) + include(jxl_extras.cmake) +endif() +include(jxl_threads.cmake) +if (JPEGXL_ENABLE_JPEGLI) + include(jpegli.cmake) +endif() + +# Install all the library headers from the source and the generated ones. There +# is no distinction on which libraries use which header since it is expected +# that all developer libraries are available together at build time. +install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/jxl + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/jxl + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + +if(BUILD_TESTING) + cmake_policy(SET CMP0057 NEW) # https://gitlab.kitware.com/cmake/cmake/issues/18198 + include(GoogleTest) +endif() + +# Tests for the jxl library. +include(jxl_tests.cmake) + +if(BUILD_TESTING) + # Google benchmark for the jxl library + include(jxl_benchmark.cmake) +endif() diff --git a/third-party/libjxl/libjxl/lib/compatibility.cmake b/third-party/libjxl/libjxl/lib/compatibility.cmake new file mode 100644 index 0000000000..9d99d29482 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/compatibility.cmake @@ -0,0 +1,30 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 
+ +function(jxl_discover_tests TESTNAME) + if (CMAKE_VERSION VERSION_LESS "3.10.3") + gtest_discover_tests(${TESTNAME} TIMEOUT 240) + else () + gtest_discover_tests(${TESTNAME} DISCOVERY_TIMEOUT 240) + endif () +endfunction() + +function(jxl_link_libraries DST SRC) + if (CMAKE_VERSION VERSION_LESS "3.13.5") + target_include_directories(${DST} SYSTEM PUBLIC + $> + ) + add_dependencies(${DST} ${SRC}) + else() + target_link_libraries(${DST} PUBLIC ${SRC}) + endif() +endfunction() + + +if (CMAKE_VERSION VERSION_LESS "3.12.4") + set(JXL_HWY_INCLUDE_DIRS "$>") +else() + set(JXL_HWY_INCLUDE_DIRS "$,hwy::hwy,hwy>,INTERFACE_INCLUDE_DIRECTORIES>>") +endif() diff --git a/third-party/libjxl/libjxl/lib/extras/LICENSE.apngdis b/third-party/libjxl/libjxl/lib/extras/LICENSE.apngdis new file mode 100644 index 0000000000..eb0ba7c07b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/LICENSE.apngdis @@ -0,0 +1,27 @@ +APNG Disassembler 2.8 + +Deconstructs APNG files into individual frames. + +http://apngdis.sourceforge.net + +Copyright (c) 2010-2015 Max Stepin +maxst at users.sourceforge.net + +zlib license +------------ + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
diff --git a/third-party/libjxl/libjxl/lib/extras/README.md b/third-party/libjxl/libjxl/lib/extras/README.md new file mode 100644 index 0000000000..06a9b5ea07 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/README.md @@ -0,0 +1,5 @@ +## JPEG XL "extras" + +The files in this directory do not form part of the library or codec and are +only used by tests or specific internal tools that have access to the internals +of the library. diff --git a/third-party/libjxl/libjxl/lib/extras/codec.cc b/third-party/libjxl/libjxl/lib/extras/codec.cc new file mode 100644 index 0000000000..fb590a8a94 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/codec.cc @@ -0,0 +1,173 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/codec.h" + +#include +#include + +#include "lib/extras/dec/decode.h" +#include "lib/extras/enc/apng.h" +#include "lib/extras/enc/exr.h" +#include "lib/extras/enc/jpg.h" +#include "lib/extras/enc/pgx.h" +#include "lib/extras/enc/pnm.h" +#include "lib/extras/packed_image.h" +#include "lib/extras/packed_image_convert.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { +namespace { + +// Any valid encoding is larger (ensures codecs can read the first few bytes) +constexpr size_t kMinBytes = 9; + +} // namespace + +Status SetFromBytes(const Span bytes, + const extras::ColorHints& color_hints, CodecInOut* io, + ThreadPool* pool, const SizeConstraints* constraints, + extras::Codec* orig_codec) { + if (bytes.size() < kMinBytes) return JXL_FAILURE("Too few bytes"); + + extras::PackedPixelFile ppf; + if (extras::DecodeBytes(bytes, color_hints, &ppf, constraints, orig_codec)) { + return ConvertPackedPixelFileToCodecInOut(ppf, pool, io); + } + return JXL_FAILURE("Codecs failed to decode"); +} + +Status Encode(const CodecInOut& io, 
const extras::Codec codec, + const ColorEncoding& c_desired, size_t bits_per_sample, + std::vector* bytes, ThreadPool* pool) { + bytes->clear(); + JXL_CHECK(!io.Main().c_current().ICC().empty()); + JXL_CHECK(!c_desired.ICC().empty()); + io.CheckMetadata(); + if (io.Main().IsJPEG()) { + JXL_WARNING("Writing JPEG data as pixels"); + } + JxlPixelFormat format = { + 0, // num_channels is ignored by the converter + bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16, JXL_BIG_ENDIAN, + 0}; + const bool floating_point = bits_per_sample > 16; + std::unique_ptr encoder; + std::ostringstream os; + switch (codec) { + case extras::Codec::kPNG: + encoder = extras::GetAPNGEncoder(); + if (encoder) { + break; + } else { + return JXL_FAILURE("JPEG XL was built without (A)PNG support"); + } + case extras::Codec::kJPG: + format.data_type = JXL_TYPE_UINT8; + encoder = extras::GetJPEGEncoder(); + if (encoder) { + os << io.jpeg_quality; + encoder->SetOption("q", os.str()); + break; + } else { + return JXL_FAILURE("JPEG XL was built without JPEG support"); + } + case extras::Codec::kPNM: + if (io.Main().HasAlpha()) { + encoder = extras::GetPAMEncoder(); + } else if (io.Main().IsGray()) { + encoder = extras::GetPGMEncoder(); + } else if (!floating_point) { + encoder = extras::GetPPMEncoder(); + } else { + format.data_type = JXL_TYPE_FLOAT; + format.endianness = JXL_LITTLE_ENDIAN; + encoder = extras::GetPFMEncoder(); + } + break; + case extras::Codec::kPGX: + encoder = extras::GetPGXEncoder(); + break; + case extras::Codec::kGIF: + return JXL_FAILURE("Encoding to GIF is not implemented"); + case extras::Codec::kEXR: + format.data_type = JXL_TYPE_FLOAT; + encoder = extras::GetEXREncoder(); + if (encoder) { + break; + } else { + return JXL_FAILURE("JPEG XL was built without OpenEXR support"); + } + case extras::Codec::kJXL: + return JXL_FAILURE("TODO: encode using Codec::kJXL"); + + case extras::Codec::kUnknown: + return JXL_FAILURE("Cannot encode using Codec::kUnknown"); + } + + if 
(!encoder) { + return JXL_FAILURE("Invalid codec."); + } + + extras::PackedPixelFile ppf; + JXL_RETURN_IF_ERROR( + ConvertCodecInOutToPackedPixelFile(io, format, c_desired, pool, &ppf)); + ppf.info.bits_per_sample = bits_per_sample; + if (format.data_type == JXL_TYPE_FLOAT) { + ppf.info.bits_per_sample = 32; + ppf.info.exponent_bits_per_sample = 8; + } + extras::EncodedImage encoded_image; + JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded_image, pool)); + JXL_ASSERT(encoded_image.bitstreams.size() == 1); + *bytes = encoded_image.bitstreams[0]; + + return true; +} + +Status Encode(const CodecInOut& io, const ColorEncoding& c_desired, + size_t bits_per_sample, const std::string& pathname, + std::vector* bytes, ThreadPool* pool) { + std::string extension; + const extras::Codec codec = extras::CodecFromPath( + pathname, &bits_per_sample, /* basename */ nullptr, &extension); + + // Warn about incorrect usage of PGM/PGX/PPM - only the latter supports + // color, but CodecFromPath lumps them all together. 
+ if (codec == extras::Codec::kPNM && extension != ".pfm") { + if (io.Main().HasAlpha() && extension != ".pam") { + JXL_WARNING( + "For images with alpha, the filename should end with .pam.\n"); + } else if (!io.Main().IsGray() && extension == ".pgm") { + JXL_WARNING("For color images, the filename should end with .ppm.\n"); + } else if (io.Main().IsGray() && extension == ".ppm") { + JXL_WARNING( + "For grayscale images, the filename should not end with .ppm.\n"); + } + if (bits_per_sample > 16) { + JXL_WARNING("PPM only supports up to 16 bits per sample"); + bits_per_sample = 16; + } + } else if (codec == extras::Codec::kPGX && !io.Main().IsGray()) { + JXL_WARNING("Storing color image to PGX - use .ppm extension instead.\n"); + } + if (bits_per_sample > 16 && codec == extras::Codec::kPNG) { + JXL_WARNING("PNG only supports up to 16 bits per sample"); + bits_per_sample = 16; + } + + return Encode(io, codec, c_desired, bits_per_sample, bytes, pool); +} + +Status Encode(const CodecInOut& io, const std::string& pathname, + std::vector* bytes, ThreadPool* pool) { + // TODO(lode): need to take the floating_point_sample field into account + return Encode(io, io.metadata.m.color_encoding, + io.metadata.m.bit_depth.bits_per_sample, pathname, bytes, pool); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/codec.h b/third-party/libjxl/libjxl/lib/extras/codec.h new file mode 100644 index 0000000000..4ad75fd97d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/codec.h @@ -0,0 +1,63 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_CODEC_H_ +#define LIB_EXTRAS_CODEC_H_ + +// Facade for image encoders/decoders (PNG, PNM, ...). 
+ +#include +#include + +#include + +#include "lib/extras/dec/color_hints.h" +#include "lib/extras/dec/decode.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/field_encodings.h" // MakeBit + +namespace jxl { + +struct SizeConstraints; + +// Decodes "bytes" and sets io->metadata.m. +// color_space_hint may specify the color space, otherwise, defaults to sRGB. +Status SetFromBytes(Span bytes, + const extras::ColorHints& color_hints, CodecInOut* io, + ThreadPool* pool = nullptr, + const SizeConstraints* constraints = nullptr, + extras::Codec* orig_codec = nullptr); +// Helper function to use no color_space_hint. +JXL_INLINE Status SetFromBytes(const Span bytes, CodecInOut* io, + ThreadPool* pool = nullptr, + const SizeConstraints* constraints = nullptr, + extras::Codec* orig_codec = nullptr) { + return SetFromBytes(bytes, extras::ColorHints(), io, pool, constraints, + orig_codec); +} + +// Replaces "bytes" with an encoding of pixels transformed from c_current +// color space to c_desired. +Status Encode(const CodecInOut& io, extras::Codec codec, + const ColorEncoding& c_desired, size_t bits_per_sample, + std::vector* bytes, ThreadPool* pool = nullptr); + +// Deduces codec, calls Encode and writes to file. +Status Encode(const CodecInOut& io, const ColorEncoding& c_desired, + size_t bits_per_sample, const std::string& pathname, + std::vector* bytes, ThreadPool* pool = nullptr); +// Same, but defaults to metadata.original color_encoding and bits_per_sample. 
+Status Encode(const CodecInOut& io, const std::string& pathname, + std::vector* bytes, ThreadPool* pool = nullptr); + +} // namespace jxl + +#endif // LIB_EXTRAS_CODEC_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/codec_test.cc b/third-party/libjxl/libjxl/lib/extras/codec_test.cc new file mode 100644 index 0000000000..0ad540533b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/codec_test.cc @@ -0,0 +1,450 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/codec.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "lib/extras/dec/decode.h" +#include "lib/extras/dec/pnm.h" +#include "lib/extras/enc/encode.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { + +using test::ThreadPoolForTests; + +namespace extras { +namespace { + +using ::testing::AllOf; +using ::testing::Contains; +using ::testing::Field; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +std::string ExtensionFromCodec(Codec codec, const bool is_gray, + const bool has_alpha, + const size_t bits_per_sample) { + switch (codec) { + case Codec::kJPG: + return ".jpg"; + case Codec::kPGX: + return ".pgx"; + case Codec::kPNG: + return ".png"; + case Codec::kPNM: + if (bits_per_sample == 32) return ".pfm"; + if (has_alpha) return ".pam"; + return is_gray ? 
".pgm" : ".ppm"; + case Codec::kEXR: + return ".exr"; + default: + return std::string(); + } +} + +void VerifySameImage(const PackedImage& im0, size_t bits_per_sample0, + const PackedImage& im1, size_t bits_per_sample1, + bool lossless = true) { + ASSERT_EQ(im0.xsize, im1.xsize); + ASSERT_EQ(im0.ysize, im1.ysize); + ASSERT_EQ(im0.format.num_channels, im1.format.num_channels); + auto get_factor = [](JxlPixelFormat f, size_t bits) -> double { + return 1.0 / ((1u << std::min(test::GetPrecision(f.data_type), bits)) - 1); + }; + double factor0 = get_factor(im0.format, bits_per_sample0); + double factor1 = get_factor(im1.format, bits_per_sample1); + auto pixels0 = static_cast(im0.pixels()); + auto pixels1 = static_cast(im1.pixels()); + auto rgba0 = + test::ConvertToRGBA32(pixels0, im0.xsize, im0.ysize, im0.format, factor0); + auto rgba1 = + test::ConvertToRGBA32(pixels1, im1.xsize, im1.ysize, im1.format, factor1); + double tolerance = + lossless ? 0.5 * std::min(factor0, factor1) : 3.0f / 255.0f; + if (bits_per_sample0 == 32 || bits_per_sample1 == 32) { + tolerance = 0.5 * std::max(factor0, factor1); + } + for (size_t y = 0; y < im0.ysize; ++y) { + for (size_t x = 0; x < im0.xsize; ++x) { + for (size_t c = 0; c < im0.format.num_channels; ++c) { + size_t ix = (y * im0.xsize + x) * 4 + c; + double val0 = rgba0[ix]; + double val1 = rgba1[ix]; + ASSERT_NEAR(val1, val0, tolerance) + << "y = " << y << " x = " << x << " c = " << c; + } + } + } +} + +JxlColorEncoding CreateTestColorEncoding(bool is_gray) { + JxlColorEncoding c; + c.color_space = is_gray ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB; + c.white_point = JXL_WHITE_POINT_D65; + c.primaries = JXL_PRIMARIES_P3; + c.rendering_intent = JXL_RENDERING_INTENT_RELATIVE; + c.transfer_function = JXL_TRANSFER_FUNCTION_LINEAR; + // Roundtrip through internal color encoding to fill in primaries and white + // point CIE xy coordinates. 
+ ColorEncoding c_internal; + JXL_CHECK(ConvertExternalToInternalColorEncoding(c, &c_internal)); + ConvertInternalToExternalColorEncoding(c_internal, &c); + return c; +} + +std::vector GenerateICC(JxlColorEncoding color_encoding) { + ColorEncoding c; + JXL_CHECK(ConvertExternalToInternalColorEncoding(color_encoding, &c)); + JXL_CHECK(c.CreateICC()); + PaddedBytes icc = c.ICC(); + return std::vector(icc.begin(), icc.end()); +} + +void StoreRandomValue(uint8_t* out, Rng* rng, JxlPixelFormat format, + size_t bits_per_sample) { + uint64_t max_val = (1ull << bits_per_sample) - 1; + if (format.data_type == JXL_TYPE_UINT8) { + *out = rng->UniformU(0, max_val); + } else if (format.data_type == JXL_TYPE_UINT16) { + uint32_t val = rng->UniformU(0, max_val); + if (format.endianness == JXL_BIG_ENDIAN) { + StoreBE16(val, out); + } else { + StoreLE16(val, out); + } + } else { + ASSERT_EQ(format.data_type, JXL_TYPE_FLOAT); + float val = rng->UniformF(0.0, 1.0); + uint32_t uval; + memcpy(&uval, &val, 4); + if (format.endianness == JXL_BIG_ENDIAN) { + StoreBE32(uval, out); + } else { + StoreLE32(uval, out); + } + } +} + +void FillPackedImage(size_t bits_per_sample, PackedImage* image) { + JxlPixelFormat format = image->format; + size_t bytes_per_channel = PackedImage::BitsPerChannel(format.data_type) / 8; + uint8_t* out = static_cast(image->pixels()); + size_t stride = image->xsize * format.num_channels * bytes_per_channel; + ASSERT_EQ(image->pixels_size, image->ysize * stride); + Rng rng(129); + for (size_t y = 0; y < image->ysize; ++y) { + for (size_t x = 0; x < image->xsize; ++x) { + for (size_t c = 0; c < format.num_channels; ++c) { + StoreRandomValue(out, &rng, format, bits_per_sample); + out += bytes_per_channel; + } + } + } +} + +struct TestImageParams { + Codec codec; + size_t xsize; + size_t ysize; + size_t bits_per_sample; + bool is_gray; + bool add_alpha; + bool big_endian; + bool add_extra_channels; + + bool ShouldTestRoundtrip() const { + if (codec == Codec::kPNG) { + 
return bits_per_sample <= 16; + } else if (codec == Codec::kPNM) { + // TODO(szabadka) Make PNM encoder endianness-aware. + return ((bits_per_sample <= 16 && big_endian) || + (bits_per_sample == 32 && !add_alpha && !big_endian)); + } else if (codec == Codec::kPGX) { + return ((bits_per_sample == 8 || bits_per_sample == 16) && is_gray && + !add_alpha); + } else if (codec == Codec::kEXR) { +#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \ + defined(THREAD_SANITIZER) + // OpenEXR 2.3 has a memory leak in IlmThread_2_3::ThreadPool + return false; +#else + return bits_per_sample == 32 && !is_gray; +#endif + } else if (codec == Codec::kJPG) { + return bits_per_sample == 8 && !add_alpha; + } else { + return false; + } + } + + JxlPixelFormat PixelFormat() const { + JxlPixelFormat format; + format.num_channels = (is_gray ? 1 : 3) + (add_alpha ? 1 : 0); + format.data_type = (bits_per_sample == 32 ? JXL_TYPE_FLOAT + : bits_per_sample > 8 ? JXL_TYPE_UINT16 + : JXL_TYPE_UINT8); + format.endianness = big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN; + format.align = 0; + return format; + } + + std::string DebugString() const { + std::ostringstream os; + os << "bps:" << bits_per_sample << " gr:" << is_gray << " al:" << add_alpha + << " be: " << big_endian << " ec: " << add_extra_channels; + return os.str(); + } +}; + +void CreateTestImage(const TestImageParams& params, PackedPixelFile* ppf) { + ppf->info.xsize = params.xsize; + ppf->info.ysize = params.ysize; + ppf->info.bits_per_sample = params.bits_per_sample; + ppf->info.exponent_bits_per_sample = params.bits_per_sample == 32 ? 8 : 0; + ppf->info.num_color_channels = params.is_gray ? 1 : 3; + ppf->info.alpha_bits = params.add_alpha ? 
params.bits_per_sample : 0; + ppf->info.alpha_premultiplied = (params.codec == Codec::kEXR); + + JxlColorEncoding color_encoding = CreateTestColorEncoding(params.is_gray); + ppf->icc = GenerateICC(color_encoding); + ppf->color_encoding = color_encoding; + + PackedFrame frame(params.xsize, params.ysize, params.PixelFormat()); + FillPackedImage(params.bits_per_sample, &frame.color); + if (params.add_extra_channels) { + for (size_t i = 0; i < 7; ++i) { + JxlPixelFormat ec_format = params.PixelFormat(); + ec_format.num_channels = 1; + PackedImage ec(params.xsize, params.ysize, ec_format); + FillPackedImage(params.bits_per_sample, &ec); + frame.extra_channels.emplace_back(std::move(ec)); + PackedExtraChannel pec; + pec.ec_info.bits_per_sample = params.bits_per_sample; + pec.ec_info.type = static_cast(i); + ppf->extra_channels_info.emplace_back(std::move(pec)); + } + } + ppf->frames.emplace_back(std::move(frame)); +} + +// Ensures reading a newly written file leads to the same image pixels. +void TestRoundTrip(const TestImageParams& params, ThreadPool* pool) { + if (!params.ShouldTestRoundtrip()) return; + + std::string extension = ExtensionFromCodec( + params.codec, params.is_gray, params.add_alpha, params.bits_per_sample); + printf("Codec %s %s\n", extension.c_str(), params.DebugString().c_str()); + + PackedPixelFile ppf_in; + CreateTestImage(params, &ppf_in); + + EncodedImage encoded; + auto encoder = Encoder::FromExtension(extension); + if (!encoder) { + fprintf(stderr, "Skipping test because of missing codec support.\n"); + return; + } + ASSERT_TRUE(encoder->Encode(ppf_in, &encoded, pool)); + ASSERT_EQ(encoded.bitstreams.size(), 1); + + PackedPixelFile ppf_out; + ColorHints color_hints; + if (params.codec == Codec::kPNM || params.codec == Codec::kPGX) { + color_hints.Add("color_space", + params.is_gray ? 
"Gra_D65_Rel_SRG" : "RGB_D65_SRG_Rel_SRG"); + } + ASSERT_TRUE(DecodeBytes(Span(encoded.bitstreams[0]), + color_hints, &ppf_out)); + if (params.codec == Codec::kPNG && ppf_out.icc.empty()) { + // Decoding a PNG may drop the ICC profile if there's a valid cICP chunk. + // Rendering intent is not preserved in this case. + EXPECT_EQ(ppf_in.color_encoding.color_space, + ppf_out.color_encoding.color_space); + EXPECT_EQ(ppf_in.color_encoding.white_point, + ppf_out.color_encoding.white_point); + if (ppf_in.color_encoding.color_space != JXL_COLOR_SPACE_GRAY) { + EXPECT_EQ(ppf_in.color_encoding.primaries, + ppf_out.color_encoding.primaries); + } + EXPECT_EQ(ppf_in.color_encoding.transfer_function, + ppf_out.color_encoding.transfer_function); + EXPECT_EQ(ppf_out.color_encoding.rendering_intent, + JXL_RENDERING_INTENT_RELATIVE); + } else if (params.codec != Codec::kPNM && params.codec != Codec::kPGX && + params.codec != Codec::kEXR) { + EXPECT_EQ(ppf_in.icc, ppf_out.icc); + } + + ASSERT_EQ(ppf_out.frames.size(), 1); + const auto& frame_in = ppf_in.frames[0]; + const auto& frame_out = ppf_out.frames[0]; + VerifySameImage(frame_in.color, ppf_in.info.bits_per_sample, frame_out.color, + ppf_out.info.bits_per_sample, + /*lossless=*/params.codec != Codec::kJPG); + ASSERT_EQ(frame_in.extra_channels.size(), frame_out.extra_channels.size()); + ASSERT_EQ(ppf_out.extra_channels_info.size(), + frame_out.extra_channels.size()); + for (size_t i = 0; i < frame_in.extra_channels.size(); ++i) { + VerifySameImage(frame_in.extra_channels[i], ppf_in.info.bits_per_sample, + frame_out.extra_channels[i], ppf_out.info.bits_per_sample, + /*lossless=*/true); + EXPECT_EQ(ppf_out.extra_channels_info[i].ec_info.type, + ppf_in.extra_channels_info[i].ec_info.type); + } +} + +TEST(CodecTest, TestRoundTrip) { + ThreadPoolForTests pool(12); + + TestImageParams params; + params.xsize = 7; + params.ysize = 4; + + for (Codec codec : + {Codec::kPNG, Codec::kPNM, Codec::kPGX, Codec::kEXR, Codec::kJPG}) { + for (int 
bits_per_sample : {4, 8, 10, 12, 16, 32}) { + for (bool is_gray : {false, true}) { + for (bool add_alpha : {false, true}) { + for (bool big_endian : {false, true}) { + params.codec = codec; + params.bits_per_sample = static_cast(bits_per_sample); + params.is_gray = is_gray; + params.add_alpha = add_alpha; + params.big_endian = big_endian; + params.add_extra_channels = false; + TestRoundTrip(params, &pool); + if (codec == Codec::kPNM && add_alpha) { + params.add_extra_channels = true; + TestRoundTrip(params, &pool); + } + } + } + } + } + } +} + +TEST(CodecTest, LosslessPNMRoundtrip) { + ThreadPoolForTests pool(12); + + static const char* kChannels[] = {"", "g", "ga", "rgb", "rgba"}; + static const char* kExtension[] = {"", ".pgm", ".pam", ".ppm", ".pam"}; + for (size_t bit_depth = 1; bit_depth <= 16; ++bit_depth) { + for (size_t channels = 1; channels <= 4; ++channels) { + if (bit_depth == 1 && (channels == 2 || channels == 4)) continue; + std::string extension(kExtension[channels]); + std::string filename = "jxl/flower/flower_small." + + std::string(kChannels[channels]) + ".depth" + + std::to_string(bit_depth) + extension; + const PaddedBytes orig = jxl::test::ReadTestData(filename); + + PackedPixelFile ppf; + ColorHints color_hints; + color_hints.Add("color_space", + channels < 3 ? 
"Gra_D65_Rel_SRG" : "RGB_D65_SRG_Rel_SRG"); + ASSERT_TRUE(DecodeBytes(Span(orig.data(), orig.size()), + color_hints, &ppf)); + + EncodedImage encoded; + auto encoder = Encoder::FromExtension(extension); + ASSERT_TRUE(encoder.get()); + ASSERT_TRUE(encoder->Encode(ppf, &encoded, &pool)); + ASSERT_EQ(encoded.bitstreams.size(), 1); + ASSERT_EQ(orig.size(), encoded.bitstreams[0].size()); + EXPECT_EQ(0, + memcmp(orig.data(), encoded.bitstreams[0].data(), orig.size())); + } + } +} + +TEST(CodecTest, TestPNM) { TestCodecPNM(); } + +TEST(CodecTest, FormatNegotiation) { + const std::vector accepted_formats = { + {/*num_channels=*/4, + /*data_type=*/JXL_TYPE_UINT16, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0}, + {/*num_channels=*/3, + /*data_type=*/JXL_TYPE_UINT8, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0}, + {/*num_channels=*/3, + /*data_type=*/JXL_TYPE_UINT16, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0}, + {/*num_channels=*/1, + /*data_type=*/JXL_TYPE_UINT8, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0}, + }; + + JxlBasicInfo info; + JxlEncoderInitBasicInfo(&info); + info.bits_per_sample = 12; + info.num_color_channels = 2; + + JxlPixelFormat format; + EXPECT_FALSE(SelectFormat(accepted_formats, info, &format)); + + info.num_color_channels = 3; + ASSERT_TRUE(SelectFormat(accepted_formats, info, &format)); + EXPECT_EQ(format.num_channels, info.num_color_channels); + // 16 is the smallest accepted format that can accommodate the 12-bit data. 
+ EXPECT_EQ(format.data_type, JXL_TYPE_UINT16); +} + +TEST(CodecTest, EncodeToPNG) { + ThreadPool* const pool = nullptr; + + std::unique_ptr png_encoder = Encoder::FromExtension(".png"); + if (!png_encoder) { + fprintf(stderr, "Skipping test because of missing codec support.\n"); + return; + } + + const PaddedBytes original_png = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png"); + PackedPixelFile ppf; + ASSERT_TRUE(extras::DecodeBytes(Span(original_png), + ColorHints(), &ppf)); + + const JxlPixelFormat& format = ppf.frames.front().color.format; + ASSERT_THAT( + png_encoder->AcceptedFormats(), + Contains(AllOf(Field(&JxlPixelFormat::num_channels, format.num_channels), + Field(&JxlPixelFormat::data_type, format.data_type), + Field(&JxlPixelFormat::endianness, format.endianness)))); + EncodedImage encoded_png; + ASSERT_TRUE(png_encoder->Encode(ppf, &encoded_png, pool)); + EXPECT_THAT(encoded_png.icc, IsEmpty()); + ASSERT_THAT(encoded_png.bitstreams, SizeIs(1)); + + PackedPixelFile decoded_ppf; + ASSERT_TRUE( + extras::DecodeBytes(Span(encoded_png.bitstreams.front()), + ColorHints(), &decoded_ppf)); + + ASSERT_EQ(decoded_ppf.info.bits_per_sample, ppf.info.bits_per_sample); + ASSERT_EQ(decoded_ppf.frames.size(), 1); + VerifySameImage(ppf.frames[0].color, ppf.info.bits_per_sample, + decoded_ppf.frames[0].color, + decoded_ppf.info.bits_per_sample); +} + +} // namespace +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/apng.cc b/third-party/libjxl/libjxl/lib/extras/dec/apng.cc new file mode 100644 index 0000000000..b0a19ea721 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/apng.cc @@ -0,0 +1,987 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/dec/apng.h" + +// Parts of this code are taken from apngdis, which has the following license: +/* APNG Disassembler 2.8 + * + * Deconstructs APNG files into individual frames. + * + * http://apngdis.sourceforge.net + * + * Copyright (c) 2010-2015 Max Stepin + * maxst at users.sourceforge.net + * + * zlib license + * ------------ + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ * + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "lib/extras/size_constraints.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/scope_guard.h" +#include "lib/jxl/common.h" +#include "lib/jxl/sanitizers.h" +#if JPEGXL_ENABLE_APNG +#include "png.h" /* original (unpatched) libpng is ok */ +#endif + +namespace jxl { +namespace extras { + +#if JPEGXL_ENABLE_APNG +namespace { + +constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69, + 0x66, 0x00, 0x00}; + +/* hIST chunk tail is not proccesed properly; skip this chunk completely; + see https://github.com/glennrp/libpng/pull/413 */ +const png_byte kIgnoredPngChunks[] = { + 104, 73, 83, 84, '\0' /* hIST */ +}; + +// Returns floating-point value from the PNG encoding (times 10^5). +static double F64FromU32(const uint32_t x) { + return static_cast(x) * 1E-5; +} + +Status DecodeSRGB(const unsigned char* payload, const size_t payload_size, + JxlColorEncoding* color_encoding) { + if (payload_size != 1) return JXL_FAILURE("Wrong sRGB size"); + // (PNG uses the same values as ICC.) + if (payload[0] >= 4) return JXL_FAILURE("Invalid Rendering Intent"); + color_encoding->white_point = JXL_WHITE_POINT_D65; + color_encoding->primaries = JXL_PRIMARIES_SRGB; + color_encoding->transfer_function = JXL_TRANSFER_FUNCTION_SRGB; + color_encoding->rendering_intent = + static_cast(payload[0]); + return true; +} + +// If the cICP profile is not fully supported, return false and leave +// color_encoding unmodified. 
+Status DecodeCICP(const unsigned char* payload, const size_t payload_size, + JxlColorEncoding* color_encoding) { + if (payload_size != 4) return JXL_FAILURE("Wrong cICP size"); + JxlColorEncoding color_enc = *color_encoding; + + // From https://www.itu.int/rec/T-REC-H.273-202107-I/en + if (payload[0] == 1) { + // IEC 61966-2-1 sRGB + color_enc.primaries = JXL_PRIMARIES_SRGB; + color_enc.white_point = JXL_WHITE_POINT_D65; + } else if (payload[0] == 4) { + // Rec. ITU-R BT.470-6 System M + color_enc.primaries = JXL_PRIMARIES_CUSTOM; + color_enc.primaries_red_xy[0] = 0.67; + color_enc.primaries_red_xy[1] = 0.33; + color_enc.primaries_green_xy[0] = 0.21; + color_enc.primaries_green_xy[1] = 0.71; + color_enc.primaries_blue_xy[0] = 0.14; + color_enc.primaries_blue_xy[1] = 0.08; + color_enc.white_point = JXL_WHITE_POINT_CUSTOM; + color_enc.white_point_xy[0] = 0.310; + color_enc.white_point_xy[1] = 0.316; + } else if (payload[0] == 5) { + // Rec. ITU-R BT.1700-0 625 PAL and 625 SECAM + color_enc.primaries = JXL_PRIMARIES_CUSTOM; + color_enc.primaries_red_xy[0] = 0.64; + color_enc.primaries_red_xy[1] = 0.33; + color_enc.primaries_green_xy[0] = 0.29; + color_enc.primaries_green_xy[1] = 0.60; + color_enc.primaries_blue_xy[0] = 0.15; + color_enc.primaries_blue_xy[1] = 0.06; + color_enc.white_point = JXL_WHITE_POINT_D65; + } else if (payload[0] == 6 || payload[0] == 7) { + // SMPTE ST 170 (2004) / SMPTE ST 240 (1999) + color_enc.primaries = JXL_PRIMARIES_CUSTOM; + color_enc.primaries_red_xy[0] = 0.630; + color_enc.primaries_red_xy[1] = 0.340; + color_enc.primaries_green_xy[0] = 0.310; + color_enc.primaries_green_xy[1] = 0.595; + color_enc.primaries_blue_xy[0] = 0.155; + color_enc.primaries_blue_xy[1] = 0.070; + color_enc.white_point = JXL_WHITE_POINT_D65; + } else if (payload[0] == 8) { + // Generic film (colour filters using Illuminant C) + color_enc.primaries = JXL_PRIMARIES_CUSTOM; + color_enc.primaries_red_xy[0] = 0.681; + color_enc.primaries_red_xy[1] = 0.319; + 
color_enc.primaries_green_xy[0] = 0.243; + color_enc.primaries_green_xy[1] = 0.692; + color_enc.primaries_blue_xy[0] = 0.145; + color_enc.primaries_blue_xy[1] = 0.049; + color_enc.white_point = JXL_WHITE_POINT_CUSTOM; + color_enc.white_point_xy[0] = 0.310; + color_enc.white_point_xy[1] = 0.316; + } else if (payload[0] == 9) { + // Rec. ITU-R BT.2100-2 + color_enc.primaries = JXL_PRIMARIES_2100; + color_enc.white_point = JXL_WHITE_POINT_D65; + } else if (payload[0] == 10) { + // CIE 1931 XYZ + color_enc.primaries = JXL_PRIMARIES_CUSTOM; + color_enc.primaries_red_xy[0] = 1; + color_enc.primaries_red_xy[1] = 0; + color_enc.primaries_green_xy[0] = 0; + color_enc.primaries_green_xy[1] = 1; + color_enc.primaries_blue_xy[0] = 0; + color_enc.primaries_blue_xy[1] = 0; + color_enc.white_point = JXL_WHITE_POINT_E; + } else if (payload[0] == 11) { + // SMPTE RP 431-2 (2011) + color_enc.primaries = JXL_PRIMARIES_P3; + color_enc.white_point = JXL_WHITE_POINT_DCI; + } else if (payload[0] == 12) { + // SMPTE EG 432-1 (2010) + color_enc.primaries = JXL_PRIMARIES_P3; + color_enc.white_point = JXL_WHITE_POINT_D65; + } else if (payload[0] == 22) { + color_enc.primaries = JXL_PRIMARIES_CUSTOM; + color_enc.primaries_red_xy[0] = 0.630; + color_enc.primaries_red_xy[1] = 0.340; + color_enc.primaries_green_xy[0] = 0.295; + color_enc.primaries_green_xy[1] = 0.605; + color_enc.primaries_blue_xy[0] = 0.155; + color_enc.primaries_blue_xy[1] = 0.077; + color_enc.white_point = JXL_WHITE_POINT_D65; + } else { + JXL_WARNING("Unsupported primaries specified in cICP chunk: %d", + static_cast(payload[0])); + return false; + } + + if (payload[1] == 1 || payload[1] == 6 || payload[1] == 14 || + payload[1] == 15) { + // Rec. ITU-R BT.709-6 + color_enc.transfer_function = JXL_TRANSFER_FUNCTION_709; + } else if (payload[1] == 4) { + // Rec. 
ITU-R BT.1700-0 625 PAL and 625 SECAM + color_enc.transfer_function = JXL_TRANSFER_FUNCTION_GAMMA; + color_enc.gamma = 1 / 2.2; + } else if (payload[1] == 5) { + // Rec. ITU-R BT.470-6 System B, G + color_enc.transfer_function = JXL_TRANSFER_FUNCTION_GAMMA; + color_enc.gamma = 1 / 2.8; + } else if (payload[1] == 8 || payload[1] == 13 || payload[1] == 16 || + payload[1] == 17 || payload[1] == 18) { + // These codes all match the corresponding JXL enum values + color_enc.transfer_function = static_cast(payload[1]); + } else { + JXL_WARNING("Unsupported transfer function specified in cICP chunk: %d", + static_cast(payload[1])); + return false; + } + + if (payload[2] != 0) { + JXL_WARNING("Unsupported color space specified in cICP chunk: %d", + static_cast(payload[2])); + return false; + } + if (payload[3] != 1) { + JXL_WARNING("Unsupported full-range flag specified in cICP chunk: %d", + static_cast(payload[3])); + return false; + } + // cICP has no rendering intent, so use the default + color_enc.rendering_intent = JXL_RENDERING_INTENT_RELATIVE; + *color_encoding = color_enc; + return true; +} + +Status DecodeGAMA(const unsigned char* payload, const size_t payload_size, + JxlColorEncoding* color_encoding) { + if (payload_size != 4) return JXL_FAILURE("Wrong gAMA size"); + color_encoding->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA; + color_encoding->gamma = F64FromU32(LoadBE32(payload)); + return true; +} + +Status DecodeCHRM(const unsigned char* payload, const size_t payload_size, + JxlColorEncoding* color_encoding) { + if (payload_size != 32) return JXL_FAILURE("Wrong cHRM size"); + + color_encoding->white_point = JXL_WHITE_POINT_CUSTOM; + color_encoding->white_point_xy[0] = F64FromU32(LoadBE32(payload + 0)); + color_encoding->white_point_xy[1] = F64FromU32(LoadBE32(payload + 4)); + + color_encoding->primaries = JXL_PRIMARIES_CUSTOM; + color_encoding->primaries_red_xy[0] = F64FromU32(LoadBE32(payload + 8)); + color_encoding->primaries_red_xy[1] = 
F64FromU32(LoadBE32(payload + 12)); + color_encoding->primaries_green_xy[0] = F64FromU32(LoadBE32(payload + 16)); + color_encoding->primaries_green_xy[1] = F64FromU32(LoadBE32(payload + 20)); + color_encoding->primaries_blue_xy[0] = F64FromU32(LoadBE32(payload + 24)); + color_encoding->primaries_blue_xy[1] = F64FromU32(LoadBE32(payload + 28)); + return true; +} + +// Retrieves XMP and EXIF/IPTC from itext and text. +class BlobsReaderPNG { + public: + static Status Decode(const png_text_struct& info, PackedMetadata* metadata) { + // We trust these are properly null-terminated by libpng. + const char* key = info.key; + const char* value = info.text; + if (strstr(key, "XML:com.adobe.xmp")) { + metadata->xmp.resize(strlen(value)); // safe, see above + memcpy(metadata->xmp.data(), value, metadata->xmp.size()); + } + + std::string type; + std::vector bytes; + + // Handle text chunks annotated with key "Raw profile type ####", with + // #### a type, which may contain metadata. + const char* kKey = "Raw profile type "; + if (strncmp(key, kKey, strlen(kKey)) != 0) return false; + + if (!MaybeDecodeBase16(key, value, &type, &bytes)) { + JXL_WARNING("Couldn't parse 'Raw format type' text chunk"); + return false; + } + if (type == "exif") { + // Remove "Exif\0\0" prefix if present + if (bytes.size() >= sizeof kExifSignature && + memcmp(bytes.data(), kExifSignature, sizeof kExifSignature) == 0) { + bytes.erase(bytes.begin(), bytes.begin() + sizeof kExifSignature); + } + if (!metadata->exif.empty()) { + JXL_WARNING("overwriting EXIF (%" PRIuS " bytes) with base16 (%" PRIuS + " bytes)", + metadata->exif.size(), bytes.size()); + } + metadata->exif = std::move(bytes); + } else if (type == "iptc") { + // TODO (jon): Deal with IPTC in some way + } else if (type == "8bim") { + // TODO (jon): Deal with 8bim in some way + } else if (type == "xmp") { + if (!metadata->xmp.empty()) { + JXL_WARNING("overwriting XMP (%" PRIuS " bytes) with base16 (%" PRIuS + " bytes)", + 
metadata->xmp.size(), bytes.size()); + } + metadata->xmp = std::move(bytes); + } else { + JXL_WARNING("Unknown type in 'Raw format type' text chunk: %s: %" PRIuS + " bytes", + type.c_str(), bytes.size()); + } + return true; + } + + private: + // Returns false if invalid. + static JXL_INLINE Status DecodeNibble(const char c, + uint32_t* JXL_RESTRICT nibble) { + if ('a' <= c && c <= 'f') { + *nibble = 10 + c - 'a'; + } else if ('0' <= c && c <= '9') { + *nibble = c - '0'; + } else { + *nibble = 0; + return JXL_FAILURE("Invalid metadata nibble"); + } + JXL_ASSERT(*nibble < 16); + return true; + } + + // Returns false if invalid. + static JXL_INLINE Status DecodeDecimal(const char** pos, const char* end, + uint32_t* JXL_RESTRICT value) { + size_t len = 0; + *value = 0; + while (*pos < end) { + char next = **pos; + if (next >= '0' && next <= '9') { + *value = (*value * 10) + static_cast(next - '0'); + len++; + if (len > 8) { + break; + } + } else { + // Do not consume terminator (non-decimal digit). + break; + } + (*pos)++; + } + if (len == 0 || len > 8) { + return JXL_FAILURE("Failed to parse decimal"); + } + return true; + } + + // Parses a PNG text chunk with key of the form "Raw profile type ####", with + // #### a type. + // Returns whether it could successfully parse the content. + // We trust key and encoded are null-terminated because they come from + // libpng. 
+ static Status MaybeDecodeBase16(const char* key, const char* encoded, + std::string* type, + std::vector* bytes) { + const char* encoded_end = encoded + strlen(encoded); + + const char* kKey = "Raw profile type "; + if (strncmp(key, kKey, strlen(kKey)) != 0) return false; + *type = key + strlen(kKey); + const size_t kMaxTypeLen = 20; + if (type->length() > kMaxTypeLen) return false; // Type too long + + // Header: freeform string and number of bytes + // Expected format is: + // \n + // profile name/description\n + // 40\n (the number of bytes after hex-decoding) + // 01234566789abcdef....\n (72 bytes per line max). + // 012345667\n (last line) + const char* pos = encoded; + + if (*(pos++) != '\n') return false; + while (pos < encoded_end && *pos != '\n') { + pos++; + } + if (pos == encoded_end) return false; + // We parsed so far a \n, some number of non \n characters and are now + // pointing at a \n. + if (*(pos++) != '\n') return false; + // Skip leading spaces + while (pos < encoded_end && *pos == ' ') { + pos++; + } + uint32_t bytes_to_decode = 0; + JXL_RETURN_IF_ERROR(DecodeDecimal(&pos, encoded_end, &bytes_to_decode)); + + // We need 2*bytes for the hex values plus 1 byte every 36 values, + // plus terminal \n for length. + const unsigned long needed_bytes = + bytes_to_decode * 2 + 1 + DivCeil(bytes_to_decode, 36); + if (needed_bytes != static_cast(encoded_end - pos)) { + return JXL_FAILURE("Not enough bytes to parse %d bytes in hex", + bytes_to_decode); + } + JXL_ASSERT(bytes->empty()); + bytes->reserve(bytes_to_decode); + + // Encoding: base16 with newline after 72 chars. + // pos points to the \n before the first line of hex values. 
+ for (size_t i = 0; i < bytes_to_decode; ++i) { + if (i % 36 == 0) { + if (pos + 1 >= encoded_end) return false; // Truncated base16 1 + if (*pos != '\n') return false; // Expected newline + ++pos; + } + + if (pos + 2 >= encoded_end) return false; // Truncated base16 2; + uint32_t nibble0, nibble1; + JXL_RETURN_IF_ERROR(DecodeNibble(pos[0], &nibble0)); + JXL_RETURN_IF_ERROR(DecodeNibble(pos[1], &nibble1)); + bytes->push_back(static_cast((nibble0 << 4) + nibble1)); + pos += 2; + } + if (pos + 1 != encoded_end) return false; // Too many encoded bytes + if (pos[0] != '\n') return false; // Incorrect metadata terminator + return true; + } +}; + +constexpr bool isAbc(char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + +constexpr uint32_t kId_IHDR = 0x52444849; +constexpr uint32_t kId_acTL = 0x4C546361; +constexpr uint32_t kId_fcTL = 0x4C546366; +constexpr uint32_t kId_IDAT = 0x54414449; +constexpr uint32_t kId_fdAT = 0x54416466; +constexpr uint32_t kId_IEND = 0x444E4549; +constexpr uint32_t kId_cICP = 0x50434963; +constexpr uint32_t kId_iCCP = 0x50434369; +constexpr uint32_t kId_sRGB = 0x42475273; +constexpr uint32_t kId_gAMA = 0x414D4167; +constexpr uint32_t kId_cHRM = 0x4D524863; +constexpr uint32_t kId_eXIf = 0x66495865; + +struct APNGFrame { + std::vector pixels; + std::vector rows; + unsigned int w, h, delay_num, delay_den; +}; + +struct Reader { + const uint8_t* next; + const uint8_t* last; + bool Read(void* data, size_t len) { + size_t cap = last - next; + size_t to_copy = std::min(cap, len); + memcpy(data, next, to_copy); + next += to_copy; + return (len == to_copy); + } + bool Eof() { return next == last; } +}; + +const unsigned long cMaxPNGSize = 1000000UL; +const size_t kMaxPNGChunkSize = 1lu << 30; // 1 GB + +void info_fn(png_structp png_ptr, png_infop info_ptr) { + png_set_expand(png_ptr); + png_set_palette_to_rgb(png_ptr); + png_set_tRNS_to_alpha(png_ptr); + (void)png_set_interlace_handling(png_ptr); + png_read_update_info(png_ptr, 
info_ptr); +} + +void row_fn(png_structp png_ptr, png_bytep new_row, png_uint_32 row_num, + int pass) { + APNGFrame* frame = (APNGFrame*)png_get_progressive_ptr(png_ptr); + JXL_CHECK(frame); + JXL_CHECK(row_num < frame->rows.size()); + JXL_CHECK(frame->rows[row_num] < frame->pixels.data() + frame->pixels.size()); + png_progressive_combine_row(png_ptr, frame->rows[row_num], new_row); +} + +inline unsigned int read_chunk(Reader* r, std::vector* pChunk) { + unsigned char len[4]; + if (r->Read(&len, 4)) { + const auto size = png_get_uint_32(len); + // Check first, to avoid overflow. + if (size > kMaxPNGChunkSize) { + JXL_WARNING("APNG chunk size is too big"); + return 0; + } + pChunk->resize(size + 12); + memcpy(pChunk->data(), len, 4); + if (r->Read(pChunk->data() + 4, pChunk->size() - 4)) { + return LoadLE32(pChunk->data() + 4); + } + } + return 0; +} + +int processing_start(png_structp& png_ptr, png_infop& info_ptr, void* frame_ptr, + bool hasInfo, std::vector& chunkIHDR, + std::vector>& chunksInfo) { + unsigned char header[8] = {137, 80, 78, 71, 13, 10, 26, 10}; + + // Cleanup prior decoder, if any. + png_destroy_read_struct(&png_ptr, &info_ptr, 0); + // Just in case. Not all versions on libpng wipe-out the pointers. 
+ png_ptr = nullptr; + info_ptr = nullptr; + + png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + info_ptr = png_create_info_struct(png_ptr); + if (!png_ptr || !info_ptr) return 1; + + if (setjmp(png_jmpbuf(png_ptr))) { + return 1; + } + + png_set_keep_unknown_chunks(png_ptr, 1, kIgnoredPngChunks, + (int)sizeof(kIgnoredPngChunks) / 5); + + png_set_crc_action(png_ptr, PNG_CRC_QUIET_USE, PNG_CRC_QUIET_USE); + png_set_progressive_read_fn(png_ptr, frame_ptr, info_fn, row_fn, NULL); + + png_process_data(png_ptr, info_ptr, header, 8); + png_process_data(png_ptr, info_ptr, chunkIHDR.data(), chunkIHDR.size()); + + if (hasInfo) { + for (unsigned int i = 0; i < chunksInfo.size(); i++) { + png_process_data(png_ptr, info_ptr, chunksInfo[i].data(), + chunksInfo[i].size()); + } + } + return 0; +} + +int processing_data(png_structp png_ptr, png_infop info_ptr, unsigned char* p, + unsigned int size) { + if (!png_ptr || !info_ptr) return 1; + + if (setjmp(png_jmpbuf(png_ptr))) { + return 1; + } + + png_process_data(png_ptr, info_ptr, p, size); + return 0; +} + +int processing_finish(png_structp png_ptr, png_infop info_ptr, + PackedMetadata* metadata) { + unsigned char footer[12] = {0, 0, 0, 0, 73, 69, 78, 68, 174, 66, 96, 130}; + + if (!png_ptr || !info_ptr) return 1; + + if (setjmp(png_jmpbuf(png_ptr))) { + return 1; + } + + png_process_data(png_ptr, info_ptr, footer, 12); + // before destroying: check if we encountered any metadata chunks + png_textp text_ptr; + int num_text; + png_get_text(png_ptr, info_ptr, &text_ptr, &num_text); + for (int i = 0; i < num_text; i++) { + (void)BlobsReaderPNG::Decode(text_ptr[i], metadata); + } + + return 0; +} + +} // namespace +#endif + +bool CanDecodeAPNG() { +#if JPEGXL_ENABLE_APNG + return true; +#else + return false; +#endif +} + +Status DecodeImageAPNG(const Span bytes, + const ColorHints& color_hints, PackedPixelFile* ppf, + const SizeConstraints* constraints) { +#if JPEGXL_ENABLE_APNG + Reader r; + unsigned int 
id, j, w, h, w0, h0, x0, y0; + unsigned int delay_num, delay_den, dop, bop, rowbytes, imagesize; + unsigned char sig[8]; + png_structp png_ptr = nullptr; + png_infop info_ptr = nullptr; + std::vector chunk; + std::vector chunkIHDR; + std::vector> chunksInfo; + bool isAnimated = false; + bool hasInfo = false; + bool seenFctl = false; + APNGFrame frameRaw = {}; + uint32_t num_channels; + JxlPixelFormat format; + unsigned int bytes_per_pixel = 0; + + struct FrameInfo { + PackedImage data; + uint32_t duration; + size_t x0, xsize; + size_t y0, ysize; + uint32_t dispose_op; + uint32_t blend_op; + }; + + std::vector frames; + + // Make sure png memory is released in any case. + auto scope_guard = MakeScopeGuard([&]() { + png_destroy_read_struct(&png_ptr, &info_ptr, 0); + // Just in case. Not all versions on libpng wipe-out the pointers. + png_ptr = nullptr; + info_ptr = nullptr; + }); + + r = {bytes.data(), bytes.data() + bytes.size()}; + // Not a PNG => not an error + unsigned char png_signature[8] = {137, 80, 78, 71, 13, 10, 26, 10}; + if (!r.Read(sig, 8) || memcmp(sig, png_signature, 8) != 0) { + return false; + } + id = read_chunk(&r, &chunkIHDR); + + ppf->info.exponent_bits_per_sample = 0; + ppf->info.alpha_exponent_bits = 0; + ppf->info.orientation = JXL_ORIENT_IDENTITY; + + ppf->frames.clear(); + + bool have_color = false; + bool have_cicp = false, have_iccp = false, have_srgb = false; + bool errorstate = true; + if (id == kId_IHDR && chunkIHDR.size() == 25) { + x0 = 0; + y0 = 0; + delay_num = 1; + delay_den = 10; + dop = 0; + bop = 0; + + w0 = w = png_get_uint_32(chunkIHDR.data() + 8); + h0 = h = png_get_uint_32(chunkIHDR.data() + 12); + if (w > cMaxPNGSize || h > cMaxPNGSize) { + return false; + } + + // default settings in case e.g. 
only gAMA is given + ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB; + ppf->color_encoding.white_point = JXL_WHITE_POINT_D65; + ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB; + ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB; + ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_RELATIVE; + + if (!processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo, + chunkIHDR, chunksInfo)) { + while (!r.Eof()) { + id = read_chunk(&r, &chunk); + if (!id) break; + seenFctl |= (id == kId_fcTL); + + if (id == kId_acTL && !hasInfo && !isAnimated) { + isAnimated = true; + ppf->info.have_animation = true; + ppf->info.animation.tps_numerator = 1000; + ppf->info.animation.tps_denominator = 1; + } else if (id == kId_IEND || + (id == kId_fcTL && (!hasInfo || isAnimated))) { + if (hasInfo) { + if (!processing_finish(png_ptr, info_ptr, &ppf->metadata)) { + // Allocates the frame buffer. + uint32_t duration = delay_num * 1000 / delay_den; + frames.push_back(FrameInfo{PackedImage(w0, h0, format), duration, + x0, w0, y0, h0, dop, bop}); + auto& frame = frames.back().data; + for (size_t y = 0; y < h0; ++y) { + memcpy(static_cast(frame.pixels()) + frame.stride * y, + frameRaw.rows[y], bytes_per_pixel * w0); + } + } else { + break; + } + } + + if (id == kId_IEND) { + errorstate = false; + break; + } + if (chunk.size() < 34) { + return JXL_FAILURE("Received a chunk that is too small (%" PRIuS + "B)", + chunk.size()); + } + // At this point the old frame is done. Let's start a new one. 
+ w0 = png_get_uint_32(chunk.data() + 12); + h0 = png_get_uint_32(chunk.data() + 16); + x0 = png_get_uint_32(chunk.data() + 20); + y0 = png_get_uint_32(chunk.data() + 24); + delay_num = png_get_uint_16(chunk.data() + 28); + delay_den = png_get_uint_16(chunk.data() + 30); + dop = chunk[32]; + bop = chunk[33]; + + if (!delay_den) delay_den = 100; + + if (w0 > cMaxPNGSize || h0 > cMaxPNGSize || x0 > cMaxPNGSize || + y0 > cMaxPNGSize || x0 + w0 > w || y0 + h0 > h || dop > 2 || + bop > 1) { + break; + } + + if (hasInfo) { + memcpy(chunkIHDR.data() + 8, chunk.data() + 12, 8); + if (processing_start(png_ptr, info_ptr, (void*)&frameRaw, hasInfo, + chunkIHDR, chunksInfo)) { + break; + } + } + } else if (id == kId_IDAT) { + // First IDAT chunk means we now have all header info + if (seenFctl) { + // `fcTL` chunk must appear after all `IDAT` chunks + return JXL_FAILURE("IDAT chunk after fcTL chunk"); + } + hasInfo = true; + JXL_CHECK(w == png_get_image_width(png_ptr, info_ptr)); + JXL_CHECK(h == png_get_image_height(png_ptr, info_ptr)); + int colortype = png_get_color_type(png_ptr, info_ptr); + int png_bit_depth = png_get_bit_depth(png_ptr, info_ptr); + ppf->info.bits_per_sample = png_bit_depth; + png_color_8p sigbits = NULL; + png_get_sBIT(png_ptr, info_ptr, &sigbits); + if (colortype & 1) { + // palette will actually be 8-bit regardless of the index bitdepth + ppf->info.bits_per_sample = 8; + } + if (colortype & 2) { + ppf->info.num_color_channels = 3; + ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB; + if (sigbits && sigbits->red == sigbits->green && + sigbits->green == sigbits->blue) + ppf->info.bits_per_sample = sigbits->red; + } else { + ppf->info.num_color_channels = 1; + ppf->color_encoding.color_space = JXL_COLOR_SPACE_GRAY; + if (sigbits) ppf->info.bits_per_sample = sigbits->gray; + } + if (colortype & 4 || + png_get_valid(png_ptr, info_ptr, PNG_INFO_tRNS)) { + ppf->info.alpha_bits = ppf->info.bits_per_sample; + if (sigbits) { + if (sigbits->alpha && + 
sigbits->alpha != ppf->info.bits_per_sample) { + return JXL_FAILURE("Unsupported alpha bit-depth"); + } + ppf->info.alpha_bits = sigbits->alpha; + } + } else { + ppf->info.alpha_bits = 0; + } + ppf->color_encoding.color_space = + (ppf->info.num_color_channels == 1 ? JXL_COLOR_SPACE_GRAY + : JXL_COLOR_SPACE_RGB); + ppf->info.xsize = w; + ppf->info.ysize = h; + JXL_RETURN_IF_ERROR(VerifyDimensions(constraints, w, h)); + num_channels = + ppf->info.num_color_channels + (ppf->info.alpha_bits ? 1 : 0); + format = { + /*num_channels=*/num_channels, + /*data_type=*/ppf->info.bits_per_sample > 8 ? JXL_TYPE_UINT16 + : JXL_TYPE_UINT8, + /*endianness=*/JXL_BIG_ENDIAN, + /*align=*/0, + }; + if (png_bit_depth > 8 && format.data_type == JXL_TYPE_UINT8) { + png_set_strip_16(png_ptr); + } + bytes_per_pixel = + num_channels * (format.data_type == JXL_TYPE_UINT16 ? 2 : 1); + rowbytes = w * bytes_per_pixel; + imagesize = h * rowbytes; + frameRaw.pixels.resize(imagesize); + frameRaw.rows.resize(h); + for (j = 0; j < h; j++) + frameRaw.rows[j] = frameRaw.pixels.data() + j * rowbytes; + + if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) { + break; + } + } else if (id == kId_fdAT && isAnimated) { + if (!hasInfo) { + return JXL_FAILURE("fDAT chunk before iDAT"); + } + png_save_uint_32(chunk.data() + 4, chunk.size() - 16); + memcpy(chunk.data() + 8, "IDAT", 4); + if (processing_data(png_ptr, info_ptr, chunk.data() + 4, + chunk.size() - 4)) { + break; + } + } else if (id == kId_cICP) { + // Color profile chunks: cICP has the highest priority, followed by + // iCCP and sRGB (which shouldn't co-exist, but if they do, we use + // iCCP), followed finally by gAMA and cHRM. 
+ if (DecodeCICP(chunk.data() + 8, chunk.size() - 12, + &ppf->color_encoding)) { + have_cicp = true; + have_color = true; + ppf->icc.clear(); + } + } else if (!have_cicp && id == kId_iCCP) { + if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) { + JXL_WARNING("Corrupt iCCP chunk"); + break; + } + + // TODO(jon): catch special case of PQ and synthesize color encoding + // in that case + int compression_type; + png_bytep profile; + png_charp name; + png_uint_32 proflen = 0; + auto ok = png_get_iCCP(png_ptr, info_ptr, &name, &compression_type, + &profile, &proflen); + if (ok && proflen) { + ppf->icc.assign(profile, profile + proflen); + have_color = true; + have_iccp = true; + } else { + // TODO(eustas): JXL_WARNING? + } + } else if (!have_cicp && !have_iccp && id == kId_sRGB) { + JXL_RETURN_IF_ERROR(DecodeSRGB(chunk.data() + 8, chunk.size() - 12, + &ppf->color_encoding)); + have_srgb = true; + have_color = true; + } else if (!have_cicp && !have_srgb && !have_iccp && id == kId_gAMA) { + JXL_RETURN_IF_ERROR(DecodeGAMA(chunk.data() + 8, chunk.size() - 12, + &ppf->color_encoding)); + have_color = true; + } else if (!have_cicp && !have_srgb && !have_iccp && id == kId_cHRM) { + JXL_RETURN_IF_ERROR(DecodeCHRM(chunk.data() + 8, chunk.size() - 12, + &ppf->color_encoding)); + have_color = true; + } else if (id == kId_eXIf) { + ppf->metadata.exif.resize(chunk.size() - 12); + memcpy(ppf->metadata.exif.data(), chunk.data() + 8, + chunk.size() - 12); + } else if (!isAbc(chunk[4]) || !isAbc(chunk[5]) || !isAbc(chunk[6]) || + !isAbc(chunk[7])) { + break; + } else { + if (processing_data(png_ptr, info_ptr, chunk.data(), chunk.size())) { + break; + } + if (!hasInfo) { + chunksInfo.push_back(chunk); + continue; + } + } + } + } + + JXL_RETURN_IF_ERROR(ApplyColorHints( + color_hints, have_color, ppf->info.num_color_channels == 1, ppf)); + } + + if (errorstate) return false; + + bool has_nontrivial_background = false; + bool previous_frame_should_be_cleared = false; + 
enum { + DISPOSE_OP_NONE = 0, + DISPOSE_OP_BACKGROUND = 1, + DISPOSE_OP_PREVIOUS = 2, + }; + enum { + BLEND_OP_SOURCE = 0, + BLEND_OP_OVER = 1, + }; + for (size_t i = 0; i < frames.size(); i++) { + auto& frame = frames[i]; + JXL_ASSERT(frame.data.xsize == frame.xsize); + JXL_ASSERT(frame.data.ysize == frame.ysize); + + // Before encountering a DISPOSE_OP_NONE frame, the canvas is filled with 0, + // so DISPOSE_OP_BACKGROUND and DISPOSE_OP_PREVIOUS are equivalent. + if (frame.dispose_op == DISPOSE_OP_NONE) { + has_nontrivial_background = true; + } + bool should_blend = frame.blend_op == BLEND_OP_OVER; + bool use_for_next_frame = + has_nontrivial_background && frame.dispose_op != DISPOSE_OP_PREVIOUS; + size_t x0 = frame.x0; + size_t y0 = frame.y0; + size_t xsize = frame.data.xsize; + size_t ysize = frame.data.ysize; + if (previous_frame_should_be_cleared) { + size_t px0 = frames[i - 1].x0; + size_t py0 = frames[i - 1].y0; + size_t pxs = frames[i - 1].xsize; + size_t pys = frames[i - 1].ysize; + if (px0 >= x0 && py0 >= y0 && px0 + pxs <= x0 + xsize && + py0 + pys <= y0 + ysize && frame.blend_op == BLEND_OP_SOURCE && + use_for_next_frame) { + // If the previous frame is entirely contained in the current frame and + // we are using BLEND_OP_SOURCE, nothing special needs to be done. + ppf->frames.emplace_back(std::move(frame.data)); + } else if (px0 == x0 && py0 == y0 && px0 + pxs == x0 + xsize && + py0 + pys == y0 + ysize && use_for_next_frame) { + // If the new frame has the same size as the old one, but we are + // blending, we can instead just not blend. + should_blend = false; + ppf->frames.emplace_back(std::move(frame.data)); + } else if (px0 <= x0 && py0 <= y0 && px0 + pxs >= x0 + xsize && + py0 + pys >= y0 + ysize && use_for_next_frame) { + // If the new frame is contained within the old frame, we can pad the + // new frame with zeros and not blend. 
+ PackedImage new_data(pxs, pys, frame.data.format); + memset(new_data.pixels(), 0, new_data.pixels_size); + for (size_t y = 0; y < ysize; y++) { + size_t bytes_per_pixel = + PackedImage::BitsPerChannel(new_data.format.data_type) * + new_data.format.num_channels / 8; + memcpy(static_cast(new_data.pixels()) + + new_data.stride * (y + y0 - py0) + + bytes_per_pixel * (x0 - px0), + static_cast(frame.data.pixels()) + + frame.data.stride * y, + xsize * bytes_per_pixel); + } + + x0 = px0; + y0 = py0; + xsize = pxs; + ysize = pys; + should_blend = false; + ppf->frames.emplace_back(std::move(new_data)); + } else { + // If all else fails, insert a dummy blank frame with kReplace. + PackedImage blank(pxs, pys, frame.data.format); + memset(blank.pixels(), 0, blank.pixels_size); + ppf->frames.emplace_back(std::move(blank)); + auto& pframe = ppf->frames.back(); + pframe.frame_info.layer_info.crop_x0 = px0; + pframe.frame_info.layer_info.crop_y0 = py0; + pframe.frame_info.layer_info.xsize = pxs; + pframe.frame_info.layer_info.ysize = pys; + pframe.frame_info.duration = 0; + bool is_full_size = px0 == 0 && py0 == 0 && pxs == ppf->info.xsize && + pys == ppf->info.ysize; + pframe.frame_info.layer_info.have_crop = is_full_size ? 0 : 1; + pframe.frame_info.layer_info.blend_info.blendmode = JXL_BLEND_REPLACE; + pframe.frame_info.layer_info.blend_info.source = 1; + pframe.frame_info.layer_info.save_as_reference = 1; + ppf->frames.emplace_back(std::move(frame.data)); + } + } else { + ppf->frames.emplace_back(std::move(frame.data)); + } + + auto& pframe = ppf->frames.back(); + pframe.frame_info.layer_info.crop_x0 = x0; + pframe.frame_info.layer_info.crop_y0 = y0; + pframe.frame_info.layer_info.xsize = xsize; + pframe.frame_info.layer_info.ysize = ysize; + pframe.frame_info.duration = frame.duration; + pframe.frame_info.layer_info.blend_info.blendmode = + should_blend ? 
JXL_BLEND_BLEND : JXL_BLEND_REPLACE; + bool is_full_size = x0 == 0 && y0 == 0 && xsize == ppf->info.xsize && + ysize == ppf->info.ysize; + pframe.frame_info.layer_info.have_crop = is_full_size ? 0 : 1; + pframe.frame_info.layer_info.blend_info.source = 1; + pframe.frame_info.layer_info.blend_info.alpha = 0; + pframe.frame_info.layer_info.save_as_reference = use_for_next_frame ? 1 : 0; + + previous_frame_should_be_cleared = + has_nontrivial_background && frame.dispose_op == DISPOSE_OP_BACKGROUND; + } + if (ppf->frames.empty()) return JXL_FAILURE("No frames decoded"); + ppf->frames.back().frame_info.is_last = true; + + return true; +#else + return false; +#endif +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/apng.h b/third-party/libjxl/libjxl/lib/extras/dec/apng.h new file mode 100644 index 0000000000..a292758b8f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/apng.h @@ -0,0 +1,36 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_DEC_APNG_H_ +#define LIB_EXTRAS_DEC_APNG_H_ + +// Decodes APNG images in memory. + +#include + +#include "lib/extras/dec/color_hints.h" +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +struct SizeConstraints; + +namespace extras { + +bool CanDecodeAPNG(); + +// Decodes `bytes` into `ppf`. 
+Status DecodeImageAPNG(Span bytes, const ColorHints& color_hints, + PackedPixelFile* ppf, + const SizeConstraints* constraints = nullptr); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_DEC_APNG_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_description.cc b/third-party/libjxl/libjxl/lib/extras/dec/color_description.cc new file mode 100644 index 0000000000..54f6aa4206 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/color_description.cc @@ -0,0 +1,218 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/dec/color_description.h" + +#include + +#include + +namespace jxl { + +namespace { + +template +struct EnumName { + const char* name; + T value; +}; + +const EnumName kJxlColorSpaceNames[] = { + {"RGB", JXL_COLOR_SPACE_RGB}, + {"Gra", JXL_COLOR_SPACE_GRAY}, + {"XYB", JXL_COLOR_SPACE_XYB}, + {"CS?", JXL_COLOR_SPACE_UNKNOWN}, +}; + +const EnumName kJxlWhitePointNames[] = { + {"D65", JXL_WHITE_POINT_D65}, + {"Cst", JXL_WHITE_POINT_CUSTOM}, + {"EER", JXL_WHITE_POINT_E}, + {"DCI", JXL_WHITE_POINT_DCI}, +}; + +const EnumName kJxlPrimariesNames[] = { + {"SRG", JXL_PRIMARIES_SRGB}, + {"Cst", JXL_PRIMARIES_CUSTOM}, + {"202", JXL_PRIMARIES_2100}, + {"DCI", JXL_PRIMARIES_P3}, +}; + +const EnumName kJxlTransferFunctionNames[] = { + {"709", JXL_TRANSFER_FUNCTION_709}, + {"TF?", JXL_TRANSFER_FUNCTION_UNKNOWN}, + {"Lin", JXL_TRANSFER_FUNCTION_LINEAR}, + {"SRG", JXL_TRANSFER_FUNCTION_SRGB}, + {"PeQ", JXL_TRANSFER_FUNCTION_PQ}, + {"DCI", JXL_TRANSFER_FUNCTION_DCI}, + {"HLG", JXL_TRANSFER_FUNCTION_HLG}, + {"", JXL_TRANSFER_FUNCTION_GAMMA}, +}; + +const EnumName kJxlRenderingIntentNames[] = { + {"Per", JXL_RENDERING_INTENT_PERCEPTUAL}, + {"Rel", JXL_RENDERING_INTENT_RELATIVE}, + {"Sat", JXL_RENDERING_INTENT_SATURATION}, + {"Abs", JXL_RENDERING_INTENT_ABSOLUTE}, +}; + +template +Status 
ParseEnum(const std::string& token, const EnumName<T>* enum_values,
+                 size_t enum_len, T* value) {
+  // Linear scan for the entry whose name equals `token`.
+  for (size_t i = 0; i < enum_len; i++) {
+    if (enum_values[i].name == token) {
+      *value = enum_values[i].value;
+      return true;
+    }
+  }
+  // No entry matched; `*value` is left untouched.
+  return false;
+}
+// Number of elements in a statically sized array.
+#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
+// Looks `token` up in the k<type>Names table, writing a match to `*value`.
+#define PARSE_ENUM(type, token, value) \
+  ParseEnum(token, k##type##Names, ARRAY_SIZE(k##type##Names), value)
+
+// Splits `*input` into the non-empty tokens delimited by `separator`.
+class Tokenizer {
+ public:
+  // `input` is not owned and must outlive the tokenizer.
+  Tokenizer(const std::string* input, char separator)
+      : input_(input), separator_(separator) {}
+
+  // Stores the next token in `*next`. Fails with "Missing token" when the
+  // token is empty, which includes every call made after the final token has
+  // been returned.
+  Status Next(std::string* next) {
+    const size_t end = input_->find(separator_, start_);
+    if (end == std::string::npos) {
+      *next = input_->substr(start_);  // rest of string
+    } else {
+      *next = input_->substr(start_, end - start_);
+    }
+    if (next->empty()) return JXL_FAILURE("Missing token");
+    // Park the cursor at the end of the input once the last token is consumed.
+    // `end + 1` would wrap npos around to 0 and make the next call hand out the
+    // first token again instead of reporting that the input is exhausted.
+    start_ = (end == std::string::npos) ? input_->size() : end + 1;
+    return true;
+  }
+
+ private:
+  const std::string* const input_;  // not owned
+  const char separator_;
+  size_t start_ = 0;  // of next token
+};
+
+// Parses `num` with strtod into `*d`. Fails when no characters could be
+// converted, when the result is NaN, or when strtod reports ERANGE.
+Status ParseDouble(const std::string& num, double* d) {
+  char* end;
+  errno = 0;
+  *d = strtod(num.c_str(), &end);
+  if (*d == 0.0 && end == num.c_str()) {
+    return JXL_FAILURE("Invalid double: %s", num.c_str());
+  }
+  if (std::isnan(*d)) {
+    return JXL_FAILURE("Invalid double: %s", num.c_str());
+  }
+  if (errno == ERANGE) {
+    return JXL_FAILURE("Double out of range: %s", num.c_str());
+  }
+  return true;
+}
+
+// Reads the next token from `tokenizer` and parses it as a double.
+Status ParseDouble(Tokenizer* tokenizer, double* d) {
+  std::string num;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&num));
+  return ParseDouble(num, d);
+}
+
+// Reads one token and maps it to a JxlColorSpace; fails on an unknown name.
+Status ParseColorSpace(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  std::string str;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+  JxlColorSpace cs;
+  if (PARSE_ENUM(JxlColorSpace, str, &cs)) {
+    c->color_space = cs;
+    return true;
+  }
+
+  return JXL_FAILURE("Unknown ColorSpace %s", str.c_str());
+}
+
+// Reads the white point: implicit D65 for XYB, otherwise either a named white
+// point or a custom "x;y" chromaticity pair.
+Status ParseWhitePoint(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  if (c->color_space ==
JXL_COLOR_SPACE_XYB) {
+    // Implicit white point.
+    c->white_point = JXL_WHITE_POINT_D65;
+    return true;
+  }
+
+  std::string str;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+  if (PARSE_ENUM(JxlWhitePoint, str, &c->white_point)) return true;
+
+  // Not a named white point: expect custom chromaticity written as "x;y".
+  Tokenizer xy_tokenizer(&str, ';');
+  c->white_point = JXL_WHITE_POINT_CUSTOM;
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->white_point_xy + 0));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->white_point_xy + 1));
+  return true;
+}
+
+// Reads the primaries. Gray and XYB carry none, so no token is consumed for
+// them. Otherwise the token is either a named set of primaries or six
+// ';'-separated doubles in the order red x, red y, green x, green y, blue x,
+// blue y, which select JXL_PRIMARIES_CUSTOM.
+Status ParsePrimaries(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  if (c->color_space == JXL_COLOR_SPACE_GRAY ||
+      c->color_space == JXL_COLOR_SPACE_XYB) {
+    // No primaries case.
+    return true;
+  }
+
+  std::string str;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+  if (PARSE_ENUM(JxlPrimaries, str, &c->primaries)) return true;
+
+  // Not a named set: every coordinate must parse, otherwise the error from
+  // ParseDouble is propagated.
+  Tokenizer xy_tokenizer(&str, ';');
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_red_xy + 0));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_red_xy + 1));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_green_xy + 0));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_green_xy + 1));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_blue_xy + 0));
+  JXL_RETURN_IF_ERROR(ParseDouble(&xy_tokenizer, c->primaries_blue_xy + 1));
+  c->primaries = JXL_PRIMARIES_CUSTOM;
+
+  // All six coordinates were read, so the custom primaries are valid. This
+  // mirrors the custom branch of ParseWhitePoint, which also succeeds here.
+  return true;
+}
+
+// Reads one token and maps it to a JxlRenderingIntent; fails on an unknown
+// name.
+Status ParseRenderingIntent(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  std::string str;
+  JXL_RETURN_IF_ERROR(tokenizer->Next(&str));
+  if (PARSE_ENUM(JxlRenderingIntent, str, &c->rendering_intent)) return true;
+
+  return JXL_FAILURE("Invalid RenderingIntent %s\n", str.c_str());
+}
+
+// Reads the transfer function: implicit gamma 1/3 for XYB, otherwise either a
+// named transfer function or "g<value>" for a custom gamma.
+Status ParseTransferFunction(Tokenizer* tokenizer, JxlColorEncoding* c) {
+  if (c->color_space == JXL_COLOR_SPACE_XYB) {
+    // Implicit TF.
+ c->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA; + c->gamma = 1 / 3.; + return true; + } + + std::string str; + JXL_RETURN_IF_ERROR(tokenizer->Next(&str)); + if (PARSE_ENUM(JxlTransferFunction, str, &c->transfer_function)) { + return true; + } + + if (str[0] == 'g') { + JXL_RETURN_IF_ERROR(ParseDouble(str.substr(1), &c->gamma)); + c->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA; + return true; + } + + return JXL_FAILURE("Invalid gamma %s", str.c_str()); +} + +} // namespace + +Status ParseDescription(const std::string& description, JxlColorEncoding* c) { + *c = {}; + Tokenizer tokenizer(&description, '_'); + JXL_RETURN_IF_ERROR(ParseColorSpace(&tokenizer, c)); + JXL_RETURN_IF_ERROR(ParseWhitePoint(&tokenizer, c)); + JXL_RETURN_IF_ERROR(ParsePrimaries(&tokenizer, c)); + JXL_RETURN_IF_ERROR(ParseRenderingIntent(&tokenizer, c)); + JXL_RETURN_IF_ERROR(ParseTransferFunction(&tokenizer, c)); + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_description.h b/third-party/libjxl/libjxl/lib/extras/dec/color_description.h new file mode 100644 index 0000000000..23680ff7c6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/color_description.h @@ -0,0 +1,23 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_COLOR_DESCRIPTION_H_ +#define LIB_EXTRAS_COLOR_DESCRIPTION_H_ + +#include + +#include + +#include "lib/jxl/base/status.h" + +namespace jxl { + +// Parse the color description into a JxlColorEncoding "RGB_D65_SRG_Rel_Lin". 
+Status ParseDescription(const std::string& description, + JxlColorEncoding* JXL_RESTRICT c); + +} // namespace jxl + +#endif // LIB_EXTRAS_COLOR_DESCRIPTION_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_description_test.cc b/third-party/libjxl/libjxl/lib/extras/dec/color_description_test.cc new file mode 100644 index 0000000000..a1c04a94e4 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/color_description_test.cc @@ -0,0 +1,38 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/dec/color_description.h" + +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { + +// Verify ParseDescription(Description) yields the same ColorEncoding +TEST(ColorDescriptionTest, RoundTripAll) { + for (const auto& cdesc : test::AllEncodings()) { + const ColorEncoding c_original = test::ColorEncodingFromDescriptor(cdesc); + const std::string description = Description(c_original); + printf("%s\n", description.c_str()); + + JxlColorEncoding c_external = {}; + EXPECT_TRUE(ParseDescription(description, &c_external)); + ColorEncoding c_internal; + EXPECT_TRUE( + ConvertExternalToInternalColorEncoding(c_external, &c_internal)); + EXPECT_TRUE(c_original.SameColorEncoding(c_internal)) + << "Where c_original=" << c_original + << " and c_internal=" << c_internal; + } +} + +TEST(ColorDescriptionTest, NanGamma) { + const std::string description = "Gra_2_Per_gnan"; + JxlColorEncoding c; + EXPECT_FALSE(ParseDescription(description, &c)); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_hints.cc b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.cc new file mode 100644 index 0000000000..5c6d7b84a0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.cc @@ -0,0 +1,78 @@ +// Copyright (c) the 
JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/dec/color_hints.h" + +#include + +#include + +#include "lib/extras/dec/color_description.h" +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace extras { + +Status ApplyColorHints(const ColorHints& color_hints, + const bool color_already_set, const bool is_gray, + PackedPixelFile* ppf) { + bool got_color_space = color_already_set; + + JXL_RETURN_IF_ERROR(color_hints.Foreach( + [color_already_set, is_gray, ppf, &got_color_space]( + const std::string& key, const std::string& value) -> Status { + if (color_already_set && (key == "color_space" || key == "icc")) { + JXL_WARNING("Decoder ignoring %s hint", key.c_str()); + return true; + } + if (key == "color_space") { + JxlColorEncoding c_original_external; + if (!ParseDescription(value, &c_original_external)) { + return JXL_FAILURE("Failed to apply color_space"); + } + ppf->color_encoding = c_original_external; + + if (is_gray != + (ppf->color_encoding.color_space == JXL_COLOR_SPACE_GRAY)) { + return JXL_FAILURE("mismatch between file and color_space hint"); + } + + got_color_space = true; + } else if (key == "icc") { + const uint8_t* data = reinterpret_cast(value.data()); + std::vector icc(data, data + value.size()); + ppf->icc.swap(icc); + got_color_space = true; + } else if (key == "exif") { + const uint8_t* data = reinterpret_cast(value.data()); + std::vector blob(data, data + value.size()); + ppf->metadata.exif.swap(blob); + } else if (key == "xmp") { + const uint8_t* data = reinterpret_cast(value.data()); + std::vector blob(data, data + value.size()); + ppf->metadata.xmp.swap(blob); + } else if (key == "jumbf") { + const uint8_t* data = reinterpret_cast(value.data()); + std::vector blob(data, data + value.size()); + ppf->metadata.jumbf.swap(blob); + } else { + JXL_WARNING("Ignoring %s hint", key.c_str()); + } + return true; + 
})); + + if (!got_color_space) { + ppf->color_encoding.color_space = + is_gray ? JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB; + ppf->color_encoding.white_point = JXL_WHITE_POINT_D65; + ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB; + ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB; + } + + return true; +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/color_hints.h b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.h new file mode 100644 index 0000000000..036f203e26 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/color_hints.h @@ -0,0 +1,74 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_COLOR_HINTS_H_ +#define LIB_EXTRAS_COLOR_HINTS_H_ + +// Not all the formats implemented in the extras lib support bundling color +// information into the file, and those that support it may not have it. +// To allow attaching color information to those file formats the caller can +// define these color hints. +// Besides color space information, 'ColorHints' may also include other +// additional information such as Exif, XMP and JUMBF metadata. + +#include +#include + +#include +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace extras { + +class ColorHints { + public: + // key=color_space, value=Description(c/pp): specify the ColorEncoding of + // the pixels for decoding. Otherwise, if the codec did not obtain an ICC + // profile from the image, assume sRGB. + // + // Strings are taken from the command line, so avoid spaces for convenience. + void Add(const std::string& key, const std::string& value) { + kv_.emplace_back(key, value); + } + + // Calls `func(key, value)` for each key/value in the order they were added, + // returning false immediately if `func` returns false. 
+ template + Status Foreach(const Func& func) const { + for (const KeyValue& kv : kv_) { + Status ok = func(kv.key, kv.value); + if (!ok) { + return JXL_FAILURE("ColorHints::Foreach returned false"); + } + } + return true; + } + + private: + // Splitting into key/value avoids parsing in each codec. + struct KeyValue { + KeyValue(std::string key, std::string value) + : key(std::move(key)), value(std::move(value)) {} + + std::string key; + std::string value; + }; + + std::vector kv_; +}; + +// Apply the color hints to the decoded image in PackedPixelFile if any. +// color_already_set tells whether the color encoding was already set, in which +// case the hints are ignored if any hint is passed. +Status ApplyColorHints(const ColorHints& color_hints, bool color_already_set, + bool is_gray, PackedPixelFile* ppf); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_COLOR_HINTS_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/decode.cc b/third-party/libjxl/libjxl/lib/extras/dec/decode.cc new file mode 100644 index 0000000000..2e052db152 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/decode.cc @@ -0,0 +1,156 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/dec/decode.h" + +#include + +#include "lib/extras/dec/apng.h" +#include "lib/extras/dec/exr.h" +#include "lib/extras/dec/gif.h" +#include "lib/extras/dec/jpg.h" +#include "lib/extras/dec/jxl.h" +#include "lib/extras/dec/pgx.h" +#include "lib/extras/dec/pnm.h" + +namespace jxl { +namespace extras { +namespace { + +// Any valid encoding is larger (ensures codecs can read the first few bytes) +constexpr size_t kMinBytes = 9; + +void BasenameAndExtension(std::string path, std::string* basename, + std::string* extension) { + // Pattern: file.jxl + size_t pos = path.find_last_of('.'); + if (pos < path.size()) { + *basename = path.substr(0, pos); + *extension = path.substr(pos); + return; + } + // Pattern: jxl:- + pos = path.find_first_of(':'); + if (pos < path.size()) { + *basename = path.substr(pos + 1); + *extension = "." + path.substr(0, pos); + return; + } + // Extension not found + *basename = path; + *extension = ""; +} + +} // namespace + +Codec CodecFromPath(std::string path, size_t* JXL_RESTRICT bits_per_sample, + std::string* basename, std::string* extension) { + std::string base; + std::string ext; + BasenameAndExtension(path, &base, &ext); + if (basename) *basename = base; + if (extension) *extension = ext; + + std::transform(ext.begin(), ext.end(), ext.begin(), [](char c) { + return std::tolower(c, std::locale::classic()); + }); + if (ext == ".png") return Codec::kPNG; + + if (ext == ".jpg") return Codec::kJPG; + if (ext == ".jpeg") return Codec::kJPG; + + if (ext == ".pgx") return Codec::kPGX; + + if (ext == ".pam") return Codec::kPNM; + if (ext == ".pnm") return Codec::kPNM; + if (ext == ".pgm") return Codec::kPNM; + if (ext == ".ppm") return Codec::kPNM; + if (ext == ".pfm") { + if (bits_per_sample != nullptr) *bits_per_sample = 32; + return Codec::kPNM; + } + + if (ext == ".gif") return Codec::kGIF; + + if (ext == ".exr") return Codec::kEXR; + + return Codec::kUnknown; +} + +bool CanDecode(Codec codec) { + switch (codec) { + case 
Codec::kEXR: + return CanDecodeEXR(); + case Codec::kGIF: + return CanDecodeGIF(); + case Codec::kJPG: + return CanDecodeJPG(); + case Codec::kPNG: + return CanDecodeAPNG(); + case Codec::kPNM: + case Codec::kPGX: + case Codec::kJXL: + return true; + default: + return false; + } +} + +Status DecodeBytes(const Span bytes, + const ColorHints& color_hints, extras::PackedPixelFile* ppf, + const SizeConstraints* constraints, Codec* orig_codec) { + if (bytes.size() < kMinBytes) return JXL_FAILURE("Too few bytes"); + + *ppf = extras::PackedPixelFile(); + + // Default values when not set by decoders. + ppf->info.uses_original_profile = true; + ppf->info.orientation = JXL_ORIENT_IDENTITY; + + const auto choose_codec = [&]() -> Codec { + if (DecodeImageAPNG(bytes, color_hints, ppf, constraints)) { + return Codec::kPNG; + } + if (DecodeImagePGX(bytes, color_hints, ppf, constraints)) { + return Codec::kPGX; + } + if (DecodeImagePNM(bytes, color_hints, ppf, constraints)) { + return Codec::kPNM; + } + JXLDecompressParams dparams = {}; + for (const uint32_t num_channels : {1, 2, 3, 4}) { + dparams.accepted_formats.push_back( + {num_channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0}); + } + size_t decoded_bytes; + if (DecodeImageJXL(bytes.data(), bytes.size(), dparams, &decoded_bytes, + ppf) && + ApplyColorHints(color_hints, true, ppf->info.num_color_channels == 1, + ppf)) { + return Codec::kJXL; + } + if (DecodeImageGIF(bytes, color_hints, ppf, constraints)) { + return Codec::kGIF; + } + if (DecodeImageJPG(bytes, color_hints, ppf, constraints)) { + return Codec::kJPG; + } + if (DecodeImageEXR(bytes, color_hints, ppf, constraints)) { + return Codec::kEXR; + } + return Codec::kUnknown; + }; + + Codec codec = choose_codec(); + if (codec == Codec::kUnknown) { + return JXL_FAILURE("Codecs failed to decode"); + } + if (orig_codec) *orig_codec = codec; + + return true; +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/decode.h 
b/third-party/libjxl/libjxl/lib/extras/dec/decode.h new file mode 100644 index 0000000000..954e54551e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/decode.h @@ -0,0 +1,58 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_DEC_DECODE_H_ +#define LIB_EXTRAS_DEC_DECODE_H_ + +// Facade for image decoders (PNG, PNM, ...). + +#include +#include + +#include +#include + +#include "lib/extras/dec/color_hints.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +struct SizeConstraints; + +namespace extras { + +// Codecs supported by DecodeBytes. +enum class Codec : uint32_t { + kUnknown, // for CodecFromPath + kPNG, + kPNM, + kPGX, + kJPG, + kGIF, + kEXR, + kJXL +}; + +bool CanDecode(Codec codec); + +// If and only if extension is ".pfm", *bits_per_sample is updated to 32 so +// that Encode() would encode to PFM instead of PPM. +Codec CodecFromPath(std::string path, + size_t* JXL_RESTRICT bits_per_sample = nullptr, + std::string* basename = nullptr, + std::string* extension = nullptr); + +// Decodes "bytes" into *ppf. +// color_hints may specify the color space, otherwise, defaults to sRGB. +Status DecodeBytes(Span bytes, const ColorHints& color_hints, + extras::PackedPixelFile* ppf, + const SizeConstraints* constraints = nullptr, + Codec* orig_codec = nullptr); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_DEC_DECODE_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/exr.cc b/third-party/libjxl/libjxl/lib/extras/dec/exr.cc new file mode 100644 index 0000000000..821e0f4b21 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/exr.cc @@ -0,0 +1,201 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +#include "lib/extras/dec/exr.h" + +#if JPEGXL_ENABLE_EXR +#include +#include +#include +#include +#endif + +#include + +namespace jxl { +namespace extras { + +#if JPEGXL_ENABLE_EXR +namespace { + +namespace OpenEXR = OPENEXR_IMF_NAMESPACE; + +// OpenEXR::Int64 is deprecated in favor of using uint64_t directly, but using +// uint64_t as recommended causes build failures with previous OpenEXR versions +// on macOS, where the definition for OpenEXR::Int64 was actually not equivalent +// to uint64_t. This alternative should work in all cases. +using ExrInt64 = decltype(std::declval().tellg()); + +constexpr int kExrBitsPerSample = 16; +constexpr int kExrAlphaBits = 16; + +class InMemoryIStream : public OpenEXR::IStream { + public: + // The data pointed to by `bytes` must outlive the InMemoryIStream. + explicit InMemoryIStream(const Span bytes) + : IStream(/*fileName=*/""), bytes_(bytes) {} + + bool isMemoryMapped() const override { return true; } + char* readMemoryMapped(const int n) override { + JXL_ASSERT(pos_ + n <= bytes_.size()); + char* const result = + const_cast(reinterpret_cast(bytes_.data() + pos_)); + pos_ += n; + return result; + } + bool read(char c[], const int n) override { + std::copy_n(readMemoryMapped(n), n, c); + return pos_ < bytes_.size(); + } + + ExrInt64 tellg() override { return pos_; } + void seekg(const ExrInt64 pos) override { + JXL_ASSERT(pos + 1 <= bytes_.size()); + pos_ = pos; + } + + private: + const Span bytes_; + size_t pos_ = 0; +}; + +} // namespace +#endif + +bool CanDecodeEXR() { +#if JPEGXL_ENABLE_EXR + return true; +#else + return false; +#endif +} + +Status DecodeImageEXR(Span bytes, const ColorHints& color_hints, + PackedPixelFile* ppf, + const SizeConstraints* constraints) { +#if JPEGXL_ENABLE_EXR + InMemoryIStream is(bytes); + +#ifdef __EXCEPTIONS + std::unique_ptr input_ptr; + try { + input_ptr.reset(new OpenEXR::RgbaInputFile(is)); + } catch (...) 
{ + // silently return false if it is not an EXR file + return false; + } + OpenEXR::RgbaInputFile& input = *input_ptr; +#else + OpenEXR::RgbaInputFile input(is); +#endif + + if ((input.channels() & OpenEXR::RgbaChannels::WRITE_RGB) != + OpenEXR::RgbaChannels::WRITE_RGB) { + return JXL_FAILURE("only RGB OpenEXR files are supported"); + } + const bool has_alpha = (input.channels() & OpenEXR::RgbaChannels::WRITE_A) == + OpenEXR::RgbaChannels::WRITE_A; + + const float intensity_target = OpenEXR::hasWhiteLuminance(input.header()) + ? OpenEXR::whiteLuminance(input.header()) + : 0; + + auto image_size = input.displayWindow().size(); + // Size is computed as max - min, but both bounds are inclusive. + ++image_size.x; + ++image_size.y; + + ppf->info.xsize = image_size.x; + ppf->info.ysize = image_size.y; + ppf->info.num_color_channels = 3; + + const JxlDataType data_type = + kExrBitsPerSample == 16 ? JXL_TYPE_FLOAT16 : JXL_TYPE_FLOAT; + const JxlPixelFormat format{ + /*num_channels=*/3u + (has_alpha ? 1u : 0u), + /*data_type=*/data_type, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0, + }; + ppf->frames.clear(); + // Allocates the frame buffer. + ppf->frames.emplace_back(image_size.x, image_size.y, format); + const auto& frame = ppf->frames.back(); + + const int row_size = input.dataWindow().size().x + 1; + // Number of rows to read at a time. + // https://www.openexr.com/documentation/ReadingAndWritingImageFiles.pdf + // recommends reading the whole file at once. + const int y_chunk_size = input.displayWindow().size().y + 1; + std::vector input_rows(row_size * y_chunk_size); + for (int start_y = + std::max(input.dataWindow().min.y, input.displayWindow().min.y); + start_y <= + std::min(input.dataWindow().max.y, input.displayWindow().max.y); + start_y += y_chunk_size) { + // Inclusive. 
+ const int end_y = std::min( + start_y + y_chunk_size - 1, + std::min(input.dataWindow().max.y, input.displayWindow().max.y)); + input.setFrameBuffer( + input_rows.data() - input.dataWindow().min.x - start_y * row_size, + /*xStride=*/1, /*yStride=*/row_size); + input.readPixels(start_y, end_y); + for (int exr_y = start_y; exr_y <= end_y; ++exr_y) { + const int image_y = exr_y - input.displayWindow().min.y; + const OpenEXR::Rgba* const JXL_RESTRICT input_row = + &input_rows[(exr_y - start_y) * row_size]; + uint8_t* row = static_cast(frame.color.pixels()) + + frame.color.stride * image_y; + const uint32_t pixel_size = + (3 + (has_alpha ? 1 : 0)) * kExrBitsPerSample / 8; + for (int exr_x = + std::max(input.dataWindow().min.x, input.displayWindow().min.x); + exr_x <= + std::min(input.dataWindow().max.x, input.displayWindow().max.x); + ++exr_x) { + const int image_x = exr_x - input.displayWindow().min.x; + // TODO(eustas): UB: OpenEXR::Rgba is not TriviallyCopyable + memcpy(row + image_x * pixel_size, + input_row + (exr_x - input.dataWindow().min.x), pixel_size); + } + } + } + + ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_LINEAR; + ppf->color_encoding.color_space = JXL_COLOR_SPACE_RGB; + ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB; + ppf->color_encoding.white_point = JXL_WHITE_POINT_D65; + if (OpenEXR::hasChromaticities(input.header())) { + ppf->color_encoding.primaries = JXL_PRIMARIES_CUSTOM; + ppf->color_encoding.white_point = JXL_WHITE_POINT_CUSTOM; + const auto& chromaticities = OpenEXR::chromaticities(input.header()); + ppf->color_encoding.primaries_red_xy[0] = chromaticities.red.x; + ppf->color_encoding.primaries_red_xy[1] = chromaticities.red.y; + ppf->color_encoding.primaries_green_xy[0] = chromaticities.green.x; + ppf->color_encoding.primaries_green_xy[1] = chromaticities.green.y; + ppf->color_encoding.primaries_blue_xy[0] = chromaticities.blue.x; + ppf->color_encoding.primaries_blue_xy[1] = chromaticities.blue.y; + 
ppf->color_encoding.white_point_xy[0] = chromaticities.white.x; + ppf->color_encoding.white_point_xy[1] = chromaticities.white.y; + } + + // EXR uses binary16 or binary32 floating point format. + ppf->info.bits_per_sample = kExrBitsPerSample; + ppf->info.exponent_bits_per_sample = kExrBitsPerSample == 16 ? 5 : 8; + if (has_alpha) { + ppf->info.alpha_bits = kExrAlphaBits; + ppf->info.alpha_exponent_bits = ppf->info.exponent_bits_per_sample; + ppf->info.alpha_premultiplied = true; + } + ppf->info.intensity_target = intensity_target; + return true; +#else + return false; +#endif +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/exr.h b/third-party/libjxl/libjxl/lib/extras/dec/exr.h new file mode 100644 index 0000000000..3a15e086df --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/exr.h @@ -0,0 +1,34 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_DEC_EXR_H_ +#define LIB_EXTRAS_DEC_EXR_H_ + +// Decodes OpenEXR images in memory. + +#include "lib/extras/dec/color_hints.h" +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +struct SizeConstraints; + +namespace extras { + +bool CanDecodeEXR(); + +// Decodes `bytes` into `ppf`. color_hints are ignored. 
+Status DecodeImageEXR(Span bytes, const ColorHints& color_hints, + PackedPixelFile* ppf, + const SizeConstraints* constraints = nullptr); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_DEC_EXR_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/gif.cc b/third-party/libjxl/libjxl/lib/extras/dec/gif.cc new file mode 100644 index 0000000000..3d963941c0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/gif.cc @@ -0,0 +1,415 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/dec/gif.h" + +#if JPEGXL_ENABLE_GIF +#include +#endif +#include +#include + +#include +#include +#include + +#include "lib/extras/size_constraints.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/sanitizers.h" + +namespace jxl { +namespace extras { + +#if JPEGXL_ENABLE_GIF +namespace { + +struct ReadState { + Span bytes; +}; + +struct DGifCloser { + void operator()(GifFileType* const ptr) const { DGifCloseFile(ptr, nullptr); } +}; +using GifUniquePtr = std::unique_ptr; + +struct PackedRgba { + uint8_t r, g, b, a; +}; + +struct PackedRgb { + uint8_t r, g, b; +}; + +void ensure_have_alpha(PackedFrame* frame) { + if (!frame->extra_channels.empty()) return; + const JxlPixelFormat alpha_format{ + /*num_channels=*/1u, + /*data_type=*/JXL_TYPE_UINT8, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0, + }; + frame->extra_channels.emplace_back(frame->color.xsize, frame->color.ysize, + alpha_format); + // We need to set opaque-by-default. 
+ std::fill_n(static_cast(frame->extra_channels[0].pixels()), + frame->color.xsize * frame->color.ysize, 255u); +} +} // namespace +#endif + +bool CanDecodeGIF() { +#if JPEGXL_ENABLE_GIF + return true; +#else + return false; +#endif +} + +Status DecodeImageGIF(Span bytes, const ColorHints& color_hints, + PackedPixelFile* ppf, + const SizeConstraints* constraints) { +#if JPEGXL_ENABLE_GIF + int error = GIF_OK; + ReadState state = {bytes}; + const auto ReadFromSpan = [](GifFileType* const gif, GifByteType* const bytes, + int n) { + ReadState* const state = reinterpret_cast(gif->UserData); + // giflib API requires the input size `n` to be signed int. + if (static_cast(n) > state->bytes.size()) { + n = state->bytes.size(); + } + memcpy(bytes, state->bytes.data(), n); + state->bytes.remove_prefix(n); + return n; + }; + GifUniquePtr gif(DGifOpen(&state, ReadFromSpan, &error)); + if (gif == nullptr) { + if (error == D_GIF_ERR_NOT_GIF_FILE) { + // Not an error. + return false; + } else { + return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(error)); + } + } + error = DGifSlurp(gif.get()); + if (error != GIF_OK) { + return JXL_FAILURE("Failed to read GIF: %s", GifErrorString(gif->Error)); + } + + msan::UnpoisonMemory(gif.get(), sizeof(*gif)); + if (gif->SColorMap) { + msan::UnpoisonMemory(gif->SColorMap, sizeof(*gif->SColorMap)); + msan::UnpoisonMemory( + gif->SColorMap->Colors, + sizeof(*gif->SColorMap->Colors) * gif->SColorMap->ColorCount); + } + msan::UnpoisonMemory(gif->SavedImages, + sizeof(*gif->SavedImages) * gif->ImageCount); + + JXL_RETURN_IF_ERROR( + VerifyDimensions(constraints, gif->SWidth, gif->SHeight)); + uint64_t total_pixel_count = + static_cast(gif->SWidth) * gif->SHeight; + for (int i = 0; i < gif->ImageCount; ++i) { + const SavedImage& image = gif->SavedImages[i]; + uint32_t w = image.ImageDesc.Width; + uint32_t h = image.ImageDesc.Height; + JXL_RETURN_IF_ERROR(VerifyDimensions(constraints, w, h)); + uint64_t pixel_count = static_cast(w) * h; + 
if (total_pixel_count + pixel_count < total_pixel_count) { + return JXL_FAILURE("Image too big"); + } + total_pixel_count += pixel_count; + if (constraints && (total_pixel_count > constraints->dec_max_pixels)) { + return JXL_FAILURE("Image too big"); + } + } + + if (!gif->SColorMap) { + for (int i = 0; i < gif->ImageCount; ++i) { + if (!gif->SavedImages[i].ImageDesc.ColorMap) { + return JXL_FAILURE("Missing GIF color map"); + } + } + } + + if (gif->ImageCount > 1) { + ppf->info.have_animation = true; + // Delays in GIF are specified in 100ths of a second. + ppf->info.animation.tps_numerator = 100; + ppf->info.animation.tps_denominator = 1; + } + + ppf->frames.clear(); + ppf->frames.reserve(gif->ImageCount); + + ppf->info.xsize = gif->SWidth; + ppf->info.ysize = gif->SHeight; + ppf->info.bits_per_sample = 8; + ppf->info.exponent_bits_per_sample = 0; + // alpha_bits is later set to 8 if we find a frame with transparent pixels. + ppf->info.alpha_bits = 0; + ppf->info.alpha_exponent_bits = 0; + JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false, + /*is_gray=*/false, ppf)); + + ppf->info.num_color_channels = 3; + + // Pixel format for the 'canvas' onto which we paint + // the (potentially individually cropped) GIF frames + // of an animation. + const JxlPixelFormat canvas_format{ + /*num_channels=*/4u, + /*data_type=*/JXL_TYPE_UINT8, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0, + }; + + // Pixel format for the JXL PackedFrame that goes into the + // PackedPixelFile. Here, we use 3 color channels, and provide + // the alpha channel as an extra_channel wherever it is used. 
+ const JxlPixelFormat packed_frame_format{ + /*num_channels=*/3u, + /*data_type=*/JXL_TYPE_UINT8, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0, + }; + + GifColorType background_color; + if (gif->SColorMap == nullptr || + gif->SBackGroundColor >= gif->SColorMap->ColorCount) { + background_color = {0, 0, 0}; + } else { + background_color = gif->SColorMap->Colors[gif->SBackGroundColor]; + } + const PackedRgba background_rgba{background_color.Red, background_color.Green, + background_color.Blue, 0}; + PackedFrame canvas(gif->SWidth, gif->SHeight, canvas_format); + std::fill_n(static_cast(canvas.color.pixels()), + canvas.color.xsize * canvas.color.ysize, background_rgba); + Rect canvas_rect{0, 0, canvas.color.xsize, canvas.color.ysize}; + + Rect previous_rect_if_restore_to_background; + + bool replace = true; + bool last_base_was_none = true; + for (int i = 0; i < gif->ImageCount; ++i) { + const SavedImage& image = gif->SavedImages[i]; + msan::UnpoisonMemory(image.RasterBits, sizeof(*image.RasterBits) * + image.ImageDesc.Width * + image.ImageDesc.Height); + const Rect image_rect(image.ImageDesc.Left, image.ImageDesc.Top, + image.ImageDesc.Width, image.ImageDesc.Height); + + Rect total_rect; + if (previous_rect_if_restore_to_background.xsize() != 0 || + previous_rect_if_restore_to_background.ysize() != 0) { + const size_t xbegin = std::min( + image_rect.x0(), previous_rect_if_restore_to_background.x0()); + const size_t ybegin = std::min( + image_rect.y0(), previous_rect_if_restore_to_background.y0()); + const size_t xend = + std::max(image_rect.x0() + image_rect.xsize(), + previous_rect_if_restore_to_background.x0() + + previous_rect_if_restore_to_background.xsize()); + const size_t yend = + std::max(image_rect.y0() + image_rect.ysize(), + previous_rect_if_restore_to_background.y0() + + previous_rect_if_restore_to_background.ysize()); + total_rect = Rect(xbegin, ybegin, xend - xbegin, yend - ybegin); + previous_rect_if_restore_to_background = Rect(); + replace = 
true; + } else { + total_rect = image_rect; + replace = false; + } + if (!image_rect.IsInside(canvas_rect)) { + return JXL_FAILURE("GIF frame extends outside of the canvas"); + } + + // Allocates the frame buffer. + ppf->frames.emplace_back(total_rect.xsize(), total_rect.ysize(), + packed_frame_format); + PackedFrame* frame = &ppf->frames.back(); + + // We cannot tell right from the start whether there will be a + // need for an alpha channel. This is discovered only as soon as + // we see a transparent pixel. We hence initialize alpha lazily. + auto set_pixel_alpha = [&frame](size_t x, size_t y, uint8_t a) { + // If we do not have an alpha-channel and a==255 (fully opaque), + // we can skip setting this pixel-value and rely on + // "no alpha channel = no transparency". + if (a == 255 && frame->extra_channels.empty()) return; + ensure_have_alpha(frame); + static_cast<uint8_t*>( + frame->extra_channels[0].pixels())[y * frame->color.xsize + x] = a; + }; + + const ColorMapObject* const color_map = + image.ImageDesc.ColorMap ? 
image.ImageDesc.ColorMap : gif->SColorMap; + JXL_CHECK(color_map); + msan::UnpoisonMemory(color_map, sizeof(*color_map)); + msan::UnpoisonMemory(color_map->Colors, + sizeof(*color_map->Colors) * color_map->ColorCount); + GraphicsControlBlock gcb; + DGifSavedExtensionToGCB(gif.get(), i, &gcb); + msan::UnpoisonMemory(&gcb, sizeof(gcb)); + bool is_full_size = total_rect.x0() == 0 && total_rect.y0() == 0 && + total_rect.xsize() == canvas.color.xsize && + total_rect.ysize() == canvas.color.ysize; + if (ppf->info.have_animation) { + frame->frame_info.duration = gcb.DelayTime; + frame->frame_info.layer_info.have_crop = static_cast(!is_full_size); + frame->frame_info.layer_info.crop_x0 = total_rect.x0(); + frame->frame_info.layer_info.crop_y0 = total_rect.y0(); + frame->frame_info.layer_info.xsize = frame->color.xsize; + frame->frame_info.layer_info.ysize = frame->color.ysize; + if (last_base_was_none) { + replace = true; + } + frame->frame_info.layer_info.blend_info.blendmode = + replace ? JXL_BLEND_REPLACE : JXL_BLEND_BLEND; + // We always only reference at most the last frame + frame->frame_info.layer_info.blend_info.source = + last_base_was_none ? 0u : 1u; + frame->frame_info.layer_info.blend_info.clamp = 1; + frame->frame_info.layer_info.blend_info.alpha = 0; + // TODO(veluca): this could in principle be implemented. 
+ if (last_base_was_none && + (total_rect.x0() != 0 || total_rect.y0() != 0 || + total_rect.xsize() != canvas.color.xsize || + total_rect.ysize() != canvas.color.ysize || !replace)) { + return JXL_FAILURE( + "GIF with dispose-to-0 is not supported for non-full or " + "blended frames"); + } + switch (gcb.DisposalMode) { + case DISPOSE_DO_NOT: + case DISPOSE_BACKGROUND: + frame->frame_info.layer_info.save_as_reference = 1u; + last_base_was_none = false; + break; + case DISPOSE_PREVIOUS: + frame->frame_info.layer_info.save_as_reference = 0u; + break; + default: + frame->frame_info.layer_info.save_as_reference = 0u; + last_base_was_none = true; + } + } + + // Update the canvas by creating a copy first. + PackedImage new_canvas_image(canvas.color.xsize, canvas.color.ysize, + canvas.color.format); + memcpy(new_canvas_image.pixels(), canvas.color.pixels(), + new_canvas_image.pixels_size); + for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) { + // Assumes format.align == 0. row points to the beginning of the y row in + // the image_rect. 
+ PackedRgba* row = static_cast<PackedRgba*>(new_canvas_image.pixels()) + + (y + image_rect.y0()) * new_canvas_image.xsize + + image_rect.x0(); + for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) { + const GifByteType byte = image.RasterBits[byte_index]; + if (byte >= color_map->ColorCount) { + return JXL_FAILURE("GIF color is out of bounds"); + } + + if (byte == gcb.TransparentColor) continue; + GifColorType color = color_map->Colors[byte]; + row[x].r = color.Red; + row[x].g = color.Green; + row[x].b = color.Blue; + row[x].a = 255; + } + } + const PackedImage& sub_frame_image = frame->color; + if (replace) { + // Copy from the new canvas image to the subframe + for (size_t y = 0; y < total_rect.ysize(); ++y) { + const PackedRgba* row_in = + static_cast<const PackedRgba*>(new_canvas_image.pixels()) + + (y + total_rect.y0()) * new_canvas_image.xsize + total_rect.x0(); + PackedRgb* row_out = static_cast<PackedRgb*>(sub_frame_image.pixels()) + + y * sub_frame_image.xsize; + for (size_t x = 0; x < sub_frame_image.xsize; ++x) { + row_out[x].r = row_in[x].r; + row_out[x].g = row_in[x].g; + row_out[x].b = row_in[x].b; + set_pixel_alpha(x, y, row_in[x].a); + } + } + } else { + for (size_t y = 0, byte_index = 0; y < image_rect.ysize(); ++y) { + // Assumes format.align == 0 + PackedRgb* row = static_cast<PackedRgb*>(sub_frame_image.pixels()) + + y * sub_frame_image.xsize; + for (size_t x = 0; x < image_rect.xsize(); ++x, ++byte_index) { + const GifByteType byte = image.RasterBits[byte_index]; + if (byte >= color_map->ColorCount) { + return JXL_FAILURE("GIF color is out of bounds"); + } + if (byte == gcb.TransparentColor) { + row[x].r = 0; + row[x].g = 0; + row[x].b = 0; + set_pixel_alpha(x, y, 0); + continue; + } + GifColorType color = color_map->Colors[byte]; + row[x].r = color.Red; + row[x].g = color.Green; + row[x].b = color.Blue; + set_pixel_alpha(x, y, 255); + } + } + } + + if (!frame->extra_channels.empty()) { + ppf->info.alpha_bits = 8; + } + + switch (gcb.DisposalMode) { + case DISPOSE_DO_NOT: + canvas.color = 
std::move(new_canvas_image); + break; + + case DISPOSE_BACKGROUND: + std::fill_n(static_cast(canvas.color.pixels()), + canvas.color.xsize * canvas.color.ysize, background_rgba); + previous_rect_if_restore_to_background = image_rect; + break; + + case DISPOSE_PREVIOUS: + break; + + case DISPOSAL_UNSPECIFIED: + default: + std::fill_n(static_cast(canvas.color.pixels()), + canvas.color.xsize * canvas.color.ysize, background_rgba); + } + } + // Finally, if any frame has an alpha-channel, every frame will need + // to have an alpha-channel. + bool seen_alpha = false; + for (const PackedFrame& frame : ppf->frames) { + if (!frame.extra_channels.empty()) { + seen_alpha = true; + break; + } + } + if (seen_alpha) { + for (PackedFrame& frame : ppf->frames) { + ensure_have_alpha(&frame); + } + } + return true; +#else + return false; +#endif +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/gif.h b/third-party/libjxl/libjxl/lib/extras/dec/gif.h new file mode 100644 index 0000000000..4d5be8664e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/gif.h @@ -0,0 +1,35 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_DEC_GIF_H_ +#define LIB_EXTRAS_DEC_GIF_H_ + +// Decodes GIF images in memory. + +#include + +#include "lib/extras/dec/color_hints.h" +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +struct SizeConstraints; + +namespace extras { + +bool CanDecodeGIF(); + +// Decodes `bytes` into `ppf`. color_hints are applied via ApplyColorHints.
+Status DecodeImageGIF(Span bytes, const ColorHints& color_hints, + PackedPixelFile* ppf, + const SizeConstraints* constraints = nullptr); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_DEC_GIF_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpegli.cc b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.cc new file mode 100644 index 0000000000..ffa1b79c25 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.cc @@ -0,0 +1,271 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/dec/jpegli.h" + +#include +#include + +#include +#include +#include +#include + +#include "lib/jpegli/decode.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" + +namespace jxl { +namespace extras { + +namespace { + +constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69, + 0x66, 0x00, 0x00}; +constexpr int kExifMarker = JPEG_APP0 + 1; +constexpr int kICCMarker = JPEG_APP0 + 2; + +static inline bool IsJPG(const std::vector& bytes) { + if (bytes.size() < 2) return false; + if (bytes[0] != 0xFF || bytes[1] != 0xD8) return false; + return true; +} + +bool MarkerIsExif(const jpeg_saved_marker_ptr marker) { + return marker->marker == kExifMarker && + marker->data_length >= sizeof kExifSignature + 2 && + std::equal(std::begin(kExifSignature), std::end(kExifSignature), + marker->data); +} + +Status ReadICCProfile(jpeg_decompress_struct* const cinfo, + std::vector* const icc) { + uint8_t* icc_data_ptr; + unsigned int icc_data_len; + if (jpegli_read_icc_profile(cinfo, &icc_data_ptr, &icc_data_len)) { + icc->assign(icc_data_ptr, icc_data_ptr + icc_data_len); + free(icc_data_ptr); + return true; + } + return false; +} + +void ReadExif(jpeg_decompress_struct* const cinfo, + std::vector* const exif) { + constexpr size_t kExifSignatureSize = sizeof kExifSignature; + for 
(jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr; + marker = marker->next) { + // marker is initialized by libjpeg, which we are not instrumenting with + // msan. + msan::UnpoisonMemory(marker, sizeof(*marker)); + msan::UnpoisonMemory(marker->data, marker->data_length); + if (!MarkerIsExif(marker)) continue; + size_t marker_length = marker->data_length - kExifSignatureSize; + exif->resize(marker_length); + std::copy_n(marker->data + kExifSignatureSize, marker_length, exif->data()); + return; + } +} + +JpegliDataType ConvertDataType(JxlDataType type) { + switch (type) { + case JXL_TYPE_UINT8: + return JPEGLI_TYPE_UINT8; + case JXL_TYPE_UINT16: + return JPEGLI_TYPE_UINT16; + case JXL_TYPE_FLOAT: + return JPEGLI_TYPE_FLOAT; + default: + return JPEGLI_TYPE_UINT8; + } +} + +JpegliEndianness ConvertEndianness(JxlEndianness type) { + switch (type) { + case JXL_NATIVE_ENDIAN: + return JPEGLI_NATIVE_ENDIAN; + case JXL_BIG_ENDIAN: + return JPEGLI_BIG_ENDIAN; + case JXL_LITTLE_ENDIAN: + return JPEGLI_LITTLE_ENDIAN; + default: + return JPEGLI_NATIVE_ENDIAN; + } +} + +JxlColorSpace ConvertColorSpace(J_COLOR_SPACE colorspace) { + switch (colorspace) { + case JCS_GRAYSCALE: + return JXL_COLOR_SPACE_GRAY; + case JCS_RGB: + return JXL_COLOR_SPACE_RGB; + default: + return JXL_COLOR_SPACE_UNKNOWN; + } +} + +void MyErrorExit(j_common_ptr cinfo) { + jmp_buf* env = static_cast(cinfo->client_data); + (*cinfo->err->output_message)(cinfo); + jpegli_destroy_decompress(reinterpret_cast(cinfo)); + longjmp(*env, 1); +} + +void MyOutputMessage(j_common_ptr cinfo) { +#if JXL_DEBUG_WARNING == 1 + char buf[JMSG_LENGTH_MAX + 1]; + (*cinfo->err->format_message)(cinfo, buf); + buf[JMSG_LENGTH_MAX] = 0; + JXL_WARNING("%s", buf); +#endif +} + +void UnmapColors(uint8_t* row, size_t xsize, int components, + JSAMPARRAY colormap, size_t num_colors) { + JXL_CHECK(colormap != nullptr); + std::vector tmp(xsize * components); + for (size_t x = 0; x < xsize; ++x) { + JXL_CHECK(row[x] < 
num_colors); + for (int c = 0; c < components; ++c) { + tmp[x * components + c] = colormap[c][row[x]]; + } + } + memcpy(row, tmp.data(), tmp.size()); +} + +} // namespace + +Status DecodeJpeg(const std::vector& compressed, + const JpegDecompressParams& dparams, ThreadPool* pool, + PackedPixelFile* ppf) { + // Don't do anything for non-JPEG files (no need to report an error) + if (!IsJPG(compressed)) return false; + + // TODO(veluca): use JPEGData also for pixels? + + // We need to declare all the non-trivial destructor local variables before + // the call to setjmp(). + std::unique_ptr row; + + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + // Setup error handling in jpeg library so we can deal with broken jpegs in + // the fuzzer. + jpeg_error_mgr jerr; + jmp_buf env; + cinfo.err = jpegli_std_error(&jerr); + jerr.error_exit = &MyErrorExit; + jerr.output_message = &MyOutputMessage; + if (setjmp(env)) { + return false; + } + cinfo.client_data = static_cast(&env); + + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, + reinterpret_cast(compressed.data()), + compressed.size()); + jpegli_save_markers(&cinfo, kICCMarker, 0xFFFF); + jpegli_save_markers(&cinfo, kExifMarker, 0xFFFF); + const auto failure = [&cinfo](const char* str) -> Status { + jpegli_abort_decompress(&cinfo); + jpegli_destroy_decompress(&cinfo); + return JXL_FAILURE("%s", str); + }; + jpegli_read_header(&cinfo, TRUE); + // Might cause CPU-zip bomb. 
+ if (cinfo.arith_code) { + return failure("arithmetic code JPEGs are not supported"); + } + int nbcomp = cinfo.num_components; + if (nbcomp != 1 && nbcomp != 3) { + return failure("unsupported number of components in JPEG"); + } + if (dparams.force_rgb) { + cinfo.out_color_space = JCS_RGB; + } else if (dparams.force_grayscale) { + cinfo.out_color_space = JCS_GRAYSCALE; + } + if (!ReadICCProfile(&cinfo, &ppf->icc)) { + ppf->icc.clear(); + // Default to SRGB + ppf->color_encoding.color_space = + ConvertColorSpace(cinfo.out_color_space); + ppf->color_encoding.white_point = JXL_WHITE_POINT_D65; + ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB; + ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB; + ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_PERCEPTUAL; + } + ReadExif(&cinfo, &ppf->metadata.exif); + + ppf->info.xsize = cinfo.image_width; + ppf->info.ysize = cinfo.image_height; + if (dparams.output_data_type == JXL_TYPE_UINT8) { + ppf->info.bits_per_sample = 8; + ppf->info.exponent_bits_per_sample = 0; + } else if (dparams.output_data_type == JXL_TYPE_UINT16) { + ppf->info.bits_per_sample = 16; + ppf->info.exponent_bits_per_sample = 0; + } else if (dparams.output_data_type == JXL_TYPE_FLOAT) { + ppf->info.bits_per_sample = 32; + ppf->info.exponent_bits_per_sample = 8; + } else { + return failure("unsupported data type"); + } + ppf->info.uses_original_profile = true; + + // No alpha in JPG + ppf->info.alpha_bits = 0; + ppf->info.alpha_exponent_bits = 0; + ppf->info.orientation = JXL_ORIENT_IDENTITY; + + jpegli_set_output_format(&cinfo, ConvertDataType(dparams.output_data_type), + ConvertEndianness(dparams.output_endianness)); + + if (dparams.num_colors > 0) { + cinfo.quantize_colors = TRUE; + cinfo.desired_number_of_colors = dparams.num_colors; + cinfo.two_pass_quantize = dparams.two_pass_quant; + cinfo.dither_mode = (J_DITHER_MODE)dparams.dither_mode; + } + + jpegli_start_decompress(&cinfo); + + ppf->info.num_color_channels = 
cinfo.out_color_components; + const JxlPixelFormat format{ + /*num_channels=*/static_cast(cinfo.out_color_components), + dparams.output_data_type, + dparams.output_endianness, + /*align=*/0, + }; + ppf->frames.clear(); + // Allocates the frame buffer. + ppf->frames.emplace_back(cinfo.image_width, cinfo.image_height, format); + const auto& frame = ppf->frames.back(); + JXL_ASSERT(sizeof(JSAMPLE) * cinfo.out_color_components * + cinfo.image_width <= + frame.color.stride); + + for (size_t y = 0; y < cinfo.image_height; ++y) { + JSAMPROW rows[] = {reinterpret_cast( + static_cast(frame.color.pixels()) + + frame.color.stride * y)}; + jpegli_read_scanlines(&cinfo, rows, 1); + if (dparams.num_colors > 0) { + UnmapColors(rows[0], cinfo.output_width, cinfo.out_color_components, + cinfo.colormap, cinfo.actual_number_of_colors); + } + } + + jpegli_finish_decompress(&cinfo); + return true; + }; + bool success = try_catch_block(); + jpegli_destroy_decompress(&cinfo); + return success; +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpegli.h b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.h new file mode 100644 index 0000000000..574df54c8e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/jpegli.h @@ -0,0 +1,41 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_DEC_JPEGLI_H_ +#define LIB_EXTRAS_DEC_JPEGLI_H_ + +// Decodes JPG pixels and metadata in memory using the libjpegli library. 
+ +#include +#include + +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace extras { + +struct JpegDecompressParams { + JxlDataType output_data_type = JXL_TYPE_UINT8; + JxlEndianness output_endianness = JXL_NATIVE_ENDIAN; + bool force_rgb = false; + bool force_grayscale = false; + int num_colors = 0; + bool two_pass_quant = true; + // 0 = none, 1 = ordered, 2 = Floyd-Steinberg + int dither_mode = 2; +}; + +Status DecodeJpeg(const std::vector& compressed, + const JpegDecompressParams& dparams, ThreadPool* pool, + PackedPixelFile* ppf); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_DEC_JPEGLI_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpg.cc b/third-party/libjxl/libjxl/lib/extras/dec/jpg.cc new file mode 100644 index 0000000000..3c8a4bccfe --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/jpg.cc @@ -0,0 +1,338 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/dec/jpg.h" + +#if JPEGXL_ENABLE_JPEG +#include +#include +#endif +#include + +#include +#include +#include +#include + +#include "lib/extras/size_constraints.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" + +namespace jxl { +namespace extras { + +#if JPEGXL_ENABLE_JPEG +namespace { + +constexpr unsigned char kICCSignature[12] = { + 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00}; +constexpr int kICCMarker = JPEG_APP0 + 2; + +constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69, + 0x66, 0x00, 0x00}; +constexpr int kExifMarker = JPEG_APP0 + 1; + +static inline bool IsJPG(const Span bytes) { + if (bytes.size() < 2) return false; + if (bytes[0] != 0xFF || bytes[1] != 0xD8) return false; + return true; +} + +bool MarkerIsICC(const jpeg_saved_marker_ptr marker) { + return marker->marker == kICCMarker && + marker->data_length >= sizeof kICCSignature + 2 && + std::equal(std::begin(kICCSignature), std::end(kICCSignature), + marker->data); +} +bool MarkerIsExif(const jpeg_saved_marker_ptr marker) { + return marker->marker == kExifMarker && + marker->data_length >= sizeof kExifSignature + 2 && + std::equal(std::begin(kExifSignature), std::end(kExifSignature), + marker->data); +} + +Status ReadICCProfile(jpeg_decompress_struct* const cinfo, + std::vector* const icc) { + constexpr size_t kICCSignatureSize = sizeof kICCSignature; + // ICC signature + uint8_t index + uint8_t max_index. + constexpr size_t kICCHeadSize = kICCSignatureSize + 2; + // Markers are 1-indexed, and we keep them that way in this vector to get a + // convenient 0 at the front for when we compute the offsets later. + std::vector marker_lengths; + int num_markers = 0; + int seen_markers_count = 0; + bool has_num_markers = false; + for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr; + marker = marker->next) { + // marker is initialized by libjpeg, which we are not instrumenting with + // msan. 
+ msan::UnpoisonMemory(marker, sizeof(*marker)); + msan::UnpoisonMemory(marker->data, marker->data_length); + if (!MarkerIsICC(marker)) continue; + + const int current_marker = marker->data[kICCSignatureSize]; + if (current_marker == 0) { + return JXL_FAILURE("inconsistent JPEG ICC marker numbering"); + } + const int current_num_markers = marker->data[kICCSignatureSize + 1]; + if (current_marker > current_num_markers) { + return JXL_FAILURE("inconsistent JPEG ICC marker numbering"); + } + if (has_num_markers) { + if (current_num_markers != num_markers) { + return JXL_FAILURE("inconsistent numbers of JPEG ICC markers"); + } + } else { + num_markers = current_num_markers; + has_num_markers = true; + marker_lengths.resize(num_markers + 1); + } + + size_t marker_length = marker->data_length - kICCHeadSize; + + if (marker_length == 0) { + // NB: if we allow empty chunks, then the next check is incorrect. + return JXL_FAILURE("Empty ICC chunk"); + } + + if (marker_lengths[current_marker] != 0) { + return JXL_FAILURE("duplicate JPEG ICC marker number"); + } + marker_lengths[current_marker] = marker_length; + seen_markers_count++; + } + + if (marker_lengths.empty()) { + // Not an error. 
+ return false; + } + + if (seen_markers_count != num_markers) { + JXL_DASSERT(has_num_markers); + return JXL_FAILURE("Incomplete set of ICC chunks"); + } + + std::vector offsets = std::move(marker_lengths); + std::partial_sum(offsets.begin(), offsets.end(), offsets.begin()); + icc->resize(offsets.back()); + + for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr; + marker = marker->next) { + if (!MarkerIsICC(marker)) continue; + const uint8_t* first = marker->data + kICCHeadSize; + uint8_t current_marker = marker->data[kICCSignatureSize]; + size_t offset = offsets[current_marker - 1]; + size_t marker_length = offsets[current_marker] - offset; + std::copy_n(first, marker_length, icc->data() + offset); + } + + return true; +} + +void ReadExif(jpeg_decompress_struct* const cinfo, + std::vector* const exif) { + constexpr size_t kExifSignatureSize = sizeof kExifSignature; + for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr; + marker = marker->next) { + // marker is initialized by libjpeg, which we are not instrumenting with + // msan. 
+ msan::UnpoisonMemory(marker, sizeof(*marker)); + msan::UnpoisonMemory(marker->data, marker->data_length); + if (!MarkerIsExif(marker)) continue; + size_t marker_length = marker->data_length - kExifSignatureSize; + exif->resize(marker_length); + std::copy_n(marker->data + kExifSignatureSize, marker_length, exif->data()); + return; + } +} + +void MyErrorExit(j_common_ptr cinfo) { + jmp_buf* env = static_cast(cinfo->client_data); + (*cinfo->err->output_message)(cinfo); + jpeg_destroy_decompress(reinterpret_cast(cinfo)); + longjmp(*env, 1); +} + +void MyOutputMessage(j_common_ptr cinfo) { +#if JXL_DEBUG_WARNING == 1 + char buf[JMSG_LENGTH_MAX + 1]; + (*cinfo->err->format_message)(cinfo, buf); + buf[JMSG_LENGTH_MAX] = 0; + JXL_WARNING("%s", buf); +#endif +} + +void UnmapColors(uint8_t* row, size_t xsize, int components, + JSAMPARRAY colormap, size_t num_colors) { + JXL_CHECK(colormap != nullptr); + std::vector tmp(xsize * components); + for (size_t x = 0; x < xsize; ++x) { + JXL_CHECK(row[x] < num_colors); + for (int c = 0; c < components; ++c) { + tmp[x * components + c] = colormap[c][row[x]]; + } + } + memcpy(row, tmp.data(), tmp.size()); +} + +} // namespace +#endif + +bool CanDecodeJPG() { +#if JPEGXL_ENABLE_JPEG + return true; +#else + return false; +#endif +} + +Status DecodeImageJPG(const Span bytes, + const ColorHints& color_hints, PackedPixelFile* ppf, + const SizeConstraints* constraints, + const JPGDecompressParams* dparams) { +#if JPEGXL_ENABLE_JPEG + // Don't do anything for non-JPEG files (no need to report an error) + if (!IsJPG(bytes)) return false; + + // TODO(veluca): use JPEGData also for pixels? + + // We need to declare all the non-trivial destructor local variables before + // the call to setjmp(). + std::unique_ptr row; + + const auto try_catch_block = [&]() -> bool { + jpeg_decompress_struct cinfo = {}; + // Setup error handling in jpeg library so we can deal with broken jpegs in + // the fuzzer. 
+ jpeg_error_mgr jerr; + jmp_buf env; + cinfo.err = jpeg_std_error(&jerr); + jerr.error_exit = &MyErrorExit; + jerr.output_message = &MyOutputMessage; + if (setjmp(env)) { + return false; + } + cinfo.client_data = static_cast(&env); + + jpeg_create_decompress(&cinfo); + jpeg_mem_src(&cinfo, reinterpret_cast(bytes.data()), + bytes.size()); + jpeg_save_markers(&cinfo, kICCMarker, 0xFFFF); + jpeg_save_markers(&cinfo, kExifMarker, 0xFFFF); + const auto failure = [&cinfo](const char* str) -> Status { + jpeg_abort_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + return JXL_FAILURE("%s", str); + }; + int read_header_result = jpeg_read_header(&cinfo, TRUE); + // TODO(eustas): what about JPEG_HEADER_TABLES_ONLY? + if (read_header_result == JPEG_SUSPENDED) { + return failure("truncated JPEG input"); + } + if (!VerifyDimensions(constraints, cinfo.image_width, cinfo.image_height)) { + return failure("image too big"); + } + // Might cause CPU-zip bomb. + if (cinfo.arith_code) { + return failure("arithmetic code JPEGs are not supported"); + } + int nbcomp = cinfo.num_components; + if (nbcomp != 1 && nbcomp != 3) { + return failure("unsupported number of components in JPEG"); + } + if (!ReadICCProfile(&cinfo, &ppf->icc)) { + ppf->icc.clear(); + // Default to SRGB + // Actually, (cinfo.output_components == nbcomp) will be checked after + // `jpeg_start_decompress`. + ppf->color_encoding.color_space = + (nbcomp == 1) ? 
JXL_COLOR_SPACE_GRAY : JXL_COLOR_SPACE_RGB; + ppf->color_encoding.white_point = JXL_WHITE_POINT_D65; + ppf->color_encoding.primaries = JXL_PRIMARIES_SRGB; + ppf->color_encoding.transfer_function = JXL_TRANSFER_FUNCTION_SRGB; + ppf->color_encoding.rendering_intent = JXL_RENDERING_INTENT_PERCEPTUAL; + } + ReadExif(&cinfo, &ppf->metadata.exif); + if (!ApplyColorHints(color_hints, /*color_already_set=*/true, + /*is_gray=*/false, ppf)) { + return failure("ApplyColorHints failed"); + } + + ppf->info.xsize = cinfo.image_width; + ppf->info.ysize = cinfo.image_height; + // Original data is uint, so exponent_bits_per_sample = 0. + ppf->info.bits_per_sample = BITS_IN_JSAMPLE; + JXL_ASSERT(BITS_IN_JSAMPLE == 8 || BITS_IN_JSAMPLE == 16); + ppf->info.exponent_bits_per_sample = 0; + ppf->info.uses_original_profile = true; + + // No alpha in JPG + ppf->info.alpha_bits = 0; + ppf->info.alpha_exponent_bits = 0; + + ppf->info.num_color_channels = nbcomp; + ppf->info.orientation = JXL_ORIENT_IDENTITY; + + if (dparams && dparams->num_colors > 0) { + cinfo.quantize_colors = TRUE; + cinfo.desired_number_of_colors = dparams->num_colors; + cinfo.two_pass_quantize = dparams->two_pass_quant; + cinfo.dither_mode = (J_DITHER_MODE)dparams->dither_mode; + } + + jpeg_start_decompress(&cinfo); + JXL_ASSERT(cinfo.out_color_components == nbcomp); + JxlDataType data_type = + ppf->info.bits_per_sample <= 8 ? JXL_TYPE_UINT8 : JXL_TYPE_UINT16; + + const JxlPixelFormat format{ + /*num_channels=*/static_cast(nbcomp), + data_type, + /*endianness=*/JXL_NATIVE_ENDIAN, + /*align=*/0, + }; + ppf->frames.clear(); + // Allocates the frame buffer. 
+ ppf->frames.emplace_back(cinfo.image_width, cinfo.image_height, format); + const auto& frame = ppf->frames.back(); + JXL_ASSERT(sizeof(JSAMPLE) * cinfo.out_color_components * + cinfo.image_width <= + frame.color.stride); + + if (cinfo.quantize_colors) { + jxl::msan::UnpoisonMemory(cinfo.colormap, cinfo.out_color_components * + sizeof(cinfo.colormap[0])); + for (int c = 0; c < cinfo.out_color_components; ++c) { + jxl::msan::UnpoisonMemory( + cinfo.colormap[c], + cinfo.actual_number_of_colors * sizeof(cinfo.colormap[c][0])); + } + } + for (size_t y = 0; y < cinfo.image_height; ++y) { + JSAMPROW rows[] = {reinterpret_cast( + static_cast(frame.color.pixels()) + + frame.color.stride * y)}; + jpeg_read_scanlines(&cinfo, rows, 1); + msan::UnpoisonMemory(rows[0], sizeof(JSAMPLE) * cinfo.output_components * + cinfo.image_width); + if (dparams && dparams->num_colors > 0) { + UnmapColors(rows[0], cinfo.output_width, cinfo.out_color_components, + cinfo.colormap, cinfo.actual_number_of_colors); + } + } + + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + return true; + }; + + return try_catch_block(); +#else + return false; +#endif +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jpg.h b/third-party/libjxl/libjxl/lib/extras/dec/jpg.h new file mode 100644 index 0000000000..15ed1ffd7a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/jpg.h @@ -0,0 +1,46 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_DEC_JPG_H_ +#define LIB_EXTRAS_DEC_JPG_H_ + +// Decodes JPG pixels and metadata in memory. 
+ +#include + +#include "lib/extras/codec.h" +#include "lib/extras/dec/color_hints.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +struct SizeConstraints; + +namespace extras { + +bool CanDecodeJPG(); + +struct JPGDecompressParams { + int num_colors = 0; + bool two_pass_quant = false; + // 0 = none, 1 = ordered, 2 = Floyd-Steinberg + int dither_mode = 0; +}; + +// Decodes `bytes` into `ppf`. color_hints are ignored. +// `elapsed_deinterleave`, if non-null, will be set to the time (in seconds) +// that it took to deinterleave the raw JSAMPLEs to planar floats. +Status DecodeImageJPG(Span bytes, const ColorHints& color_hints, + PackedPixelFile* ppf, + const SizeConstraints* constraints = nullptr, + const JPGDecompressParams* dparams = nullptr); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_DEC_JPG_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jxl.cc b/third-party/libjxl/libjxl/lib/extras/dec/jxl.cc new file mode 100644 index 0000000000..5db0e31482 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/jxl.cc @@ -0,0 +1,568 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/dec/jxl.h" + +#include +#include +#include + +#include "lib/extras/dec/color_description.h" +#include "lib/extras/enc/encode.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/exif.h" + +namespace jxl { +namespace extras { +namespace { + +struct BoxProcessor { + BoxProcessor(JxlDecoder* dec) : dec_(dec) { Reset(); } + + void InitializeOutput(std::vector* out) { + box_data_ = out; + AddMoreOutput(); + } + + bool AddMoreOutput() { + Flush(); + static const size_t kBoxOutputChunkSize = 1 << 16; + box_data_->resize(box_data_->size() + kBoxOutputChunkSize); + next_out_ = box_data_->data() + total_size_; + avail_out_ = box_data_->size() - total_size_; + if (JXL_DEC_SUCCESS != + JxlDecoderSetBoxBuffer(dec_, next_out_, avail_out_)) { + fprintf(stderr, "JxlDecoderSetBoxBuffer failed\n"); + return false; + } + return true; + } + + void FinalizeOutput() { + if (box_data_ == nullptr) return; + Flush(); + box_data_->resize(total_size_); + Reset(); + } + + private: + JxlDecoder* dec_; + std::vector* box_data_; + uint8_t* next_out_; + size_t avail_out_; + size_t total_size_; + + void Reset() { + box_data_ = nullptr; + next_out_ = nullptr; + avail_out_ = 0; + total_size_ = 0; + } + void Flush() { + if (box_data_ == nullptr) return; + size_t remaining = JxlDecoderReleaseBoxBuffer(dec_); + size_t bytes_written = avail_out_ - remaining; + next_out_ += bytes_written; + avail_out_ -= bytes_written; + total_size_ += bytes_written; + } +}; + +void SetBitDepthFromDataType(JxlDataType data_type, uint32_t* bits_per_sample, + uint32_t* exponent_bits_per_sample) { + switch (data_type) { + case JXL_TYPE_UINT8: + *bits_per_sample = 8; + *exponent_bits_per_sample = 0; + break; + case JXL_TYPE_UINT16: + *bits_per_sample = 16; + *exponent_bits_per_sample = 0; + break; + case JXL_TYPE_FLOAT16: + *bits_per_sample = 16; + *exponent_bits_per_sample = 5; + break; + case JXL_TYPE_FLOAT: + *bits_per_sample = 32; + *exponent_bits_per_sample = 8; + break; + } +} + 
+template +void UpdateBitDepth(JxlBitDepth bit_depth, JxlDataType data_type, T* info) { + if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) { + SetBitDepthFromDataType(data_type, &info->bits_per_sample, + &info->exponent_bits_per_sample); + } else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) { + info->bits_per_sample = bit_depth.bits_per_sample; + info->exponent_bits_per_sample = bit_depth.exponent_bits_per_sample; + } +} + +} // namespace + +bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size, + const JXLDecompressParams& dparams, size_t* decoded_bytes, + PackedPixelFile* ppf, std::vector* jpeg_bytes) { + JxlSignature sig = JxlSignatureCheck(bytes, bytes_size); + // silently return false if this is not a JXL file + if (sig == JXL_SIG_INVALID) return false; + + auto decoder = JxlDecoderMake(/*memory_manager=*/nullptr); + JxlDecoder* dec = decoder.get(); + ppf->frames.clear(); + + if (dparams.runner_opaque != nullptr && + JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec, dparams.runner, + dparams.runner_opaque)) { + fprintf(stderr, "JxlEncoderSetParallelRunner failed\n"); + return false; + } + + JxlPixelFormat format; + std::vector accepted_formats = dparams.accepted_formats; + + JxlColorEncoding color_encoding; + size_t num_color_channels = 0; + if (!dparams.color_space.empty()) { + if (!jxl::ParseDescription(dparams.color_space, &color_encoding)) { + fprintf(stderr, "Failed to parse color space %s.\n", + dparams.color_space.c_str()); + return false; + } + num_color_channels = + color_encoding.color_space == JXL_COLOR_SPACE_GRAY ? 1 : 3; + } + + bool can_reconstruct_jpeg = false; + std::vector jpeg_data_chunk; + if (jpeg_bytes != nullptr) { + // This bound is very likely to be enough to hold the entire + // reconstructed JPEG, to avoid having to do expensive retries. 
+ jpeg_data_chunk.resize(bytes_size * 3 / 2 + 1024); + jpeg_bytes->resize(0); + } + + int events = (JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE); + + bool max_passes_defined = + (dparams.max_passes < std::numeric_limits::max()); + if (max_passes_defined || dparams.max_downsampling > 1) { + events |= JXL_DEC_FRAME_PROGRESSION; + if (max_passes_defined) { + JxlDecoderSetProgressiveDetail(dec, JxlProgressiveDetail::kPasses); + } else { + JxlDecoderSetProgressiveDetail(dec, JxlProgressiveDetail::kLastPasses); + } + } + if (jpeg_bytes != nullptr) { + events |= JXL_DEC_JPEG_RECONSTRUCTION; + } else { + events |= (JXL_DEC_COLOR_ENCODING | JXL_DEC_FRAME | JXL_DEC_PREVIEW_IMAGE | + JXL_DEC_BOX); + if (accepted_formats.empty()) { + // decoding just the metadata, not the pixel data + events ^= (JXL_DEC_FULL_IMAGE | JXL_DEC_PREVIEW_IMAGE); + } + } + if (JXL_DEC_SUCCESS != JxlDecoderSubscribeEvents(dec, events)) { + fprintf(stderr, "JxlDecoderSubscribeEvents failed\n"); + return false; + } + if (jpeg_bytes == nullptr) { + if (JXL_DEC_SUCCESS != + JxlDecoderSetRenderSpotcolors(dec, dparams.render_spotcolors)) { + fprintf(stderr, "JxlDecoderSetRenderSpotColors failed\n"); + return false; + } + if (JXL_DEC_SUCCESS != + JxlDecoderSetKeepOrientation(dec, dparams.keep_orientation)) { + fprintf(stderr, "JxlDecoderSetKeepOrientation failed\n"); + return false; + } + if (JXL_DEC_SUCCESS != + JxlDecoderSetUnpremultiplyAlpha(dec, dparams.unpremultiply_alpha)) { + fprintf(stderr, "JxlDecoderSetUnpremultiplyAlpha failed\n"); + return false; + } + if (dparams.display_nits > 0 && + JXL_DEC_SUCCESS != + JxlDecoderSetDesiredIntensityTarget(dec, dparams.display_nits)) { + fprintf(stderr, "Decoder failed to set desired intensity target\n"); + return false; + } + if (JXL_DEC_SUCCESS != JxlDecoderSetDecompressBoxes(dec, JXL_TRUE)) { + fprintf(stderr, "JxlDecoderSetDecompressBoxes failed\n"); + return false; + } + } + if (JXL_DEC_SUCCESS != JxlDecoderSetInput(dec, bytes, bytes_size)) { + fprintf(stderr, 
"Decoder failed to set input\n"); + return false; + } + uint32_t progression_index = 0; + bool codestream_done = accepted_formats.empty(); + BoxProcessor boxes(dec); + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + if (status == JXL_DEC_ERROR) { + fprintf(stderr, "Failed to decode image\n"); + return false; + } else if (status == JXL_DEC_NEED_MORE_INPUT) { + if (codestream_done) { + break; + } + if (dparams.allow_partial_input) { + if (JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec)) { + fprintf(stderr, + "Input file is truncated and there is no preview " + "available yet.\n"); + return false; + } + break; + } + size_t released_size = JxlDecoderReleaseInput(dec); + fprintf(stderr, + "Input file is truncated (total bytes: %" PRIuS + ", processed bytes: %" PRIuS + ") and --allow_partial_files is not present.\n", + bytes_size, bytes_size - released_size); + return false; + } else if (status == JXL_DEC_BOX) { + boxes.FinalizeOutput(); + JxlBoxType box_type; + if (JXL_DEC_SUCCESS != JxlDecoderGetBoxType(dec, box_type, JXL_TRUE)) { + fprintf(stderr, "JxlDecoderGetBoxType failed\n"); + return false; + } + std::vector* box_data = nullptr; + if (memcmp(box_type, "Exif", 4) == 0) { + box_data = &ppf->metadata.exif; + } else if (memcmp(box_type, "iptc", 4) == 0) { + box_data = &ppf->metadata.iptc; + } else if (memcmp(box_type, "jumb", 4) == 0) { + box_data = &ppf->metadata.jumbf; + } else if (memcmp(box_type, "xml ", 4) == 0) { + box_data = &ppf->metadata.xmp; + } + if (box_data) { + boxes.InitializeOutput(box_data); + } + } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) { + boxes.AddMoreOutput(); + } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) { + can_reconstruct_jpeg = true; + // Decoding to JPEG. 
+ if (JXL_DEC_SUCCESS != JxlDecoderSetJPEGBuffer(dec, + jpeg_data_chunk.data(), + jpeg_data_chunk.size())) { + fprintf(stderr, "Decoder failed to set JPEG Buffer\n"); + return false; + } + } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) { + // Decoded a chunk to JPEG. + size_t used_jpeg_output = + jpeg_data_chunk.size() - JxlDecoderReleaseJPEGBuffer(dec); + jpeg_bytes->insert(jpeg_bytes->end(), jpeg_data_chunk.data(), + jpeg_data_chunk.data() + used_jpeg_output); + if (used_jpeg_output == 0) { + // Chunk is too small. + jpeg_data_chunk.resize(jpeg_data_chunk.size() * 2); + } + if (JXL_DEC_SUCCESS != JxlDecoderSetJPEGBuffer(dec, + jpeg_data_chunk.data(), + jpeg_data_chunk.size())) { + fprintf(stderr, "Decoder failed to set JPEG Buffer\n"); + return false; + } + } else if (status == JXL_DEC_BASIC_INFO) { + if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec, &ppf->info)) { + fprintf(stderr, "JxlDecoderGetBasicInfo failed\n"); + return false; + } + if (accepted_formats.empty()) continue; + if (num_color_channels != 0) { + // Mark the change in number of color channels due to the requested + // color space. + ppf->info.num_color_channels = num_color_channels; + } + if (dparams.output_bitdepth.type == JXL_BIT_DEPTH_CUSTOM) { + // Select format based on custom bits per sample. + ppf->info.bits_per_sample = dparams.output_bitdepth.bits_per_sample; + } + // Select format according to accepted formats. + if (!jxl::extras::SelectFormat(accepted_formats, ppf->info, &format)) { + fprintf(stderr, "SelectFormat failed\n"); + return false; + } + bool have_alpha = (format.num_channels == 2 || format.num_channels == 4); + if (!have_alpha) { + // Mark in the basic info that alpha channel was dropped. + ppf->info.alpha_bits = 0; + } else { + if (dparams.unpremultiply_alpha) { + // Mark in the basic info that alpha was unpremultiplied. 
+ ppf->info.alpha_premultiplied = false; + } + } + bool alpha_found = false; + for (uint32_t i = 0; i < ppf->info.num_extra_channels; ++i) { + JxlExtraChannelInfo eci; + if (JXL_DEC_SUCCESS != JxlDecoderGetExtraChannelInfo(dec, i, &eci)) { + fprintf(stderr, "JxlDecoderGetExtraChannelInfo failed\n"); + return false; + } + if (eci.type == JXL_CHANNEL_ALPHA && have_alpha && !alpha_found) { + // Skip the first alpha channels because it is already present in the + // interleaved image. + alpha_found = true; + continue; + } + std::string name(eci.name_length + 1, 0); + if (JXL_DEC_SUCCESS != + JxlDecoderGetExtraChannelName(dec, i, &name[0], name.size())) { + fprintf(stderr, "JxlDecoderGetExtraChannelName failed\n"); + return false; + } + name.resize(eci.name_length); + ppf->extra_channels_info.push_back({eci, i, name}); + } + } else if (status == JXL_DEC_COLOR_ENCODING) { + if (!dparams.color_space.empty()) { + if (ppf->info.uses_original_profile) { + fprintf(stderr, + "Warning: --color_space ignored because the image is " + "not XYB encoded.\n"); + } else { + if (JXL_DEC_SUCCESS != + JxlDecoderSetPreferredColorProfile(dec, &color_encoding)) { + fprintf(stderr, "Failed to set color space.\n"); + return false; + } + } + } + size_t icc_size = 0; + JxlColorProfileTarget target = JXL_COLOR_PROFILE_TARGET_DATA; + ppf->color_encoding.color_space = JXL_COLOR_SPACE_UNKNOWN; + if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsEncodedProfile( + dec, target, &ppf->color_encoding) || + dparams.need_icc) { + // only get ICC if it is not an Enum color encoding + if (JXL_DEC_SUCCESS != + JxlDecoderGetICCProfileSize(dec, target, &icc_size)) { + fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n"); + } + if (icc_size != 0) { + ppf->icc.resize(icc_size); + if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile( + dec, target, ppf->icc.data(), icc_size)) { + fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n"); + return false; + } + } + } + icc_size = 0; + target = 
JXL_COLOR_PROFILE_TARGET_ORIGINAL; + if (JXL_DEC_SUCCESS != + JxlDecoderGetICCProfileSize(dec, target, &icc_size)) { + fprintf(stderr, "JxlDecoderGetICCProfileSize failed\n"); + } + if (icc_size != 0) { + ppf->orig_icc.resize(icc_size); + if (JXL_DEC_SUCCESS != + JxlDecoderGetColorAsICCProfile(dec, target, ppf->orig_icc.data(), + icc_size)) { + fprintf(stderr, "JxlDecoderGetColorAsICCProfile failed\n"); + return false; + } + } + } else if (status == JXL_DEC_FRAME) { + jxl::extras::PackedFrame frame(ppf->info.xsize, ppf->info.ysize, format); + if (JXL_DEC_SUCCESS != JxlDecoderGetFrameHeader(dec, &frame.frame_info)) { + fprintf(stderr, "JxlDecoderGetFrameHeader failed\n"); + return false; + } + frame.name.resize(frame.frame_info.name_length + 1, 0); + if (JXL_DEC_SUCCESS != + JxlDecoderGetFrameName(dec, &frame.name[0], frame.name.size())) { + fprintf(stderr, "JxlDecoderGetFrameName failed\n"); + return false; + } + frame.name.resize(frame.frame_info.name_length); + ppf->frames.emplace_back(std::move(frame)); + progression_index = 0; + } else if (status == JXL_DEC_FRAME_PROGRESSION) { + size_t downsampling = JxlDecoderGetIntendedDownsamplingRatio(dec); + if ((max_passes_defined && progression_index >= dparams.max_passes) || + (!max_passes_defined && downsampling <= dparams.max_downsampling)) { + if (JXL_DEC_SUCCESS != JxlDecoderFlushImage(dec)) { + fprintf(stderr, "JxlDecoderFlushImage failed\n"); + return false; + } + if (ppf->frames.back().frame_info.is_last) { + break; + } + if (JXL_DEC_SUCCESS != JxlDecoderSkipCurrentFrame(dec)) { + fprintf(stderr, "JxlDecoderSkipCurrentFrame failed\n"); + return false; + } + } + ++progression_index; + } else if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) { + size_t buffer_size; + if (JXL_DEC_SUCCESS != + JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)) { + fprintf(stderr, "JxlDecoderPreviewOutBufferSize failed\n"); + return false; + } + ppf->preview_frame = std::unique_ptr( + new 
jxl::extras::PackedFrame(ppf->info.preview.xsize, + ppf->info.preview.ysize, format)); + if (buffer_size != ppf->preview_frame->color.pixels_size) { + fprintf(stderr, "Invalid out buffer size %" PRIuS " %" PRIuS "\n", + buffer_size, ppf->preview_frame->color.pixels_size); + return false; + } + if (JXL_DEC_SUCCESS != + JxlDecoderSetPreviewOutBuffer( + dec, &format, ppf->preview_frame->color.pixels(), buffer_size)) { + fprintf(stderr, "JxlDecoderSetPreviewOutBuffer failed\n"); + return false; + } + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + if (jpeg_bytes != nullptr) { + break; + } + size_t buffer_size; + if (JXL_DEC_SUCCESS != + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)) { + fprintf(stderr, "JxlDecoderImageOutBufferSize failed\n"); + return false; + } + jxl::extras::PackedFrame& frame = ppf->frames.back(); + if (buffer_size != frame.color.pixels_size) { + fprintf(stderr, "Invalid out buffer size %" PRIuS " %" PRIuS "\n", + buffer_size, frame.color.pixels_size); + return false; + } + + if (dparams.use_image_callback) { + auto callback = [](void* opaque, size_t x, size_t y, size_t num_pixels, + const void* pixels) { + auto* ppf = reinterpret_cast(opaque); + jxl::extras::PackedImage& color = ppf->frames.back().color; + uint8_t* pixels_buffer = reinterpret_cast(color.pixels()); + size_t sample_size = color.pixel_stride(); + memcpy(pixels_buffer + (color.stride * y + sample_size * x), pixels, + num_pixels * sample_size); + }; + if (JXL_DEC_SUCCESS != + JxlDecoderSetImageOutCallback(dec, &format, callback, ppf)) { + fprintf(stderr, "JxlDecoderSetImageOutCallback failed\n"); + return false; + } + } else { + if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec, &format, + frame.color.pixels(), + buffer_size)) { + fprintf(stderr, "JxlDecoderSetImageOutBuffer failed\n"); + return false; + } + } + if (JXL_DEC_SUCCESS != + JxlDecoderSetImageOutBitDepth(dec, &dparams.output_bitdepth)) { + fprintf(stderr, "JxlDecoderSetImageOutBitDepth failed\n"); + 
return false; + } + UpdateBitDepth(dparams.output_bitdepth, format.data_type, &ppf->info); + bool have_alpha = (format.num_channels == 2 || format.num_channels == 4); + if (have_alpha) { + // Interleaved alpha channels has the same bit depth as color channels. + ppf->info.alpha_bits = ppf->info.bits_per_sample; + ppf->info.alpha_exponent_bits = ppf->info.exponent_bits_per_sample; + } + JxlPixelFormat ec_format = format; + ec_format.num_channels = 1; + for (auto& eci : ppf->extra_channels_info) { + frame.extra_channels.emplace_back(jxl::extras::PackedImage( + ppf->info.xsize, ppf->info.ysize, ec_format)); + auto& ec = frame.extra_channels.back(); + size_t buffer_size; + if (JXL_DEC_SUCCESS != JxlDecoderExtraChannelBufferSize( + dec, &ec_format, &buffer_size, eci.index)) { + fprintf(stderr, "JxlDecoderExtraChannelBufferSize failed\n"); + return false; + } + if (buffer_size != ec.pixels_size) { + fprintf(stderr, + "Invalid extra channel buffer size" + " %" PRIuS " %" PRIuS "\n", + buffer_size, ec.pixels_size); + return false; + } + if (JXL_DEC_SUCCESS != + JxlDecoderSetExtraChannelBuffer(dec, &ec_format, ec.pixels(), + buffer_size, eci.index)) { + fprintf(stderr, "JxlDecoderSetExtraChannelBuffer failed\n"); + return false; + } + UpdateBitDepth(dparams.output_bitdepth, ec_format.data_type, + &eci.ec_info); + } + } else if (status == JXL_DEC_SUCCESS) { + // Decoding finished successfully. + break; + } else if (status == JXL_DEC_PREVIEW_IMAGE) { + // Nothing to do. + } else if (status == JXL_DEC_FULL_IMAGE) { + if (jpeg_bytes != nullptr || ppf->frames.back().frame_info.is_last) { + codestream_done = true; + } + } else { + fprintf(stderr, "Error: unexpected status: %d\n", + static_cast(status)); + return false; + } + } + boxes.FinalizeOutput(); + if (!ppf->metadata.exif.empty()) { + // Verify that Exif box has a valid TIFF header at the specified offset. + // Discard bytes preceding the header. 
+    if (ppf->metadata.exif.size() >= 4) {
+      uint32_t offset = LoadBE32(ppf->metadata.exif.data());
+      if (offset <= ppf->metadata.exif.size() - 8) {
+        std::vector exif(ppf->metadata.exif.begin() + 4 + offset,
+                         ppf->metadata.exif.end());
+        bool bigendian;
+        if (IsExif(exif, &bigendian)) {
+          ppf->metadata.exif = std::move(exif);
+        } else {
+          fprintf(stderr, "Warning: invalid TIFF header in Exif\n");
+        }
+      } else {
+        fprintf(stderr, "Warning: invalid Exif offset: %" PRIu32 "\n", offset);
+      }
+    } else {
+      fprintf(stderr, "Warning: invalid Exif length: %" PRIuS "\n",
+              ppf->metadata.exif.size());
+    }
+  }
+  if (jpeg_bytes != nullptr) {
+    if (!can_reconstruct_jpeg) return false;
+    size_t used_jpeg_output =
+        jpeg_data_chunk.size() - JxlDecoderReleaseJPEGBuffer(dec);
+    jpeg_bytes->insert(jpeg_bytes->end(), jpeg_data_chunk.data(),
+                       jpeg_data_chunk.data() + used_jpeg_output);
+  }
+  if (decoded_bytes) {
+    *decoded_bytes = bytes_size - JxlDecoderReleaseInput(dec);
+  }
+  return true;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/jxl.h b/third-party/libjxl/libjxl/lib/extras/dec/jxl.h
new file mode 100644
index 0000000000..d717ee7164
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/jxl.h
@@ -0,0 +1,80 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_JXL_H_
+#define LIB_EXTRAS_DEC_JXL_H_
+
+// Decodes JPEG XL images in memory.
+
+#include <jxl/parallel_runner.h>
+#include <jxl/types.h>
+#include <stdint.h>
+
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "lib/extras/packed_image.h"
+
+namespace jxl {
+namespace extras {
+
+// Options controlling how DecodeImageJXL decodes an image.
+struct JXLDecompressParams {
+  // If empty, little endian float formats will be accepted.
+  std::vector<JxlPixelFormat> accepted_formats;
+
+  // Requested output color space description.
+  std::string color_space;
+  // If set, performs tone mapping to this intensity target luminance.
+  float display_nits = 0.0;
+  // Whether spot colors are rendered on the image.
+  bool render_spotcolors = true;
+  // Whether to keep or undo the orientation given in the header.
+  bool keep_orientation = false;
+
+  // If runner_opaque is set, the decoder uses this parallel runner.
+  JxlParallelRunner runner = nullptr;
+  void* runner_opaque = nullptr;
+
+  // Whether truncated input is tolerated: when true, the decoder flushes what
+  // it has decoded so far instead of reporting an error.
+  bool allow_partial_input = false;
+
+  // Set to true if an ICC profile has to be synthesized for Enum color
+  // encodings
+  bool need_icc = false;
+
+  // How many passes to decode at most. By default, decode everything.
+  uint32_t max_passes = std::numeric_limits<uint32_t>::max();
+
+  // Alternatively, one can specify the maximum tolerable downscaling factor
+  // with respect to the full size of the image. By default, nothing less than
+  // the full size is requested.
+  size_t max_downsampling = 1;
+
+  // Whether to use the image callback or the image buffer to get the output.
+  bool use_image_callback = true;
+  // Whether to unpremultiply colors for associated alpha channels.
+  bool unpremultiply_alpha = false;
+
+  // Controls the effective bit depth of the output pixels.
+  JxlBitDepth output_bitdepth = {JXL_BIT_DEPTH_FROM_CODESTREAM, 0, 0};
+};
+
+// Decodes the JPEG XL image in `bytes` (of `bytes_size` bytes) into `ppf`.
+// Returns false on failure. If `decoded_bytes` is non-null it receives the
+// number of input bytes that were consumed. If `jpeg_bytes` is non-null, the
+// reconstructed JPEG data is appended to it, and decoding fails when the input
+// cannot be reconstructed to a JPEG.
+bool DecodeImageJXL(const uint8_t* bytes, size_t bytes_size,
+                    const JXLDecompressParams& dparams, size_t* decoded_bytes,
+                    PackedPixelFile* ppf,
+                    std::vector<uint8_t>* jpeg_bytes = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_JXL_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pgx.cc b/third-party/libjxl/libjxl/lib/extras/dec/pgx.cc
new file mode 100644
index 0000000000..a99eb0f4ee
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pgx.cc
@@ -0,0 +1,202 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pgx.h"
+
+#include <string.h>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+// Fields of the textual PGX header: "PG", byte order ("ML" or "LM"), sign,
+// bits per sample, width and height.
+struct HeaderPGX {
+  // NOTE: PGX is always grayscale
+  size_t xsize;
+  size_t ysize;
+  size_t bits_per_sample;
+  bool big_endian;
+  bool is_signed;
+};
+
+// Cursor over the input bytes that parses the PGX header.
+class Parser {
+ public:
+  explicit Parser(const Span<const uint8_t> bytes)
+      : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+  // Sets "pos" to the first non-header byte/pixel on success.
+  Status ParseHeader(HeaderPGX* header, const uint8_t** pos) {
+    // Check the range here as well: DecodeImagePGX forwards its input without
+    // verifying that the two magic bytes are present.
+    if (end_ - pos_ < 2) return false;
+    if (pos_[0] != 'P' || pos_[1] != 'G') return false;
+    pos_ += 2;
+    return ParseHeaderPGX(header, pos);
+  }
+
+  // Exposed for testing
+  // Parses a run of decimal digits into `number`. Fails if there is no digit
+  // at the current position or if the value does not fit in size_t.
+  Status ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before number");
+    if (!IsDigit(*pos_)) return JXL_FAILURE("PGX: expected unsigned number");
+
+    constexpr size_t kMaxValue = ~static_cast<size_t>(0);
+    *number = 0;
+    while (pos_ < end_ && IsDigit(*pos_)) {
+      const size_t digit = *pos_ - '0';
+      // Fail instead of letting the value wrap around size_t.
+      if (*number > (kMaxValue - digit) / 10) {
+        return JXL_FAILURE("PGX: number too large");
+      }
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+
+    return true;
+  }
+
+ private:
+  static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  // Consumes exactly one ' ' character.
+  Status SkipSpace() {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before space");
+    const uint8_t c = *pos_;
+    if (c != ' ') return JXL_FAILURE("PGX: expected space");
+    ++pos_;
+    return true;
+  }
+
+  // Consumes exactly one line break.
+  Status SkipLineBreak() {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before line break");
+    // Line break can be either "\n" (0a) or "\r\n" (0d 0a).
+    if (*pos_ == '\n') {
+      pos_++;
+      return true;
+    } else if (*pos_ == '\r' && pos_ + 1 != end_ && *(pos_ + 1) == '\n') {
+      pos_ += 2;
+      return true;
+    }
+    return JXL_FAILURE("PGX: expected line break");
+  }
+
+  // Consumes exactly one space, tab or line-break character.
+  Status SkipSingleWhitespace() {
+    if (pos_ == end_) return JXL_FAILURE("PGX: reached end before whitespace");
+    if (!IsWhitespace(*pos_)) return JXL_FAILURE("PGX: expected whitespace");
+    ++pos_;
+    return true;
+  }
+
+  // Parses everything after the "PG" magic, fills `header` and sets `pos` to
+  // the first pixel byte. Also verifies that enough pixel data follows.
+  Status ParseHeaderPGX(HeaderPGX* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipSpace());
+    if (end_ - pos_ < 2) return JXL_FAILURE("PGX: header too small");
+    if (*pos_ == 'M' && *(pos_ + 1) == 'L') {
+      header->big_endian = true;
+    } else if (*pos_ == 'L' && *(pos_ + 1) == 'M') {
+      header->big_endian = false;
+    } else {
+      return JXL_FAILURE("PGX: invalid endianness");
+    }
+    pos_ += 2;
+    JXL_RETURN_IF_ERROR(SkipSpace());
+    if (pos_ == end_) return JXL_FAILURE("PGX: header too small");
+    if (*pos_ == '+') {
+      header->is_signed = false;
+    } else if (*pos_ == '-') {
+      header->is_signed = true;
+    } else {
+      return JXL_FAILURE("PGX: invalid signedness");
+    }
+    pos_++;
+    // Skip optional space
+    if (pos_ < end_ && *pos_ == ' ') pos_++;
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->bits_per_sample));
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+    // 0xa, or 0xd 0xa.
+    JXL_RETURN_IF_ERROR(SkipLineBreak());
+
+    // TODO(jon): could do up to 24-bit by converting the values to
+    // JXL_TYPE_FLOAT.
+    if (header->bits_per_sample > 16) {
+      return JXL_FAILURE("PGX: >16 bits not yet supported");
+    }
+    // TODO(lode): support signed integers. This may require changing the way
+    // external_image works.
+    if (header->is_signed) {
+      return JXL_FAILURE("PGX: signed not yet supported");
+    }
+
+    // Require xsize * ysize * bytes_per_pixel <= remaining bytes. The test is
+    // written with divisions so that huge dimensions cannot wrap around and
+    // slip through, and so that no out-of-range pointer is formed.
+    const size_t bytes_per_pixel = header->bits_per_sample <= 8 ? 1 : 2;
+    const size_t remaining = static_cast<size_t>(end_ - pos_);
+    if (header->xsize != 0 &&
+        header->ysize > remaining / bytes_per_pixel / header->xsize) {
+      return JXL_FAILURE("PGX: data too small");
+    }
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;
+  const uint8_t* const end_;
+};
+
+}  // namespace
+
+// Decodes the PGX image in `bytes` into `ppf` as a single grayscale frame.
+// The sample bytes are copied as stored; the byte order from the header is
+// recorded in the frame's pixel format. Fails if the header is invalid or
+// unsupported, or if the pixel data is incomplete.
+Status DecodeImagePGX(const Span<const uint8_t> bytes,
+                      const ColorHints& color_hints, PackedPixelFile* ppf,
+                      const SizeConstraints* constraints) {
+  Parser parser(bytes);
+  HeaderPGX header = {};
+  const uint8_t* pos = nullptr;
+  if (!parser.ParseHeader(&header, &pos)) return false;
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions(constraints, header.xsize, header.ysize));
+  if (header.bits_per_sample == 0 || header.bits_per_sample > 32) {
+    return JXL_FAILURE("PGX: bits_per_sample invalid");
+  }
+
+  JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+                                      /*is_gray=*/true, ppf));
+  ppf->info.xsize = header.xsize;
+  ppf->info.ysize = header.ysize;
+  // Original data is uint, so exponent_bits_per_sample = 0.
+  ppf->info.bits_per_sample = header.bits_per_sample;
+  ppf->info.exponent_bits_per_sample = 0;
+  ppf->info.uses_original_profile = true;
+
+  // No alpha in PGX
+  ppf->info.alpha_bits = 0;
+  ppf->info.alpha_exponent_bits = 0;
+  ppf->info.num_color_channels = 1;  // Always grayscale
+  ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+  JxlDataType data_type;
+  if (header.bits_per_sample > 8) {
+    data_type = JXL_TYPE_UINT16;
+  } else {
+    data_type = JXL_TYPE_UINT8;
+  }
+
+  const JxlPixelFormat format{
+      /*num_channels=*/1,
+      /*data_type=*/data_type,
+      /*endianness=*/header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN,
+      /*align=*/0,
+  };
+  ppf->frames.clear();
+  // Allocates the frame buffer.
+  ppf->frames.emplace_back(header.xsize, header.ysize, format);
+  const auto& frame = ppf->frames.back();
+  // Re-check against the size of the buffer that was actually allocated.
+  size_t pgx_remaining_size = bytes.data() + bytes.size() - pos;
+  if (pgx_remaining_size < frame.color.pixels_size) {
+    return JXL_FAILURE("PGX file too small");
+  }
+  memcpy(frame.color.pixels(), pos, frame.color.pixels_size);
+  return true;
+}
+
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pgx.h b/third-party/libjxl/libjxl/lib/extras/dec/pgx.h
new file mode 100644
index 0000000000..2cbd3b4dcf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pgx.h
@@ -0,0 +1,35 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_EXTRAS_DEC_PGX_H_
+#define LIB_EXTRAS_DEC_PGX_H_
+
+// Decodes PGX pixels in memory.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/extras/dec/color_hints.h"
+#include "lib/extras/packed_image.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+struct SizeConstraints;
+
+namespace extras {
+
+// Decodes `bytes` into `ppf`.
+Status DecodeImagePGX(Span<const uint8_t> bytes, const ColorHints& color_hints,
+                      PackedPixelFile* ppf,
+                      const SizeConstraints* constraints = nullptr);
+
+}  // namespace extras
+}  // namespace jxl
+
+#endif  // LIB_EXTRAS_DEC_PGX_H_
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pgx_test.cc b/third-party/libjxl/libjxl/lib/extras/dec/pgx_test.cc
new file mode 100644
index 0000000000..78ed689d07
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pgx_test.cc
@@ -0,0 +1,79 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pgx.h"
+
+#include "lib/extras/packed_image_convert.h"
+#include "lib/jxl/image_bundle.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+Span<const uint8_t> MakeSpan(const char* str) {
+  return Span<const uint8_t>(reinterpret_cast<const uint8_t*>(str),
+                             strlen(str));
+}
+
+TEST(CodecPGXTest, Test8bits) {
+  std::string pgx = "PG ML + 8 2 3\npixels";  // 2x3 pixels, 8-bit
+
+  PackedPixelFile ppf;
+  ThreadPool* pool = nullptr;
+
+  EXPECT_TRUE(DecodeImagePGX(MakeSpan(pgx.c_str()), ColorHints(), &ppf));
+  CodecInOut io;
+  EXPECT_TRUE(ConvertPackedPixelFileToCodecInOut(ppf, pool, &io));
+
+  ScaleImage(255.f, io.Main().color());
+
+  EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+  EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_TRUE(io.metadata.m.color_encoding.IsGray());
+  EXPECT_EQ(2u, io.xsize());
+  EXPECT_EQ(3u, io.ysize());
+
+  float eps = 1e-5;
+  EXPECT_NEAR('p', io.Main().color()->Plane(0).Row(0)[0], eps);
+  EXPECT_NEAR('i', io.Main().color()->Plane(0).Row(0)[1], eps);
+  EXPECT_NEAR('x', io.Main().color()->Plane(0).Row(1)[0], eps);
+  EXPECT_NEAR('e', io.Main().color()->Plane(0).Row(1)[1], eps);
+  EXPECT_NEAR('l', io.Main().color()->Plane(0).Row(2)[0], eps);
+  EXPECT_NEAR('s', io.Main().color()->Plane(0).Row(2)[1], eps);
+}
+
+TEST(CodecPGXTest, Test16bits) {
+  std::string pgx = "PG ML + 16 2 3\np_i_x_e_l_s_";  // 2x3 pixels, 16-bit
+
+  PackedPixelFile ppf;
+  ThreadPool* pool = nullptr;
+
+  EXPECT_TRUE(DecodeImagePGX(MakeSpan(pgx.c_str()), ColorHints(), &ppf));
+  CodecInOut io;
+  EXPECT_TRUE(ConvertPackedPixelFileToCodecInOut(ppf, pool, &io));
+
+  ScaleImage(255.f, io.Main().color());
+
+  EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample);
+  EXPECT_EQ(16u, io.metadata.m.bit_depth.bits_per_sample);
+  EXPECT_TRUE(io.metadata.m.color_encoding.IsGray());
+  EXPECT_EQ(2u, io.xsize());
+  EXPECT_EQ(3u, io.ysize());
+
+  // Comparing ~16-bit numbers in floats, only ~7 bits left.
+  float eps = 1e-3;
+  const auto& plane = io.Main().color()->Plane(0);
+  EXPECT_NEAR(256.0f * 'p' + '_', plane.Row(0)[0] * 257, eps);
+  EXPECT_NEAR(256.0f * 'i' + '_', plane.Row(0)[1] * 257, eps);
+  EXPECT_NEAR(256.0f * 'x' + '_', plane.Row(1)[0] * 257, eps);
+  EXPECT_NEAR(256.0f * 'e' + '_', plane.Row(1)[1] * 257, eps);
+  EXPECT_NEAR(256.0f * 'l' + '_', plane.Row(2)[0] * 257, eps);
+  EXPECT_NEAR(256.0f * 's' + '_', plane.Row(2)[1] * 257, eps);
+}
+
+}  // namespace
+}  // namespace extras
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pnm.cc b/third-party/libjxl/libjxl/lib/extras/dec/pnm.cc
new file mode 100644
index 0000000000..c3c2247769
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/extras/dec/pnm.cc
@@ -0,0 +1,522 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/extras/dec/pnm.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <cmath>
+
+#include "lib/extras/size_constraints.h"
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+namespace extras {
+namespace {
+
+// Header fields collected by the PGM/PPM/PAM/PFM header parsers.
+struct HeaderPNM {
+  size_t xsize;
+  size_t ysize;
+  bool is_gray;    // PGM
+  bool has_alpha;  // PAM
+  size_t bits_per_sample;
+  bool floating_point;
+  bool big_endian;
+  std::vector<JxlExtraChannelType> ec_types;  // PAM
+};
+
+// Stores a * b in *product and returns true, or returns false when the
+// product does not fit in size_t.
+bool CheckedMul(size_t a, size_t b, size_t* product) {
+  if (a != 0 && b > ~size_t{0} / a) return false;
+  *product = a * b;
+  return true;
+}
+
+// Cursor over the input bytes that parses the textual image headers.
+class Parser {
+ public:
+  explicit Parser(const Span<const uint8_t> bytes)
+      : pos_(bytes.data()), end_(pos_ + bytes.size()) {}
+
+  // Sets "pos" to the first non-header byte/pixel on success.
+  Status ParseHeader(HeaderPNM* header, const uint8_t** pos) {
+    // Range check here too: DecodeImagePNM forwards its input unchecked.
+    if (end_ - pos_ < 2 || pos_[0] != 'P') return false;
+    const uint8_t type = pos_[1];
+    pos_ += 2;
+
+    switch (type) {
+      case '4':
+        return JXL_FAILURE("pbm not supported");
+
+      case '5':
+        header->is_gray = true;
+        return ParseHeaderPNM(header, pos);
+
+      case '6':
+        header->is_gray = false;
+        return ParseHeaderPNM(header, pos);
+
+      case '7':
+        return ParseHeaderPAM(header, pos);
+
+      case 'F':
+        header->is_gray = false;
+        return ParseHeaderPFM(header, pos);
+
+      case 'f':
+        header->is_gray = true;
+        return ParseHeaderPFM(header, pos);
+    }
+    return false;
+  }
+
+  // Exposed for testing
+  Status ParseUnsigned(size_t* number) {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before number");
+    if (!IsDigit(*pos_)) return JXL_FAILURE("PNM: expected unsigned number");
+
+    *number = 0;
+    while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+      const size_t digit = *pos_ - '0';
+      // Fail instead of letting the value wrap around size_t.
+      if (*number > (~size_t{0} - digit) / 10) {
+        return JXL_FAILURE("PNM: number too large");
+      }
+      *number = *number * 10 + digit;
+      ++pos_;
+    }
+
+    return true;
+  }
+
+  Status ParseSigned(double* number) {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before signed");
+
+    if (*pos_ != '-' && *pos_ != '+' && !IsDigit(*pos_)) {
+      return JXL_FAILURE("PNM: expected signed number");
+    }
+
+    // Skip sign
+    const bool is_neg = *pos_ == '-';
+    if (is_neg || *pos_ == '+') {
+      ++pos_;
+      if (pos_ == end_) return JXL_FAILURE("PNM: reached end before digits");
+    }
+
+    // Leading digits
+    *number = 0.0;
+    while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+      *number *= 10;
+      *number += *pos_ - '0';
+      ++pos_;
+    }
+
+    // Decimal places?
+    if (pos_ < end_ && *pos_ == '.') {
+      ++pos_;
+      double place = 0.1;
+      while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') {
+        *number += (*pos_ - '0') * place;
+        place *= 0.1;
+        ++pos_;
+      }
+    }
+
+    if (is_neg) *number = -*number;
+    return true;
+  }
+
+ private:
+  static bool IsDigit(const uint8_t c) { return '0' <= c && c <= '9'; }
+  static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; }
+  static bool IsWhitespace(const uint8_t c) {
+    return IsLineBreak(c) || c == '\t' || c == ' ';
+  }
+
+  Status SkipBlank() {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before blank");
+    const uint8_t c = *pos_;
+    if (c != ' ' && c != '\n') return JXL_FAILURE("PNM: expected blank");
+    ++pos_;
+    return true;
+  }
+
+  Status SkipSingleWhitespace() {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+    if (!IsWhitespace(*pos_)) return JXL_FAILURE("PNM: expected whitespace");
+    ++pos_;
+    return true;
+  }
+
+  Status SkipWhitespace() {
+    if (pos_ == end_) return JXL_FAILURE("PNM: reached end before whitespace");
+    if (!IsWhitespace(*pos_) && *pos_ != '#') {
+      return JXL_FAILURE("PNM: expected whitespace/comment");
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+
+    // Comment(s)
+    while (pos_ != end_ && *pos_ == '#') {
+      while (pos_ != end_ && !IsLineBreak(*pos_)) {
+        ++pos_;
+      }
+      // Newline(s)
+      while (pos_ != end_ && IsLineBreak(*pos_)) pos_++;
+    }
+
+    while (pos_ < end_ && IsWhitespace(*pos_)) {
+      ++pos_;
+    }
+    return true;
+  }
+
+  Status MatchString(const char* keyword, bool skipws = true) {
+    const uint8_t* ppos = pos_;
+    while (*keyword) {
+      if (ppos >= end_) return JXL_FAILURE("PAM: unexpected end of input");
+      if (*keyword != *ppos) return false;
+      ppos++;
+      keyword++;
+    }
+    pos_ = ppos;
+    if (skipws) {
+      JXL_RETURN_IF_ERROR(SkipWhitespace());
+    } else {
+      JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    }
+    return true;
+  }
+
+  // Parses the "P7" (PAM) header, a list of keyword lines ended by ENDHDR.
+  Status ParseHeaderPAM(HeaderPNM* header, const uint8_t** pos) {
+    size_t depth = 3;
+    size_t max_val = 255;
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    while (!MatchString("ENDHDR", /*skipws=*/false)) {
+      if (MatchString("WIDTH")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("HEIGHT")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("DEPTH")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&depth));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("MAXVAL")) {
+        JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+        JXL_RETURN_IF_ERROR(SkipWhitespace());
+      } else if (MatchString("TUPLTYPE")) {
+        if (MatchString("RGB_ALPHA")) {
+          header->has_alpha = true;
+        } else if (MatchString("RGB")) {
+        } else if (MatchString("GRAYSCALE_ALPHA")) {
+          header->has_alpha = true;
+          header->is_gray = true;
+        } else if (MatchString("GRAYSCALE")) {
+          header->is_gray = true;
+        } else if (MatchString("BLACKANDWHITE_ALPHA")) {
+          header->has_alpha = true;
+          header->is_gray = true;
+          max_val = 1;
+        } else if (MatchString("BLACKANDWHITE")) {
+          header->is_gray = true;
+          max_val = 1;
+        } else if (MatchString("Alpha")) {
+          header->ec_types.push_back(JXL_CHANNEL_ALPHA);
+        } else if (MatchString("Depth")) {
+          header->ec_types.push_back(JXL_CHANNEL_DEPTH);
+        } else if (MatchString("SpotColor")) {
+          header->ec_types.push_back(JXL_CHANNEL_SPOT_COLOR);
+        } else if (MatchString("SelectionMask")) {
+          header->ec_types.push_back(JXL_CHANNEL_SELECTION_MASK);
+        } else if (MatchString("Black")) {
+          header->ec_types.push_back(JXL_CHANNEL_BLACK);
+        } else if (MatchString("CFA")) {
+          header->ec_types.push_back(JXL_CHANNEL_CFA);
+        } else if (MatchString("Thermal")) {
+          header->ec_types.push_back(JXL_CHANNEL_THERMAL);
+        } else {
+          return JXL_FAILURE("PAM: unknown TUPLTYPE");
+        }
+      } else {
+        constexpr size_t kMaxHeaderLength = 20;
+        char unknown_header[kMaxHeaderLength + 1];
+        size_t len = std::min<size_t>(kMaxHeaderLength, end_ - pos_);
+        strncpy(unknown_header, reinterpret_cast<const char*>(pos_), len);
+        unknown_header[len] = 0;
+        return JXL_FAILURE("PAM: unknown header keyword: %s", unknown_header);
+      }
+    }
+    size_t num_channels = header->is_gray ? 1 : 3;
+    if (header->has_alpha) num_channels++;
+    if (num_channels + header->ec_types.size() != depth) {
+      return JXL_FAILURE("PAM: bad DEPTH");
+    }
+    if (max_val == 0 || max_val >= 65536) {
+      return JXL_FAILURE("PAM: bad MAXVAL");
+    }
+    // e.g. When `max_val` is 1 , we want 1 bit:
+    header->bits_per_sample = FloorLog2Nonzero(max_val) + 1;
+    if ((1u << header->bits_per_sample) - 1 != max_val)
+      return JXL_FAILURE("PNM: unsupported MaxVal (expected 2^n - 1)");
+    // PAM does not pack bits as in PBM.
+
+    header->floating_point = false;
+    header->big_endian = true;
+    *pos = pos_;
+    return true;
+  }
+
+  // Parses the "P5"/"P6" header: width, height and maximum sample value.
+  Status ParseHeaderPNM(HeaderPNM* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    JXL_RETURN_IF_ERROR(SkipWhitespace());
+    size_t max_val;
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&max_val));
+    if (max_val == 0 || max_val >= 65536) {
+      return JXL_FAILURE("PNM: bad MaxVal");
+    }
+    header->bits_per_sample = FloorLog2Nonzero(max_val) + 1;
+    if ((1u << header->bits_per_sample) - 1 != max_val)
+      return JXL_FAILURE("PNM: unsupported MaxVal (expected 2^n - 1)");
+    header->floating_point = false;
+    header->big_endian = true;
+
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  // Parses the "PF"/"Pf" (PFM) header: width, height and scale factor.
+  Status ParseHeaderPFM(HeaderPNM* header, const uint8_t** pos) {
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->xsize));
+
+    JXL_RETURN_IF_ERROR(SkipBlank());
+    JXL_RETURN_IF_ERROR(ParseUnsigned(&header->ysize));
+
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+    // The scale has no meaning as multiplier, only its sign is used to
+    // indicate endianness. All software expects nominal range 0..1.
+    double scale;
+    JXL_RETURN_IF_ERROR(ParseSigned(&scale));
+    if (scale == 0.0) {
+      return JXL_FAILURE("PFM: bad scale factor value.");
+    } else if (std::abs(scale) != 1.0) {
+      JXL_WARNING("PFM: Discarding non-unit scale factor");
+    }
+    header->big_endian = scale > 0.0;
+    header->bits_per_sample = 32;
+    header->floating_point = true;
+
+    JXL_RETURN_IF_ERROR(SkipSingleWhitespace());
+
+    *pos = pos_;
+    return true;
+  }
+
+  const uint8_t* pos_;
+  const uint8_t* const end_;
+};
+
+Span<const uint8_t> MakeSpan(const char* str) {
+  return Span<const uint8_t>(reinterpret_cast<const uint8_t*>(str),
+                             strlen(str));
+}
+
+}  // namespace
+
+// Decodes a PGM/PPM/PAM/PFM image from `bytes` into `ppf` as a single frame.
+// Extra channels declared by a PAM header are de-interleaved into their own
+// planes. Fails when the header is invalid or the pixel data is incomplete.
+Status DecodeImagePNM(const Span<const uint8_t> bytes,
+                      const ColorHints& color_hints, PackedPixelFile* ppf,
+                      const SizeConstraints* constraints) {
+  Parser parser(bytes);
+  HeaderPNM header = {};
+  const uint8_t* pos = nullptr;
+  if (!parser.ParseHeader(&header, &pos)) return false;
+  JXL_RETURN_IF_ERROR(
+      VerifyDimensions(constraints, header.xsize, header.ysize));
+
+  if (header.bits_per_sample == 0 || header.bits_per_sample > 32) {
+    return JXL_FAILURE("PNM: bits_per_sample invalid");
+  }
+
+  // PPM specify that in the raster, the sample values are "nonlinear" (BP.709,
+  // with gamma number of 2.2). Deviate from the specification and assume
+  // `sRGB` in our implementation.
+  JXL_RETURN_IF_ERROR(ApplyColorHints(color_hints, /*color_already_set=*/false,
+                                      header.is_gray, ppf));
+
+  ppf->info.xsize = header.xsize;
+  ppf->info.ysize = header.ysize;
+  if (header.floating_point) {
+    ppf->info.bits_per_sample = 32;
+    ppf->info.exponent_bits_per_sample = 8;
+  } else {
+    ppf->info.bits_per_sample = header.bits_per_sample;
+    ppf->info.exponent_bits_per_sample = 0;
+  }
+
+  ppf->info.orientation = JXL_ORIENT_IDENTITY;
+
+  // No alpha in PNM and PFM; has_alpha is only set by the PAM header parser.
+  ppf->info.alpha_bits = (header.has_alpha ? ppf->info.bits_per_sample : 0);
+  ppf->info.alpha_exponent_bits = 0;
+  ppf->info.num_color_channels = (header.is_gray ? 1 : 3);
+  uint32_t num_alpha_channels = (header.has_alpha ? 1 : 0);
+  uint32_t num_interleaved_channels =
+      ppf->info.num_color_channels + num_alpha_channels;
+  ppf->info.num_extra_channels = num_alpha_channels + header.ec_types.size();
+
+  for (auto type : header.ec_types) {
+    PackedExtraChannel pec;
+    pec.ec_info.bits_per_sample = ppf->info.bits_per_sample;
+    pec.ec_info.type = type;
+    ppf->extra_channels_info.emplace_back(std::move(pec));
+  }
+
+  JxlDataType data_type;
+  if (header.floating_point) {
+    // There's no float16 pnm version.
+    data_type = JXL_TYPE_FLOAT;
+  } else {
+    if (header.bits_per_sample > 8) {
+      data_type = JXL_TYPE_UINT16;
+    } else {
+      data_type = JXL_TYPE_UINT8;
+    }
+  }
+
+  const JxlPixelFormat format{
+      /*num_channels=*/num_interleaved_channels,
+      /*data_type=*/data_type,
+      /*endianness=*/header.big_endian ? JXL_BIG_ENDIAN : JXL_LITTLE_ENDIAN,
+      /*align=*/0,
+  };
+  const JxlPixelFormat ec_format{1, format.data_type, format.endianness, 0};
+  ppf->frames.clear();
+  ppf->frames.emplace_back(header.xsize, header.ysize, format);
+  auto* frame = &ppf->frames.back();
+  for (size_t i = 0; i < header.ec_types.size(); ++i) {
+    frame->extra_channels.emplace_back(header.xsize, header.ysize, ec_format);
+  }
+  const size_t pnm_remaining_size = bytes.data() + bytes.size() - pos;
+  if (pnm_remaining_size < frame->color.pixels_size) {
+    return JXL_FAILURE("PNM file too small");
+  }
+
+  uint8_t* out = reinterpret_cast<uint8_t*>(frame->color.pixels());
+  std::vector<uint8_t*> ec_out(header.ec_types.size());
+  for (size_t i = 0; i < ec_out.size(); ++i) {
+    ec_out[i] = reinterpret_cast<uint8_t*>(frame->extra_channels[i].pixels());
+  }
+  if (ec_out.empty()) {
+    // The loop below reads and writes ysize rows of `stride` bytes each.
+    size_t copy_size = 0;
+    if (!CheckedMul(header.ysize, frame->color.stride, &copy_size) ||
+        copy_size > frame->color.pixels_size) {
+      return JXL_FAILURE("PNM: image too large");
+    }
+    const bool flipped_y = header.bits_per_sample == 32;  // PFMs are flipped
+    for (size_t y = 0; y < header.ysize; ++y) {
+      size_t y_in = flipped_y ? header.ysize - 1 - y : y;
+      const uint8_t* row_in = &pos[y_in * frame->color.stride];
+      uint8_t* row_out = &out[y * frame->color.stride];
+      memcpy(row_out, row_in, frame->color.stride);
+    }
+  } else {
+    const size_t pwidth = PackedImage::BitsPerChannel(data_type) / 8;
+    const size_t color_width = frame->color.pixel_stride();
+    // Every input pixel is the interleaved color sample followed by one value
+    // per extra channel, so more input is needed than for the color plane
+    // alone. Validate the sizes of everything the loop below reads or writes.
+    size_t num_pixels = 0;
+    size_t color_size = 0;
+    size_t input_size = 0;
+    if (!CheckedMul(header.xsize, header.ysize, &num_pixels) ||
+        !CheckedMul(num_pixels, color_width, &color_size) ||
+        !CheckedMul(num_pixels, color_width + ec_out.size() * pwidth,
+                    &input_size) ||
+        color_size > frame->color.pixels_size) {
+      return JXL_FAILURE("PNM: image too large");
+    }
+    if (pnm_remaining_size < input_size) {
+      return JXL_FAILURE("PNM file too small");
+    }
+    for (const auto& ec : frame->extra_channels) {
+      if (ec.pixels_size < num_pixels * pwidth) {
+        return JXL_FAILURE("PNM: image too large");
+      }
+    }
+    for (size_t y = 0; y < header.ysize; ++y) {
+      for (size_t x = 0; x < header.xsize; ++x) {
+        memcpy(out, pos, color_width);
+        out += color_width;
+        pos += color_width;
+        for (auto& p : ec_out) {
+          memcpy(p, pos, pwidth);
+          pos += pwidth;
+          p += pwidth;
+        }
+      }
+    }
+  }
+  return true;
+}
+
+void TestCodecPNM() {
+  size_t u = 77777;  // Initialized to wrong value.
+  double d = 77.77;
+// Failing to parse invalid strings results in a crash if `JXL_CRASH_ON_ERROR`
+// is defined and hence the tests fail. Therefore we only run these tests if
+// `JXL_CRASH_ON_ERROR` is not defined.
+#ifndef JXL_CRASH_ON_ERROR
+  JXL_CHECK(false == Parser(MakeSpan("")).ParseUnsigned(&u));
+  JXL_CHECK(false == Parser(MakeSpan("+")).ParseUnsigned(&u));
+  JXL_CHECK(false == Parser(MakeSpan("-")).ParseUnsigned(&u));
+  JXL_CHECK(false == Parser(MakeSpan("A")).ParseUnsigned(&u));
+
+  JXL_CHECK(false == Parser(MakeSpan("")).ParseSigned(&d));
+  JXL_CHECK(false == Parser(MakeSpan("+")).ParseSigned(&d));
+  JXL_CHECK(false == Parser(MakeSpan("-")).ParseSigned(&d));
+  JXL_CHECK(false == Parser(MakeSpan("A")).ParseSigned(&d));
+#endif
+  JXL_CHECK(true == Parser(MakeSpan("1")).ParseUnsigned(&u));
+  JXL_CHECK(u == 1);
+
+  JXL_CHECK(true == Parser(MakeSpan("32")).ParseUnsigned(&u));
+  JXL_CHECK(u == 32);
+
+  JXL_CHECK(true == Parser(MakeSpan("1")).ParseSigned(&d));
+  JXL_CHECK(d == 1.0);
+  JXL_CHECK(true == Parser(MakeSpan("+2")).ParseSigned(&d));
+  JXL_CHECK(d == 2.0);
+  JXL_CHECK(true == Parser(MakeSpan("-3")).ParseSigned(&d));
+  JXL_CHECK(std::abs(d - -3.0) < 1E-15);
+  JXL_CHECK(true == Parser(MakeSpan("3.141592")).ParseSigned(&d));
+  JXL_CHECK(std::abs(d - 3.141592) < 1E-15);
+  JXL_CHECK(true ==
Parser(MakeSpan("-3.141592")).ParseSigned(&d)); + JXL_CHECK(std::abs(d - -3.141592) < 1E-15); +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/dec/pnm.h b/third-party/libjxl/libjxl/lib/extras/dec/pnm.h new file mode 100644 index 0000000000..0745b2f20d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/dec/pnm.h @@ -0,0 +1,41 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_DEC_PNM_H_ +#define LIB_EXTRAS_DEC_PNM_H_ + +// Decodes PBM/PGM/PPM/PFM pixels in memory. + +#include +#include + +// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown) +#include + +#include "lib/extras/dec/color_hints.h" +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +struct SizeConstraints; + +namespace extras { + +// Decodes `bytes` into `ppf`. color_hints may specify "color_space", which +// defaults to sRGB. +Status DecodeImagePNM(Span bytes, const ColorHints& color_hints, + PackedPixelFile* ppf, + const SizeConstraints* constraints = nullptr); + +void TestCodecPNM(); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_DEC_PNM_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/apng.cc b/third-party/libjxl/libjxl/lib/extras/enc/apng.cc new file mode 100644 index 0000000000..53ef297367 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/apng.cc @@ -0,0 +1,454 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/enc/apng.h" + +// Parts of this code are taken from apngdis, which has the following license: +/* APNG Disassembler 2.8 + * + * Deconstructs APNG files into individual frames. + * + * http://apngdis.sourceforge.net + * + * Copyright (c) 2010-2015 Max Stepin + * maxst at users.sourceforge.net + * + * zlib license + * ------------ + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ * + */ + +#include +#include + +#include +#include + +#include "lib/extras/exif.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/printf_macros.h" +#if JPEGXL_ENABLE_APNG +#include "png.h" /* original (unpatched) libpng is ok */ +#endif + +namespace jxl { +namespace extras { + +#if JPEGXL_ENABLE_APNG +namespace { + +constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69, + 0x66, 0x00, 0x00}; + +class APNGEncoder : public Encoder { + public: + std::vector AcceptedFormats() const override { + std::vector formats; + for (const uint32_t num_channels : {1, 2, 3, 4}) { + for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) { + for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) { + formats.push_back( + JxlPixelFormat{num_channels, data_type, endianness, /*align=*/0}); + } + } + } + return formats; + } + Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image, + ThreadPool* pool) const override { + JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info)); + encoded_image->icc.clear(); + encoded_image->bitstreams.resize(1); + return EncodePackedPixelFileToAPNG(ppf, pool, + &encoded_image->bitstreams.front()); + } + + private: + Status EncodePackedPixelFileToAPNG(const PackedPixelFile& ppf, + ThreadPool* pool, + std::vector* bytes) const; +}; + +static void PngWrite(png_structp png_ptr, png_bytep data, png_size_t length) { + std::vector* bytes = + static_cast*>(png_get_io_ptr(png_ptr)); + bytes->insert(bytes->end(), data, data + length); +} + +// Stores XMP and EXIF/IPTC into key/value strings for PNG +class BlobsWriterPNG { + public: + static Status Encode(const PackedMetadata& blobs, + std::vector* strings) { + if (!blobs.exif.empty()) { + // PNG viewers typically ignore Exif orientation but not all of them do + // (and e.g. cjxl doesn't), so we overwrite the Exif orientation to the + // identity to avoid repeated orientation. 
+ std::vector exif = blobs.exif; + ResetExifOrientation(exif); + // By convention, the data is prefixed with "Exif\0\0" when stored in + // the legacy (and non-standard) "Raw profile type exif" text chunk + // currently used here. + // TODO: Store Exif data in an eXIf chunk instead, which always begins + // with the TIFF header. + if (exif.size() >= sizeof kExifSignature && + memcmp(exif.data(), kExifSignature, sizeof kExifSignature) != 0) { + exif.insert(exif.begin(), kExifSignature, + kExifSignature + sizeof kExifSignature); + } + JXL_RETURN_IF_ERROR(EncodeBase16("exif", exif, strings)); + } + if (!blobs.iptc.empty()) { + JXL_RETURN_IF_ERROR(EncodeBase16("iptc", blobs.iptc, strings)); + } + if (!blobs.xmp.empty()) { + // TODO: Store XMP data in an "XML:com.adobe.xmp" text chunk instead. + JXL_RETURN_IF_ERROR(EncodeBase16("xmp", blobs.xmp, strings)); + } + return true; + } + + private: + static JXL_INLINE char EncodeNibble(const uint8_t nibble) { + JXL_ASSERT(nibble < 16); + return (nibble < 10) ? '0' + nibble : 'a' + nibble - 10; + } + + static Status EncodeBase16(const std::string& type, + const std::vector& bytes, + std::vector* strings) { + // Encoding: base16 with newline after 72 chars. 
+ const size_t base16_size = + 2 * bytes.size() + DivCeil(bytes.size(), size_t(36)) + 1; + std::string base16; + base16.reserve(base16_size); + for (size_t i = 0; i < bytes.size(); ++i) { + if (i % 36 == 0) base16.push_back('\n'); + base16.push_back(EncodeNibble(bytes[i] >> 4)); + base16.push_back(EncodeNibble(bytes[i] & 0x0F)); + } + base16.push_back('\n'); + JXL_ASSERT(base16.length() == base16_size); + + char key[30]; + snprintf(key, sizeof(key), "Raw profile type %s", type.c_str()); + + char header[30]; + snprintf(header, sizeof(header), "\n%s\n%8" PRIuS, type.c_str(), + bytes.size()); + + strings->push_back(std::string(key)); + strings->push_back(std::string(header) + base16); + return true; + } +}; + +void MaybeAddCICP(const JxlColorEncoding& c_enc, png_structp png_ptr, + png_infop info_ptr) { + png_byte cicp_data[4] = {}; + png_unknown_chunk cicp_chunk; + if (c_enc.color_space != JXL_COLOR_SPACE_RGB) { + return; + } + if (c_enc.primaries == JXL_PRIMARIES_P3) { + if (c_enc.white_point == JXL_WHITE_POINT_D65) { + cicp_data[0] = 12; + } else if (c_enc.white_point == JXL_WHITE_POINT_DCI) { + cicp_data[0] = 11; + } else { + return; + } + } else if (c_enc.primaries != JXL_PRIMARIES_CUSTOM && + c_enc.white_point == JXL_WHITE_POINT_D65) { + cicp_data[0] = static_cast(c_enc.primaries); + } else { + return; + } + if (c_enc.transfer_function == JXL_TRANSFER_FUNCTION_UNKNOWN || + c_enc.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) { + return; + } + cicp_data[1] = static_cast(c_enc.transfer_function); + cicp_data[2] = 0; + cicp_data[3] = 1; + cicp_chunk.data = cicp_data; + cicp_chunk.size = sizeof(cicp_data); + cicp_chunk.location = PNG_HAVE_IHDR; + memcpy(cicp_chunk.name, "cICP", 5); + png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_ALWAYS, + reinterpret_cast("cICP"), 1); + png_set_unknown_chunks(png_ptr, info_ptr, &cicp_chunk, 1); +} + +bool MaybeAddSRGB(const JxlColorEncoding& c_enc, png_structp png_ptr, + png_infop info_ptr) { + if (c_enc.transfer_function 
== JXL_TRANSFER_FUNCTION_SRGB && + (c_enc.color_space == JXL_COLOR_SPACE_GRAY || + (c_enc.color_space == JXL_COLOR_SPACE_RGB && + c_enc.primaries == JXL_PRIMARIES_SRGB && + c_enc.white_point == JXL_WHITE_POINT_D65))) { + png_set_sRGB(png_ptr, info_ptr, c_enc.rendering_intent); + png_set_cHRM_fixed(png_ptr, info_ptr, 31270, 32900, 64000, 33000, 30000, + 60000, 15000, 6000); + png_set_gAMA_fixed(png_ptr, info_ptr, 45455); + return true; + } + return false; +} + +void MaybeAddCHRM(const JxlColorEncoding& c_enc, png_structp png_ptr, + png_infop info_ptr) { + if (c_enc.color_space != JXL_COLOR_SPACE_RGB) return; + if (c_enc.primaries == 0) return; + png_set_cHRM(png_ptr, info_ptr, c_enc.white_point_xy[0], + c_enc.white_point_xy[1], c_enc.primaries_red_xy[0], + c_enc.primaries_red_xy[1], c_enc.primaries_green_xy[0], + c_enc.primaries_green_xy[1], c_enc.primaries_blue_xy[0], + c_enc.primaries_blue_xy[1]); +} + +void MaybeAddGAMA(const JxlColorEncoding& c_enc, png_structp png_ptr, + png_infop info_ptr) { + switch (c_enc.transfer_function) { + case JXL_TRANSFER_FUNCTION_LINEAR: + png_set_gAMA_fixed(png_ptr, info_ptr, PNG_FP_1); + break; + case JXL_TRANSFER_FUNCTION_SRGB: + png_set_gAMA_fixed(png_ptr, info_ptr, 45455); + break; + case JXL_TRANSFER_FUNCTION_GAMMA: + png_set_gAMA(png_ptr, info_ptr, c_enc.gamma); + break; + + default:; + // No gAMA chunk. + } +} + +void MaybeAddCLLi(const JxlColorEncoding& c_enc, const float intensity_target, + png_structp png_ptr, png_infop info_ptr) { + if (c_enc.transfer_function != JXL_TRANSFER_FUNCTION_PQ) return; + + const uint32_t max_cll = + static_cast(10000.f * Clamp1(intensity_target, 0.f, 10000.f)); + png_byte chunk_data[8] = {}; + chunk_data[0] = (max_cll >> 24) & 0xFF; + chunk_data[1] = (max_cll >> 16) & 0xFF; + chunk_data[2] = (max_cll >> 8) & 0xFF; + chunk_data[3] = max_cll & 0xFF; + // Leave MaxFALL set to 0. 
+ png_unknown_chunk chunk; + memcpy(chunk.name, "cLLi", 5); + chunk.data = chunk_data; + chunk.size = sizeof chunk_data; + chunk.location = PNG_HAVE_IHDR; + png_set_keep_unknown_chunks(png_ptr, PNG_HANDLE_CHUNK_ALWAYS, + reinterpret_cast("cLLi"), 1); + png_set_unknown_chunks(png_ptr, info_ptr, &chunk, 1); +} + +Status APNGEncoder::EncodePackedPixelFileToAPNG( + const PackedPixelFile& ppf, ThreadPool* pool, + std::vector* bytes) const { + size_t xsize = ppf.info.xsize; + size_t ysize = ppf.info.ysize; + bool has_alpha = ppf.info.alpha_bits != 0; + bool is_gray = ppf.info.num_color_channels == 1; + size_t color_channels = ppf.info.num_color_channels; + size_t num_channels = color_channels + (has_alpha ? 1 : 0); + size_t num_samples = num_channels * xsize * ysize; + + if (!ppf.info.have_animation && ppf.frames.size() != 1) { + return JXL_FAILURE("Invalid number of frames"); + } + + size_t count = 0; + size_t anim_chunks = 0; + + for (const auto& frame : ppf.frames) { + JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info)); + + const PackedImage& color = frame.color; + const JxlPixelFormat format = color.format; + const uint8_t* in = reinterpret_cast(color.pixels()); + size_t data_bits_per_sample = PackedImage::BitsPerChannel(format.data_type); + size_t bytes_per_sample = data_bits_per_sample / 8; + size_t out_bytes_per_sample = bytes_per_sample > 1 ? 
2 : 1; + size_t out_stride = xsize * num_channels * out_bytes_per_sample; + size_t out_size = ysize * out_stride; + std::vector out(out_size); + + if (format.data_type == JXL_TYPE_UINT8) { + if (ppf.info.bits_per_sample < 8) { + float mul = 255.0 / ((1u << ppf.info.bits_per_sample) - 1); + for (size_t i = 0; i < num_samples; ++i) { + out[i] = static_cast(in[i] * mul + 0.5); + } + } else { + memcpy(&out[0], in, out_size); + } + } else if (format.data_type == JXL_TYPE_UINT16) { + if (ppf.info.bits_per_sample < 16 || + format.endianness != JXL_BIG_ENDIAN) { + float mul = 65535.0 / ((1u << ppf.info.bits_per_sample) - 1); + const uint8_t* p_in = in; + uint8_t* p_out = out.data(); + for (size_t i = 0; i < num_samples; ++i, p_in += 2, p_out += 2) { + uint32_t val = (format.endianness == JXL_BIG_ENDIAN ? LoadBE16(p_in) + : LoadLE16(p_in)); + StoreBE16(static_cast(val * mul + 0.5), p_out); + } + } else { + memcpy(&out[0], in, out_size); + } + } + png_structp png_ptr; + png_infop info_ptr; + + png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + + if (!png_ptr) return JXL_FAILURE("Could not init png encoder"); + + info_ptr = png_create_info_struct(png_ptr); + if (!info_ptr) return JXL_FAILURE("Could not init png info struct"); + + png_set_write_fn(png_ptr, bytes, PngWrite, NULL); + png_set_flush(png_ptr, 0); + + int width = xsize; + int height = ysize; + + png_byte color_type = (is_gray ? 
PNG_COLOR_TYPE_GRAY : PNG_COLOR_TYPE_RGB); + if (has_alpha) color_type |= PNG_COLOR_MASK_ALPHA; + png_byte bit_depth = out_bytes_per_sample * 8; + + png_set_IHDR(png_ptr, info_ptr, width, height, bit_depth, color_type, + PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, + PNG_FILTER_TYPE_BASE); + if (count == 0) { + if (!MaybeAddSRGB(ppf.color_encoding, png_ptr, info_ptr)) { + MaybeAddCICP(ppf.color_encoding, png_ptr, info_ptr); + if (!ppf.icc.empty()) { + png_set_benign_errors(png_ptr, 1); + png_set_iCCP(png_ptr, info_ptr, "1", 0, ppf.icc.data(), + ppf.icc.size()); + } + MaybeAddCHRM(ppf.color_encoding, png_ptr, info_ptr); + MaybeAddGAMA(ppf.color_encoding, png_ptr, info_ptr); + } + MaybeAddCLLi(ppf.color_encoding, ppf.info.intensity_target, png_ptr, + info_ptr); + + std::vector textstrings; + JXL_RETURN_IF_ERROR(BlobsWriterPNG::Encode(ppf.metadata, &textstrings)); + for (size_t kk = 0; kk + 1 < textstrings.size(); kk += 2) { + png_text text; + text.key = const_cast(textstrings[kk].c_str()); + text.text = const_cast(textstrings[kk + 1].c_str()); + text.compression = PNG_TEXT_COMPRESSION_zTXt; + png_set_text(png_ptr, info_ptr, &text, 1); + } + + png_write_info(png_ptr, info_ptr); + } else { + // fake writing a header, otherwise libpng gets confused + size_t pos = bytes->size(); + png_write_info(png_ptr, info_ptr); + bytes->resize(pos); + } + + if (ppf.info.have_animation) { + if (count == 0) { + png_byte adata[8]; + png_save_uint_32(adata, ppf.frames.size()); + png_save_uint_32(adata + 4, ppf.info.animation.num_loops); + png_byte actl[5] = "acTL"; + png_write_chunk(png_ptr, actl, adata, 8); + } + png_byte fdata[26]; + // TODO(jon): also make this work for the non-coalesced case + png_save_uint_32(fdata, anim_chunks++); + png_save_uint_32(fdata + 4, width); + png_save_uint_32(fdata + 8, height); + png_save_uint_32(fdata + 12, 0); + png_save_uint_32(fdata + 16, 0); + png_save_uint_16(fdata + 20, frame.frame_info.duration * + ppf.info.animation.tps_denominator); + 
png_save_uint_16(fdata + 22, ppf.info.animation.tps_numerator); + fdata[24] = 1; + fdata[25] = 0; + png_byte fctl[5] = "fcTL"; + png_write_chunk(png_ptr, fctl, fdata, 26); + } + + std::vector rows(height); + for (int y = 0; y < height; ++y) { + rows[y] = out.data() + y * out_stride; + } + + png_write_flush(png_ptr); + const size_t pos = bytes->size(); + png_write_image(png_ptr, &rows[0]); + png_write_flush(png_ptr); + if (count > 0) { + std::vector fdata(4); + png_save_uint_32(fdata.data(), anim_chunks++); + size_t p = pos; + while (p + 8 < bytes->size()) { + size_t len = png_get_uint_32(bytes->data() + p); + JXL_ASSERT(bytes->operator[](p + 4) == 'I'); + JXL_ASSERT(bytes->operator[](p + 5) == 'D'); + JXL_ASSERT(bytes->operator[](p + 6) == 'A'); + JXL_ASSERT(bytes->operator[](p + 7) == 'T'); + fdata.insert(fdata.end(), bytes->data() + p + 8, + bytes->data() + p + 8 + len); + p += len + 12; + } + bytes->resize(pos); + + png_byte fdat[5] = "fdAT"; + png_write_chunk(png_ptr, fdat, fdata.data(), fdata.size()); + } + + count++; + if (count == ppf.frames.size() || !ppf.info.have_animation) { + png_write_end(png_ptr, NULL); + } + + png_destroy_write_struct(&png_ptr, &info_ptr); + } + + return true; +} + +} // namespace +#endif + +std::unique_ptr GetAPNGEncoder() { +#if JPEGXL_ENABLE_APNG + return jxl::make_unique(); +#else + return nullptr; +#endif +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/apng.h b/third-party/libjxl/libjxl/lib/extras/enc/apng.h new file mode 100644 index 0000000000..2a2139c8fa --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/apng.h @@ -0,0 +1,23 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_APNG_H_ +#define LIB_EXTRAS_ENC_APNG_H_ + +// Encodes APNG images in memory. 
+ +#include + +#include "lib/extras/enc/encode.h" + +namespace jxl { +namespace extras { + +std::unique_ptr GetAPNGEncoder(); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_APNG_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/encode.cc b/third-party/libjxl/libjxl/lib/extras/enc/encode.cc new file mode 100644 index 0000000000..8a84103c21 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/encode.cc @@ -0,0 +1,177 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/enc/encode.h" + +#include + +#include "lib/extras/enc/apng.h" +#include "lib/extras/enc/exr.h" +#include "lib/extras/enc/jpg.h" +#include "lib/extras/enc/npy.h" +#include "lib/extras/enc/pgx.h" +#include "lib/extras/enc/pnm.h" +#include "lib/jxl/base/printf_macros.h" + +namespace jxl { +namespace extras { + +Status Encoder::VerifyBasicInfo(const JxlBasicInfo& info) { + if (info.xsize == 0 || info.ysize == 0) { + return JXL_FAILURE("Empty image"); + } + if (info.num_color_channels != 1 && info.num_color_channels != 3) { + return JXL_FAILURE("Invalid number of color channels"); + } + if (info.alpha_bits > 0 && info.alpha_bits != info.bits_per_sample) { + return JXL_FAILURE("Alpha bit depth does not match image bit depth"); + } + if (info.orientation != JXL_ORIENT_IDENTITY) { + return JXL_FAILURE("Orientation must be identity"); + } + return true; +} + +Status Encoder::VerifyFormat(const JxlPixelFormat& format) const { + for (auto f : AcceptedFormats()) { + if (f.num_channels != format.num_channels) continue; + if (f.data_type != format.data_type) continue; + if (f.data_type == JXL_TYPE_UINT8 || f.endianness == format.endianness) { + return true; + } + } + return JXL_FAILURE("Format is not in the list of accepted formats."); +} + +Status Encoder::VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample, + 
uint32_t exponent_bits) { + if ((data_type == JXL_TYPE_UINT8 && + (bits_per_sample == 0 || bits_per_sample > 8 || exponent_bits != 0)) || + (data_type == JXL_TYPE_UINT16 && + (bits_per_sample <= 8 || bits_per_sample > 16 || exponent_bits != 0)) || + (data_type == JXL_TYPE_FLOAT16 && + (bits_per_sample > 16 || exponent_bits > 5))) { + return JXL_FAILURE( + "Incompatible data_type %d and bit depth %u with exponent bits %u", + (int)data_type, bits_per_sample, exponent_bits); + } + return true; +} + +Status Encoder::VerifyImageSize(const PackedImage& image, + const JxlBasicInfo& info) { + if (image.pixels() == nullptr) { + return JXL_FAILURE("Invalid image."); + } + if (image.stride != image.xsize * image.pixel_stride()) { + return JXL_FAILURE("Invalid image stride."); + } + if (image.pixels_size != image.ysize * image.stride) { + return JXL_FAILURE("Invalid image size."); + } + size_t info_num_channels = + (info.num_color_channels + (info.alpha_bits > 0 ? 1 : 0)); + if (image.xsize != info.xsize || image.ysize != info.ysize || + image.format.num_channels != info_num_channels) { + return JXL_FAILURE("Frame size does not match image size"); + } + return true; +} + +Status Encoder::VerifyPackedImage(const PackedImage& image, + const JxlBasicInfo& info) const { + JXL_RETURN_IF_ERROR(VerifyImageSize(image, info)); + JXL_RETURN_IF_ERROR(VerifyFormat(image.format)); + JXL_RETURN_IF_ERROR(VerifyBitDepth(image.format.data_type, + info.bits_per_sample, + info.exponent_bits_per_sample)); + return true; +} + +Status SelectFormat(const std::vector& accepted_formats, + const JxlBasicInfo& basic_info, JxlPixelFormat* format) { + const size_t original_bit_depth = basic_info.bits_per_sample; + size_t current_bit_depth = 0; + size_t num_alpha_channels = (basic_info.alpha_bits != 0 ? 
1 : 0); + size_t num_channels = basic_info.num_color_channels + num_alpha_channels; + for (;;) { + for (const JxlPixelFormat& candidate : accepted_formats) { + if (candidate.num_channels != num_channels) continue; + const size_t candidate_bit_depth = + PackedImage::BitsPerChannel(candidate.data_type); + if ( + // Candidate bit depth is less than what we have and still enough + (original_bit_depth <= candidate_bit_depth && + candidate_bit_depth < current_bit_depth) || + // Or larger than the too-small bit depth we currently have + (current_bit_depth < candidate_bit_depth && + current_bit_depth < original_bit_depth)) { + *format = candidate; + current_bit_depth = candidate_bit_depth; + } + } + if (current_bit_depth == 0) { + if (num_channels > basic_info.num_color_channels) { + // Try dropping the alpha channel. + --num_channels; + continue; + } + return JXL_FAILURE("no appropriate format found"); + } + break; + } + if (current_bit_depth < original_bit_depth) { + JXL_WARNING("encoding %" PRIuS "-bit original to %" PRIuS " bits", + original_bit_depth, current_bit_depth); + } + return true; +} + +template +class MetadataEncoder : public Encoder { + public: + std::vector AcceptedFormats() const override { + std::vector formats; + // empty, i.e. 
no need for actual pixel data + return formats; + } + + Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded, + ThreadPool* pool) const override { + JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info)); + encoded->icc.clear(); + encoded->bitstreams.resize(1); + if (metadata == 0) encoded->bitstreams.front() = ppf.metadata.exif; + if (metadata == 1) encoded->bitstreams.front() = ppf.metadata.xmp; + if (metadata == 2) encoded->bitstreams.front() = ppf.metadata.jumbf; + return true; + } +}; + +std::unique_ptr Encoder::FromExtension(std::string extension) { + std::transform( + extension.begin(), extension.end(), extension.begin(), + [](char c) { return std::tolower(c, std::locale::classic()); }); + if (extension == ".png" || extension == ".apng") return GetAPNGEncoder(); + if (extension == ".jpg") return GetJPEGEncoder(); + if (extension == ".jpeg") return GetJPEGEncoder(); + if (extension == ".npy") return GetNumPyEncoder(); + if (extension == ".pgx") return GetPGXEncoder(); + if (extension == ".pam") return GetPAMEncoder(); + if (extension == ".pgm") return GetPGMEncoder(); + if (extension == ".ppm") return GetPPMEncoder(); + if (extension == ".pfm") return GetPFMEncoder(); + if (extension == ".exr") return GetEXREncoder(); + if (extension == ".exif") return jxl::make_unique>(); + if (extension == ".xmp") return jxl::make_unique>(); + if (extension == ".xml") return jxl::make_unique>(); + if (extension == ".jumbf") return jxl::make_unique>(); + if (extension == ".jumb") return jxl::make_unique>(); + + return nullptr; +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/encode.h b/third-party/libjxl/libjxl/lib/extras/enc/encode.h new file mode 100644 index 0000000000..43a02aab6d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/encode.h @@ -0,0 +1,85 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_ENCODE_H_ +#define LIB_EXTRAS_ENC_ENCODE_H_ + +// Facade for image encoders. + +#include +#include + +#include "lib/extras/dec/decode.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace extras { + +struct EncodedImage { + // One (if the format supports animations or the image has only one frame) or + // more sequential bitstreams. + std::vector> bitstreams; + + // For each extra channel one or more sequential bitstreams. + std::vector>> extra_channel_bitstreams; + + std::vector preview_bitstream; + + // If the format does not support embedding color profiles into the bitstreams + // above, it will be present here, to be written as a separate file. If it + // does support them, this field will be empty. + std::vector icc; + + // Additional output for conformance testing, only filled in by NumPyEncoder. + std::vector metadata; +}; + +class Encoder { + public: + static std::unique_ptr FromExtension(std::string extension); + + virtual ~Encoder() = default; + + // Set of pixel formats that this encoder takes as input. + // If empty, the 'encoder' does not need any pixels (it's metadata-only). + virtual std::vector AcceptedFormats() const = 0; + + // Any existing data in encoded_image is discarded. 
+ virtual Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image, + ThreadPool* pool = nullptr) const = 0; + + void SetOption(std::string name, std::string value) { + options_[std::move(name)] = std::move(value); + } + + static Status VerifyBasicInfo(const JxlBasicInfo& info); + static Status VerifyImageSize(const PackedImage& image, + const JxlBasicInfo& info); + static Status VerifyBitDepth(JxlDataType data_type, uint32_t bits_per_sample, + uint32_t exponent_bits); + + protected: + const std::unordered_map& options() const { + return options_; + } + + Status VerifyFormat(const JxlPixelFormat& format) const; + + Status VerifyPackedImage(const PackedImage& image, + const JxlBasicInfo& info) const; + + private: + std::unordered_map options_; +}; + +// TODO(sboukortt): consider exposing this as part of the C API. +Status SelectFormat(const std::vector& accepted_formats, + const JxlBasicInfo& basic_info, JxlPixelFormat* format); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_ENCODE_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/exr.cc b/third-party/libjxl/libjxl/lib/extras/enc/exr.cc new file mode 100644 index 0000000000..d4005c3097 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/exr.cc @@ -0,0 +1,208 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/enc/exr.h" + +#if JPEGXL_ENABLE_EXR +#include +#include +#include +#include +#endif +#include + +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/byte_order.h" + +namespace jxl { +namespace extras { + +#if JPEGXL_ENABLE_EXR +namespace { + +namespace OpenEXR = OPENEXR_IMF_NAMESPACE; +namespace Imath = IMATH_NAMESPACE; + +// OpenEXR::Int64 is deprecated in favor of using uint64_t directly, but using +// uint64_t as recommended causes build failures with previous OpenEXR versions +// on macOS, where the definition for OpenEXR::Int64 was actually not equivalent +// to uint64_t. This alternative should work in all cases. +using ExrInt64 = decltype(std::declval().tellg()); + +class InMemoryOStream : public OpenEXR::OStream { + public: + // `bytes` must outlive the InMemoryOStream. + explicit InMemoryOStream(std::vector* const bytes) + : OStream(/*fileName=*/""), bytes_(*bytes) {} + + void write(const char c[], const int n) override { + if (bytes_.size() < pos_ + n) { + bytes_.resize(pos_ + n); + } + std::copy_n(c, n, bytes_.begin() + pos_); + pos_ += n; + } + + ExrInt64 tellp() override { return pos_; } + void seekp(const ExrInt64 pos) override { + if (bytes_.size() + 1 < pos) { + bytes_.resize(pos - 1); + } + pos_ = pos; + } + + private: + std::vector& bytes_; + size_t pos_ = 0; +}; + +// Loads a Big-Endian float +float LoadBEFloat(const uint8_t* p) { + uint32_t u = LoadBE32(p); + float result; + memcpy(&result, &u, 4); + return result; +} + +// Loads a Little-Endian float +float LoadLEFloat(const uint8_t* p) { + uint32_t u = LoadLE32(p); + float result; + memcpy(&result, &u, 4); + return result; +} + +Status EncodeImageEXR(const PackedImage& image, const JxlBasicInfo& info, + const JxlColorEncoding& c_enc, ThreadPool* pool, + std::vector* bytes) { + OpenEXR::setGlobalThreadCount(0); + + const size_t xsize = info.xsize; + const size_t ysize = info.ysize; + const bool has_alpha = info.alpha_bits > 0; + const bool 
alpha_is_premultiplied = info.alpha_premultiplied; + + if (info.num_color_channels != 3 || + c_enc.color_space != JXL_COLOR_SPACE_RGB || + c_enc.transfer_function != JXL_TRANSFER_FUNCTION_LINEAR) { + return JXL_FAILURE("Unsupported color encoding for OpenEXR output."); + } + + const size_t num_channels = 3 + (has_alpha ? 1 : 0); + const JxlPixelFormat format = image.format; + + if (format.data_type != JXL_TYPE_FLOAT) { + return JXL_FAILURE("Unsupported pixel format for OpenEXR output"); + } + + const uint8_t* in = reinterpret_cast(image.pixels()); + size_t in_stride = num_channels * 4 * xsize; + + OpenEXR::Header header(xsize, ysize); + OpenEXR::Chromaticities chromaticities; + chromaticities.red = + Imath::V2f(c_enc.primaries_red_xy[0], c_enc.primaries_red_xy[1]); + chromaticities.green = + Imath::V2f(c_enc.primaries_green_xy[0], c_enc.primaries_green_xy[1]); + chromaticities.blue = + Imath::V2f(c_enc.primaries_blue_xy[0], c_enc.primaries_blue_xy[1]); + chromaticities.white = + Imath::V2f(c_enc.white_point_xy[0], c_enc.white_point_xy[1]); + OpenEXR::addChromaticities(header, chromaticities); + OpenEXR::addWhiteLuminance(header, info.intensity_target); + + auto loadFloat = + format.endianness == JXL_BIG_ENDIAN ? LoadBEFloat : LoadLEFloat; + auto loadAlpha = + has_alpha ? loadFloat : [](const uint8_t* p) -> float { return 1.0f; }; + + // Ensure that the destructor of RgbaOutputFile has run before we look at the + // size of `bytes`. + { + InMemoryOStream os(bytes); + OpenEXR::RgbaOutputFile output( + os, header, has_alpha ? OpenEXR::WRITE_RGBA : OpenEXR::WRITE_RGB); + // How many rows to write at once. Again, the OpenEXR documentation + // recommends writing the whole image in one call. + const int y_chunk_size = ysize; + std::vector output_rows(xsize * y_chunk_size); + + for (size_t start_y = 0; start_y < ysize; start_y += y_chunk_size) { + // Inclusive. 
+ const size_t end_y = std::min(start_y + y_chunk_size - 1, ysize - 1); + output.setFrameBuffer(output_rows.data() - start_y * xsize, + /*xStride=*/1, /*yStride=*/xsize); + for (size_t y = start_y; y <= end_y; ++y) { + const uint8_t* in_row = &in[(y - start_y) * in_stride]; + OpenEXR::Rgba* const JXL_RESTRICT row_data = + &output_rows[(y - start_y) * xsize]; + for (size_t x = 0; x < xsize; ++x) { + const uint8_t* in_pixel = &in_row[4 * num_channels * x]; + float r = loadFloat(&in_pixel[0]); + float g = loadFloat(&in_pixel[4]); + float b = loadFloat(&in_pixel[8]); + const float alpha = loadAlpha(&in_pixel[12]); + if (!alpha_is_premultiplied) { + r *= alpha; + g *= alpha; + b *= alpha; + } + row_data[x] = OpenEXR::Rgba(r, g, b, alpha); + } + } + output.writePixels(/*numScanLines=*/end_y - start_y + 1); + } + } + + return true; +} + +class EXREncoder : public Encoder { + std::vector AcceptedFormats() const override { + std::vector formats; + for (const uint32_t num_channels : {1, 2, 3, 4}) { + for (const JxlDataType data_type : {JXL_TYPE_FLOAT}) { + for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) { + formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels, + /*data_type=*/data_type, + /*endianness=*/endianness, + /*align=*/0}); + } + } + } + return formats; + } + Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image, + ThreadPool* pool = nullptr) const override { + JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info)); + encoded_image->icc.clear(); + encoded_image->bitstreams.clear(); + encoded_image->bitstreams.reserve(ppf.frames.size()); + for (const auto& frame : ppf.frames) { + JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info)); + encoded_image->bitstreams.emplace_back(); + JXL_RETURN_IF_ERROR(EncodeImageEXR(frame.color, ppf.info, + ppf.color_encoding, pool, + &encoded_image->bitstreams.back())); + } + return true; + } +}; + +} // namespace +#endif + +std::unique_ptr GetEXREncoder() { +#if JPEGXL_ENABLE_EXR + return 
jxl::make_unique(); +#else + return nullptr; +#endif +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/exr.h b/third-party/libjxl/libjxl/lib/extras/enc/exr.h new file mode 100644 index 0000000000..1baaa0272f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/exr.h @@ -0,0 +1,23 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_EXR_H_ +#define LIB_EXTRAS_ENC_EXR_H_ + +// Encodes OpenEXR images in memory. + +#include + +#include "lib/extras/enc/encode.h" + +namespace jxl { +namespace extras { + +std::unique_ptr GetEXREncoder(); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_EXR_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpegli.cc b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.cc new file mode 100644 index 0000000000..c0b0798c52 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.cc @@ -0,0 +1,526 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/enc/jpegli.h" + +#include +#include +#include + +#include "lib/extras/enc/encode.h" +#include "lib/jpegli/encode.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_xyb.h" + +namespace jxl { +namespace extras { + +namespace { + +void MyErrorExit(j_common_ptr cinfo) { + jmp_buf* env = static_cast(cinfo->client_data); + (*cinfo->err->output_message)(cinfo); + jpegli_destroy_compress(reinterpret_cast(cinfo)); + longjmp(*env, 1); +} + +Status VerifyInput(const PackedPixelFile& ppf) { + const JxlBasicInfo& info = ppf.info; + JXL_RETURN_IF_ERROR(Encoder::VerifyBasicInfo(info)); + if (ppf.frames.size() != 1) { + return JXL_FAILURE("JPEG input must have exactly one frame."); + } + const PackedImage& image = ppf.frames[0].color; + JXL_RETURN_IF_ERROR(Encoder::VerifyImageSize(image, info)); + if (image.format.data_type == JXL_TYPE_FLOAT16) { + return JXL_FAILURE("FLOAT16 input is not supported."); + } + JXL_RETURN_IF_ERROR(Encoder::VerifyBitDepth(image.format.data_type, + info.bits_per_sample, + info.exponent_bits_per_sample)); + if ((image.format.data_type == JXL_TYPE_UINT8 && info.bits_per_sample != 8) || + (image.format.data_type == JXL_TYPE_UINT16 && + info.bits_per_sample != 16)) { + return JXL_FAILURE("Only full bit depth unsigned types are supported."); + } + return true; +} + +Status GetColorEncoding(const PackedPixelFile& ppf, const JxlCmsInterface* cms, + ColorEncoding* color_encoding) { + if (!ppf.icc.empty()) { + PaddedBytes icc; + icc.assign(ppf.icc.data(), ppf.icc.data() + ppf.icc.size()); + JXL_RETURN_IF_ERROR(color_encoding->SetICC(std::move(icc), cms)); + } else { + JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding( + ppf.color_encoding, color_encoding)); + } + if (color_encoding->ICC().empty()) { + return JXL_FAILURE("Invalid color encoding."); + } + return true; +} + +bool HasICCProfile(const std::vector& app_data) { + size_t pos = 0; + while (pos < app_data.size()) { + if (pos + 16 > app_data.size()) 
return false; + uint8_t marker = app_data[pos + 1]; + size_t marker_len = (app_data[pos + 2] << 8) + app_data[pos + 3] + 2; + if (marker == 0xe2 && memcmp(&app_data[pos + 4], "ICC_PROFILE", 12) == 0) { + return true; + } + pos += marker_len; + } + return false; +} + +Status WriteAppData(j_compress_ptr cinfo, + const std::vector& app_data) { + size_t pos = 0; + while (pos < app_data.size()) { + if (pos + 4 > app_data.size()) { + return JXL_FAILURE("Incomplete APP header."); + } + uint8_t marker = app_data[pos + 1]; + size_t marker_len = (app_data[pos + 2] << 8) + app_data[pos + 3] + 2; + if (app_data[pos] != 0xff || marker < 0xe0 || marker > 0xef) { + return JXL_FAILURE("Invalid APP marker %02x %02x", app_data[pos], marker); + } + if (marker_len <= 4) { + return JXL_FAILURE("Invalid APP marker length."); + } + if (pos + marker_len > app_data.size()) { + return JXL_FAILURE("Incomplete APP data"); + } + jpegli_write_marker(cinfo, marker, &app_data[pos + 4], marker_len - 4); + pos += marker_len; + } + return true; +} + +static constexpr int kICCMarker = 0xe2; +constexpr unsigned char kICCSignature[12] = { + 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00}; +static constexpr uint8_t kUnknownTf = 2; +static constexpr unsigned char kCICPTagSignature[4] = {0x63, 0x69, 0x63, 0x70}; +static constexpr size_t kCICPTagSize = 12; + +bool FindCICPTag(const uint8_t* icc_data, size_t len, bool is_first_chunk, + size_t* cicp_offset, size_t* cicp_length, uint8_t* cicp_tag, + size_t* cicp_pos) { + if (is_first_chunk) { + // Look up the offset of the CICP tag from the first chunk of ICC data. 
+ if (len < 132) { + return false; + } + uint32_t tag_count = LoadBE32(&icc_data[128]); + if (len < 132 + 12 * tag_count) { + return false; + } + for (uint32_t i = 0; i < tag_count; ++i) { + if (memcmp(&icc_data[132 + 12 * i], kCICPTagSignature, 4) == 0) { + *cicp_offset = LoadBE32(&icc_data[136 + 12 * i]); + *cicp_length = LoadBE32(&icc_data[140 + 12 * i]); + } + } + if (*cicp_length < kCICPTagSize) { + return false; + } + } + if (*cicp_offset < len) { + size_t n_bytes = std::min(len - *cicp_offset, kCICPTagSize - *cicp_pos); + memcpy(&cicp_tag[*cicp_pos], &icc_data[*cicp_offset], n_bytes); + *cicp_pos += n_bytes; + *cicp_offset = 0; + } else { + *cicp_offset -= len; + } + return true; +} + +uint8_t LookupCICPTransferFunctionFromAppData(const uint8_t* app_data, + size_t len) { + size_t last_index = 0; + size_t cicp_offset = 0; + size_t cicp_length = 0; + uint8_t cicp_tag[kCICPTagSize] = {}; + size_t cicp_pos = 0; + size_t pos = 0; + while (pos < len) { + const uint8_t* marker = &app_data[pos]; + if (pos + 4 > len) { + return kUnknownTf; + } + size_t marker_size = (marker[2] << 8) + marker[3] + 2; + if (pos + marker_size > len) { + return kUnknownTf; + } + if (marker_size < 18 || marker[0] != 0xff || marker[1] != kICCMarker || + memcmp(&marker[4], kICCSignature, 12) != 0) { + pos += marker_size; + continue; + } + uint8_t index = marker[16]; + uint8_t total = marker[17]; + const uint8_t* payload = marker + 18; + const size_t payload_size = marker_size - 18; + if (index != last_index + 1 || index > total) { + return kUnknownTf; + } + if (!FindCICPTag(payload, payload_size, last_index == 0, &cicp_offset, + &cicp_length, &cicp_tag[0], &cicp_pos)) { + return kUnknownTf; + } + if (cicp_pos == kCICPTagSize) { + break; + } + ++last_index; + } + if (cicp_pos >= kCICPTagSize && memcmp(cicp_tag, kCICPTagSignature, 4) == 0) { + return cicp_tag[9]; + } + return kUnknownTf; +} + +uint8_t LookupCICPTransferFunctionFromICCProfile(const uint8_t* icc_data, + size_t len) { + size_t 
cicp_offset = 0; + size_t cicp_length = 0; + uint8_t cicp_tag[kCICPTagSize] = {}; + size_t cicp_pos = 0; + if (!FindCICPTag(icc_data, len, true, &cicp_offset, &cicp_length, + &cicp_tag[0], &cicp_pos)) { + return kUnknownTf; + } + if (cicp_pos >= kCICPTagSize && memcmp(cicp_tag, kCICPTagSignature, 4) == 0) { + return cicp_tag[9]; + } + return kUnknownTf; +} + +JpegliDataType ConvertDataType(JxlDataType type) { + switch (type) { + case JXL_TYPE_UINT8: + return JPEGLI_TYPE_UINT8; + case JXL_TYPE_UINT16: + return JPEGLI_TYPE_UINT16; + case JXL_TYPE_FLOAT: + return JPEGLI_TYPE_FLOAT; + default: + return JPEGLI_TYPE_UINT8; + } +} + +JpegliEndianness ConvertEndianness(JxlEndianness endianness) { + switch (endianness) { + case JXL_NATIVE_ENDIAN: + return JPEGLI_NATIVE_ENDIAN; + case JXL_LITTLE_ENDIAN: + return JPEGLI_LITTLE_ENDIAN; + case JXL_BIG_ENDIAN: + return JPEGLI_BIG_ENDIAN; + default: + return JPEGLI_NATIVE_ENDIAN; + } +} + +void ToFloatRow(const uint8_t* row_in, JxlPixelFormat format, size_t len, + float* row_out) { + bool is_little_endian = + (format.endianness == JXL_LITTLE_ENDIAN || + (format.endianness == JXL_NATIVE_ENDIAN && IsLittleEndian())); + static constexpr double kMul8 = 1.0 / 255.0; + static constexpr double kMul16 = 1.0 / 65535.0; + if (format.data_type == JXL_TYPE_UINT8) { + for (size_t x = 0; x < len; ++x) { + row_out[x] = row_in[x] * kMul8; + } + } else if (format.data_type == JXL_TYPE_UINT16 && is_little_endian) { + for (size_t x = 0; x < len; ++x) { + row_out[x] = LoadLE16(&row_in[2 * x]) * kMul16; + } + } else if (format.data_type == JXL_TYPE_UINT16 && !is_little_endian) { + for (size_t x = 0; x < len; ++x) { + row_out[x] = LoadBE16(&row_in[2 * x]) * kMul16; + } + } else if (format.data_type == JXL_TYPE_FLOAT && is_little_endian) { + for (size_t x = 0; x < len; ++x) { + row_out[x] = LoadLEFloat(&row_in[4 * x]); + } + } else if (format.data_type == JXL_TYPE_FLOAT && !is_little_endian) { + for (size_t x = 0; x < len; ++x) { + row_out[x] = 
LoadBEFloat(&row_in[4 * x]); + } + } +} + +Status EncodeJpegToTargetSize(const PackedPixelFile& ppf, + const JpegSettings& jpeg_settings, + size_t target_size, ThreadPool* pool, + std::vector* output) { + output->clear(); + size_t best_error = std::numeric_limits::max(); + float distance0 = -1.0f; + float distance1 = -1.0f; + float distance = 1.0f; + for (int step = 0; step < 15; ++step) { + JpegSettings settings = jpeg_settings; + settings.libjpeg_quality = 0; + settings.distance = distance; + settings.target_size = 0; + std::vector compressed; + JXL_RETURN_IF_ERROR(EncodeJpeg(ppf, settings, pool, &compressed)); + size_t size = compressed.size(); + // prefer being under the target size to being over it + size_t error = size < target_size + ? target_size - size + : static_cast(1.2f * (size - target_size)); + if (error < best_error) { + best_error = error; + std::swap(*output, compressed); + } + float rel_error = size * 1.0f / target_size; + if (std::abs(rel_error - 1.0f) < 0.002f) { + break; + } + if (size < target_size) { + distance1 = distance; + } else { + distance0 = distance; + } + if (distance1 == -1) { + distance *= std::pow(rel_error, 1.5) * 1.05; + } else if (distance0 == -1) { + distance *= std::pow(rel_error, 1.5) * 0.95; + } else { + distance = 0.5 * (distance0 + distance1); + } + } + return true; +} + +} // namespace + +Status EncodeJpeg(const PackedPixelFile& ppf, const JpegSettings& jpeg_settings, + ThreadPool* pool, std::vector* compressed) { + if (jpeg_settings.libjpeg_quality > 0) { + auto encoder = Encoder::FromExtension(".jpg"); + encoder->SetOption("q", std::to_string(jpeg_settings.libjpeg_quality)); + if (!jpeg_settings.libjpeg_chroma_subsampling.empty()) { + encoder->SetOption("chroma_subsampling", + jpeg_settings.libjpeg_chroma_subsampling); + } + EncodedImage encoded; + JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded, pool)); + size_t target_size = encoded.bitstreams[0].size(); + return EncodeJpegToTargetSize(ppf, jpeg_settings, 
target_size, pool, + compressed); + } + if (jpeg_settings.target_size > 0) { + return EncodeJpegToTargetSize(ppf, jpeg_settings, jpeg_settings.target_size, + pool, compressed); + } + JXL_RETURN_IF_ERROR(VerifyInput(ppf)); + + const JxlCmsInterface& cms = GetJxlCms(); + + ColorEncoding color_encoding; + JXL_RETURN_IF_ERROR(GetColorEncoding(ppf, &cms, &color_encoding)); + + ColorSpaceTransform c_transform(cms); + ColorEncoding xyb_encoding; + if (jpeg_settings.xyb) { + if (ppf.info.num_color_channels != 3) { + return JXL_FAILURE("Only RGB input is supported in XYB mode."); + } + if (HasICCProfile(jpeg_settings.app_data)) { + return JXL_FAILURE("APP data ICC profile is not supported in XYB mode."); + } + const ColorEncoding& c_desired = ColorEncoding::LinearSRGB(false); + JXL_RETURN_IF_ERROR( + c_transform.Init(color_encoding, c_desired, 255.0f, ppf.info.xsize, 1)); + xyb_encoding.SetColorSpace(jxl::ColorSpace::kXYB); + xyb_encoding.rendering_intent = jxl::RenderingIntent::kPerceptual; + JXL_RETURN_IF_ERROR(xyb_encoding.CreateICC()); + } + const ColorEncoding& output_encoding = + jpeg_settings.xyb ? xyb_encoding : color_encoding; + + // We need to declare all the non-trivial destructor local variables + // before the call to setjmp(). 
+ std::vector pixels; + unsigned char* output_buffer = nullptr; + unsigned long output_size = 0; + std::vector row_bytes; + size_t rowlen = RoundUpTo(ppf.info.xsize, VectorSize()); + hwy::AlignedFreeUniquePtr xyb_tmp = + hwy::AllocateAligned(6 * rowlen); + hwy::AlignedFreeUniquePtr premul_absorb = + hwy::AllocateAligned(VectorSize() * 12); + ComputePremulAbsorb(255.0f, premul_absorb.get()); + + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + jpeg_error_mgr jerr; + jmp_buf env; + cinfo.err = jpegli_std_error(&jerr); + jerr.error_exit = &MyErrorExit; + if (setjmp(env)) { + return false; + } + cinfo.client_data = static_cast(&env); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &output_buffer, &output_size); + const JxlBasicInfo& info = ppf.info; + cinfo.image_width = info.xsize; + cinfo.image_height = info.ysize; + cinfo.input_components = info.num_color_channels; + cinfo.in_color_space = + cinfo.input_components == 1 ? JCS_GRAYSCALE : JCS_RGB; + if (jpeg_settings.xyb) { + jpegli_set_xyb_mode(&cinfo); + } else if (jpeg_settings.use_std_quant_tables) { + jpegli_use_standard_quant_tables(&cinfo); + } + uint8_t cicp_tf = kUnknownTf; + if (!jpeg_settings.app_data.empty()) { + cicp_tf = LookupCICPTransferFunctionFromAppData( + jpeg_settings.app_data.data(), jpeg_settings.app_data.size()); + } else if (!output_encoding.IsSRGB()) { + cicp_tf = LookupCICPTransferFunctionFromICCProfile( + output_encoding.ICC().data(), output_encoding.ICC().size()); + } + jpegli_set_cicp_transfer_function(&cinfo, cicp_tf); + jpegli_set_defaults(&cinfo); + if (!jpeg_settings.chroma_subsampling.empty()) { + if (jpeg_settings.chroma_subsampling == "444") { + cinfo.comp_info[0].h_samp_factor = 1; + cinfo.comp_info[0].v_samp_factor = 1; + } else if (jpeg_settings.chroma_subsampling == "440") { + cinfo.comp_info[0].h_samp_factor = 1; + cinfo.comp_info[0].v_samp_factor = 2; + } else if (jpeg_settings.chroma_subsampling == "422") { + 
cinfo.comp_info[0].h_samp_factor = 2; + cinfo.comp_info[0].v_samp_factor = 1; + } else if (jpeg_settings.chroma_subsampling == "420") { + cinfo.comp_info[0].h_samp_factor = 2; + cinfo.comp_info[0].v_samp_factor = 2; + } else { + return false; + } + for (int i = 1; i < cinfo.num_components; ++i) { + cinfo.comp_info[i].h_samp_factor = 1; + cinfo.comp_info[i].v_samp_factor = 1; + } + } + jpegli_enable_adaptive_quantization( + &cinfo, jpeg_settings.use_adaptive_quantization); + if (jpeg_settings.psnr_target > 0.0) { + jpegli_set_psnr(&cinfo, jpeg_settings.psnr_target, + jpeg_settings.search_tolerance, + jpeg_settings.min_distance, jpeg_settings.max_distance); + } else if (jpeg_settings.quality > 0.0) { + float distance = jpegli_quality_to_distance(jpeg_settings.quality); + jpegli_set_distance(&cinfo, distance, TRUE); + } else { + jpegli_set_distance(&cinfo, jpeg_settings.distance, TRUE); + } + jpegli_set_progressive_level(&cinfo, jpeg_settings.progressive_level); + cinfo.optimize_coding = jpeg_settings.optimize_coding; + if (!jpeg_settings.app_data.empty()) { + // Make sure jpegli_start_compress() does not write any APP markers. 
+ cinfo.write_JFIF_header = false; + cinfo.write_Adobe_marker = false; + } + const PackedImage& image = ppf.frames[0].color; + if (jpeg_settings.xyb) { + jpegli_set_input_format(&cinfo, JPEGLI_TYPE_FLOAT, JPEGLI_NATIVE_ENDIAN); + } else { + jpegli_set_input_format(&cinfo, ConvertDataType(image.format.data_type), + ConvertEndianness(image.format.endianness)); + } + jpegli_start_compress(&cinfo, TRUE); + if (!jpeg_settings.app_data.empty()) { + JXL_RETURN_IF_ERROR(WriteAppData(&cinfo, jpeg_settings.app_data)); + } + if ((jpeg_settings.app_data.empty() && !output_encoding.IsSRGB()) || + jpeg_settings.xyb) { + jpegli_write_icc_profile(&cinfo, output_encoding.ICC().data(), + output_encoding.ICC().size()); + } + const uint8_t* pixels = reinterpret_cast(image.pixels()); + if (jpeg_settings.xyb) { + float* src_buf = c_transform.BufSrc(0); + float* dst_buf = c_transform.BufDst(0); + for (size_t y = 0; y < image.ysize; ++y) { + // convert to float + ToFloatRow(&pixels[y * image.stride], image.format, 3 * image.xsize, + src_buf); + // convert to linear srgb + if (!c_transform.Run(0, src_buf, dst_buf)) { + return false; + } + // deinterleave channels + float* row0 = &xyb_tmp[0]; + float* row1 = &xyb_tmp[rowlen]; + float* row2 = &xyb_tmp[2 * rowlen]; + for (size_t x = 0; x < image.xsize; ++x) { + row0[x] = dst_buf[3 * x + 0]; + row1[x] = dst_buf[3 * x + 1]; + row2[x] = dst_buf[3 * x + 2]; + } + // convert to xyb + LinearRGBRowToXYB(row0, row1, row2, premul_absorb.get(), image.xsize); + // scale xyb + ScaleXYBRow(row0, row1, row2, image.xsize); + // interleave channels + float* row_out = &xyb_tmp[3 * rowlen]; + for (size_t x = 0; x < image.xsize; ++x) { + row_out[3 * x + 0] = row0[x]; + row_out[3 * x + 1] = row1[x]; + row_out[3 * x + 2] = row2[x]; + } + // feed to jpegli as native endian floats + JSAMPROW row[] = {reinterpret_cast(row_out)}; + jpegli_write_scanlines(&cinfo, row, 1); + } + } else { + row_bytes.resize(image.stride); + if (cinfo.num_components == 
(int)image.format.num_channels) { + for (size_t y = 0; y < info.ysize; ++y) { + memcpy(&row_bytes[0], pixels + y * image.stride, image.stride); + JSAMPROW row[] = {row_bytes.data()}; + jpegli_write_scanlines(&cinfo, row, 1); + } + } else { + for (size_t y = 0; y < info.ysize; ++y) { + int bytes_per_channel = + PackedImage::BitsPerChannel(image.format.data_type) / 8; + int bytes_per_pixel = cinfo.num_components * bytes_per_channel; + for (size_t x = 0; x < info.xsize; ++x) { + memcpy(&row_bytes[x * bytes_per_pixel], + &pixels[y * image.stride + x * image.pixel_stride()], + bytes_per_pixel); + } + JSAMPROW row[] = {row_bytes.data()}; + jpegli_write_scanlines(&cinfo, row, 1); + } + } + } + jpegli_finish_compress(&cinfo); + compressed->resize(output_size); + std::copy_n(output_buffer, output_size, compressed->data()); + return true; + }; + bool success = try_catch_block(); + jpegli_destroy_compress(&cinfo); + if (output_buffer) free(output_buffer); + return success; +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpegli.h b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.h new file mode 100644 index 0000000000..9538b2e3fc --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/jpegli.h @@ -0,0 +1,53 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_JPEGLI_H_ +#define LIB_EXTRAS_ENC_JPEGLI_H_ + +// Encodes JPG pixels and metadata in memory using the libjpegli library. 
+ +#include + +#include +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace extras { + +struct JpegSettings { + bool xyb = false; + size_t target_size = 0; + float quality = 0.0f; + float distance = 1.f; + bool use_adaptive_quantization = true; + bool use_std_quant_tables = false; + int progressive_level = 2; + bool optimize_coding = true; + std::string chroma_subsampling; + int libjpeg_quality = 0; + std::string libjpeg_chroma_subsampling; + // Parameters for selecting distance based on PSNR target. + float psnr_target = 0.0f; + float search_tolerance = 0.01; + float min_distance = 0.1f; + float max_distance = 25.0f; + // If not empty, must contain concatenated APP marker segments. In this case, + // these and only these APP marker segments will be written to the JPEG + // output. In xyb mode app_data must not contain an ICC profile, in this + // case an additional APP2 ICC profile for the XYB colorspace will be emitted. + std::vector app_data; +}; + +Status EncodeJpeg(const PackedPixelFile& ppf, const JpegSettings& jpeg_settings, + ThreadPool* pool, std::vector* compressed); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_JPEGLI_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpg.cc b/third-party/libjxl/libjxl/lib/extras/enc/jpg.cc new file mode 100644 index 0000000000..f1355bbcb7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/jpg.cc @@ -0,0 +1,630 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/enc/jpg.h" + +#if JPEGXL_ENABLE_JPEG +#include +#include +#endif +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lib/extras/exif.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" +#if JPEGXL_ENABLE_SJPEG +#include "sjpeg.h" +#include "sjpegi.h" +#endif + +namespace jxl { +namespace extras { + +#if JPEGXL_ENABLE_JPEG +namespace { + +constexpr unsigned char kICCSignature[12] = { + 0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00}; +constexpr int kICCMarker = JPEG_APP0 + 2; +constexpr size_t kMaxBytesInMarker = 65533; + +constexpr unsigned char kExifSignature[6] = {0x45, 0x78, 0x69, + 0x66, 0x00, 0x00}; +constexpr int kExifMarker = JPEG_APP0 + 1; + +enum class JpegEncoder { + kLibJpeg, + kSJpeg, +}; + +#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0])) + +// Popular jpeg scan scripts +// The fields of the individual scans are: +// comps_in_scan, component_index[], Ss, Se, Ah, Al +static constexpr jpeg_scan_info kScanScript1[] = { + {1, {0}, 0, 0, 0, 0}, // + {1, {1}, 0, 0, 0, 0}, // + {1, {2}, 0, 0, 0, 0}, // + {1, {0}, 1, 8, 0, 0}, // + {1, {0}, 9, 63, 0, 0}, // + {1, {1}, 1, 63, 0, 0}, // + {1, {2}, 1, 63, 0, 0}, // +}; +static constexpr size_t kNumScans1 = ARRAY_SIZE(kScanScript1); + +static constexpr jpeg_scan_info kScanScript2[] = { + {1, {0}, 0, 0, 0, 0}, // + {1, {1}, 0, 0, 0, 0}, // + {1, {2}, 0, 0, 0, 0}, // + {1, {0}, 1, 2, 0, 1}, // + {1, {0}, 3, 63, 0, 1}, // + {1, {0}, 1, 63, 1, 0}, // + {1, {1}, 1, 63, 0, 0}, // + {1, {2}, 1, 63, 0, 0}, // +}; +static constexpr size_t kNumScans2 = ARRAY_SIZE(kScanScript2); + +static constexpr jpeg_scan_info kScanScript3[] = { + {1, {0}, 0, 0, 0, 0}, // + {1, {1}, 0, 0, 0, 0}, // + {1, {2}, 0, 0, 0, 0}, // + {1, {0}, 1, 63, 0, 2}, // + {1, {0}, 1, 63, 2, 1}, // + {1, {0}, 1, 63, 1, 0}, // + {1, {1}, 1, 63, 0, 0}, // + {1, {2}, 1, 63, 0, 0}, // +}; +static constexpr size_t 
kNumScans3 = ARRAY_SIZE(kScanScript3); + +static constexpr jpeg_scan_info kScanScript4[] = { + {3, {0, 1, 2}, 0, 0, 0, 1}, // + {1, {0}, 1, 5, 0, 2}, // + {1, {2}, 1, 63, 0, 1}, // + {1, {1}, 1, 63, 0, 1}, // + {1, {0}, 6, 63, 0, 2}, // + {1, {0}, 1, 63, 2, 1}, // + {3, {0, 1, 2}, 0, 0, 1, 0}, // + {1, {2}, 1, 63, 1, 0}, // + {1, {1}, 1, 63, 1, 0}, // + {1, {0}, 1, 63, 1, 0}, // +}; +static constexpr size_t kNumScans4 = ARRAY_SIZE(kScanScript4); + +static constexpr jpeg_scan_info kScanScript5[] = { + {3, {0, 1, 2}, 0, 0, 0, 1}, // + {1, {0}, 1, 5, 0, 2}, // + {1, {1}, 1, 5, 0, 2}, // + {1, {2}, 1, 5, 0, 2}, // + {1, {1}, 6, 63, 0, 2}, // + {1, {2}, 6, 63, 0, 2}, // + {1, {0}, 6, 63, 0, 2}, // + {1, {0}, 1, 63, 2, 1}, // + {1, {1}, 1, 63, 2, 1}, // + {1, {2}, 1, 63, 2, 1}, // + {3, {0, 1, 2}, 0, 0, 1, 0}, // + {1, {0}, 1, 63, 1, 0}, // + {1, {1}, 1, 63, 1, 0}, // + {1, {2}, 1, 63, 1, 0}, // +}; +static constexpr size_t kNumScans5 = ARRAY_SIZE(kScanScript5); + +// default progressive mode of jpegli +static constexpr jpeg_scan_info kScanScript6[] = { + {3, {0, 1, 2}, 0, 0, 0, 0}, // + {1, {0}, 1, 2, 0, 0}, // + {1, {1}, 1, 2, 0, 0}, // + {1, {2}, 1, 2, 0, 0}, // + {1, {0}, 3, 63, 0, 2}, // + {1, {1}, 3, 63, 0, 2}, // + {1, {2}, 3, 63, 0, 2}, // + {1, {0}, 3, 63, 2, 1}, // + {1, {1}, 3, 63, 2, 1}, // + {1, {2}, 3, 63, 2, 1}, // + {1, {0}, 3, 63, 1, 0}, // + {1, {1}, 3, 63, 1, 0}, // + {1, {2}, 3, 63, 1, 0}, // +}; +static constexpr size_t kNumScans6 = ARRAY_SIZE(kScanScript6); + +// Adapt RGB scan info to grayscale jpegs. 
+void FilterScanComponents(const jpeg_compress_struct* cinfo, + jpeg_scan_info* si) { + const int all_comps_in_scan = si->comps_in_scan; + si->comps_in_scan = 0; + for (int j = 0; j < all_comps_in_scan; ++j) { + const int component = si->component_index[j]; + if (component < cinfo->input_components) { + si->component_index[si->comps_in_scan++] = component; + } + } +} + +Status SetJpegProgression(int progressive_id, + std::vector* scan_infos, + jpeg_compress_struct* cinfo) { + if (progressive_id < 0) { + return true; + } + if (progressive_id == 0) { + jpeg_simple_progression(cinfo); + return true; + } + constexpr const jpeg_scan_info* kScanScripts[] = {kScanScript1, kScanScript2, + kScanScript3, kScanScript4, + kScanScript5, kScanScript6}; + constexpr size_t kNumScans[] = {kNumScans1, kNumScans2, kNumScans3, + kNumScans4, kNumScans5, kNumScans6}; + if (progressive_id > static_cast(ARRAY_SIZE(kNumScans))) { + return JXL_FAILURE("Unknown jpeg scan script id %d", progressive_id); + } + const jpeg_scan_info* scan_script = kScanScripts[progressive_id - 1]; + const size_t num_scans = kNumScans[progressive_id - 1]; + // filter scan script for number of components + for (size_t i = 0; i < num_scans; ++i) { + jpeg_scan_info scan_info = scan_script[i]; + FilterScanComponents(cinfo, &scan_info); + if (scan_info.comps_in_scan > 0) { + scan_infos->emplace_back(std::move(scan_info)); + } + } + cinfo->scan_info = scan_infos->data(); + cinfo->num_scans = scan_infos->size(); + return true; +} + +bool IsSRGBEncoding(const JxlColorEncoding& c) { + return ((c.color_space == JXL_COLOR_SPACE_RGB || + c.color_space == JXL_COLOR_SPACE_GRAY) && + c.primaries == JXL_PRIMARIES_SRGB && + c.white_point == JXL_WHITE_POINT_D65 && + c.transfer_function == JXL_TRANSFER_FUNCTION_SRGB); +} + +void WriteICCProfile(jpeg_compress_struct* const cinfo, + const std::vector& icc) { + constexpr size_t kMaxIccBytesInMarker = + kMaxBytesInMarker - sizeof kICCSignature - 2; + const int num_markers = + 
static_cast(DivCeil(icc.size(), kMaxIccBytesInMarker)); + size_t begin = 0; + for (int current_marker = 0; current_marker < num_markers; ++current_marker) { + const size_t length = std::min(kMaxIccBytesInMarker, icc.size() - begin); + jpeg_write_m_header( + cinfo, kICCMarker, + static_cast(length + sizeof kICCSignature + 2)); + for (const unsigned char c : kICCSignature) { + jpeg_write_m_byte(cinfo, c); + } + jpeg_write_m_byte(cinfo, current_marker + 1); + jpeg_write_m_byte(cinfo, num_markers); + for (size_t i = 0; i < length; ++i) { + jpeg_write_m_byte(cinfo, icc[begin]); + ++begin; + } + } +} +void WriteExif(jpeg_compress_struct* const cinfo, + const std::vector& exif) { + jpeg_write_m_header( + cinfo, kExifMarker, + static_cast(exif.size() + sizeof kExifSignature)); + for (const unsigned char c : kExifSignature) { + jpeg_write_m_byte(cinfo, c); + } + for (size_t i = 0; i < exif.size(); ++i) { + jpeg_write_m_byte(cinfo, exif[i]); + } +} + +Status SetChromaSubsampling(const std::string& subsampling, + jpeg_compress_struct* const cinfo) { + const std::pair, std::array>> + options[] = {{"444", {{{1, 1, 1}}, {{1, 1, 1}}}}, + {"420", {{{2, 1, 1}}, {{2, 1, 1}}}}, + {"422", {{{2, 1, 1}}, {{1, 1, 1}}}}, + {"440", {{{1, 1, 1}}, {{2, 1, 1}}}}}; + for (const auto& option : options) { + if (subsampling == option.first) { + for (size_t i = 0; i < 3; i++) { + cinfo->comp_info[i].h_samp_factor = option.second.first[i]; + cinfo->comp_info[i].v_samp_factor = option.second.second[i]; + } + return true; + } + } + return false; +} + +struct JpegParams { + // Common between sjpeg and libjpeg + int quality = 100; + std::string chroma_subsampling = "444"; + // Libjpeg parameters + int progressive_id = -1; + bool optimize_coding = true; + bool is_xyb = false; + // Sjpeg parameters + int libjpeg_quality = 0; + std::string libjpeg_chroma_subsampling = "444"; + float psnr_target = 0; + std::string custom_base_quant_fn; + float search_q_start = 65.0f; + float search_q_min = 1.0f; + float 
search_q_max = 100.0f; + int search_max_iters = 20; + float search_tolerance = 0.1f; + float search_q_precision = 0.01f; + float search_first_iter_slope = 3.0f; + bool enable_adaptive_quant = true; +}; + +Status EncodeWithLibJpeg(const PackedImage& image, const JxlBasicInfo& info, + const std::vector& icc, + std::vector exif, const JpegParams& params, + std::vector* bytes) { + if (BITS_IN_JSAMPLE != 8 || sizeof(JSAMPLE) != 1) { + return JXL_FAILURE("Only 8 bit JSAMPLE is supported."); + } + jpeg_compress_struct cinfo = {}; + jpeg_error_mgr jerr; + cinfo.err = jpeg_std_error(&jerr); + jpeg_create_compress(&cinfo); + unsigned char* buffer = nullptr; + unsigned long size = 0; + jpeg_mem_dest(&cinfo, &buffer, &size); + cinfo.image_width = image.xsize; + cinfo.image_height = image.ysize; + cinfo.input_components = info.num_color_channels; + cinfo.in_color_space = info.num_color_channels == 1 ? JCS_GRAYSCALE : JCS_RGB; + jpeg_set_defaults(&cinfo); + cinfo.optimize_coding = params.optimize_coding; + if (cinfo.input_components == 3) { + JXL_RETURN_IF_ERROR( + SetChromaSubsampling(params.chroma_subsampling, &cinfo)); + } + if (params.is_xyb) { + // Tell libjpeg not to convert XYB data to YCbCr. 
+ jpeg_set_colorspace(&cinfo, JCS_RGB); + } + jpeg_set_quality(&cinfo, params.quality, TRUE); + std::vector scan_infos; + JXL_RETURN_IF_ERROR( + SetJpegProgression(params.progressive_id, &scan_infos, &cinfo)); + jpeg_start_compress(&cinfo, TRUE); + if (!icc.empty()) { + WriteICCProfile(&cinfo, icc); + } + if (!exif.empty()) { + ResetExifOrientation(exif); + WriteExif(&cinfo, exif); + } + if (cinfo.input_components > 3 || cinfo.input_components < 0) + return JXL_FAILURE("invalid numbers of components"); + + std::vector row_bytes(image.stride); + const uint8_t* pixels = reinterpret_cast(image.pixels()); + if (cinfo.num_components == (int)image.format.num_channels && + image.format.data_type == JXL_TYPE_UINT8) { + for (size_t y = 0; y < info.ysize; ++y) { + memcpy(&row_bytes[0], pixels + y * image.stride, image.stride); + JSAMPROW row[] = {row_bytes.data()}; + jpeg_write_scanlines(&cinfo, row, 1); + } + } else if (image.format.data_type == JXL_TYPE_UINT8) { + for (size_t y = 0; y < info.ysize; ++y) { + const uint8_t* image_row = pixels + y * image.stride; + for (size_t x = 0; x < info.xsize; ++x) { + const uint8_t* image_pixel = image_row + x * image.pixel_stride(); + memcpy(&row_bytes[x * cinfo.num_components], image_pixel, + cinfo.num_components); + } + JSAMPROW row[] = {row_bytes.data()}; + jpeg_write_scanlines(&cinfo, row, 1); + } + } else { + for (size_t y = 0; y < info.ysize; ++y) { + const uint8_t* image_row = pixels + y * image.stride; + for (size_t x = 0; x < info.xsize; ++x) { + const uint8_t* image_pixel = image_row + x * image.pixel_stride(); + for (int c = 0; c < cinfo.num_components; ++c) { + uint32_t val16 = (image_pixel[2 * c] << 8) + image_pixel[2 * c + 1]; + row_bytes[x * cinfo.num_components + c] = (val16 + 128) / 257; + } + } + JSAMPROW row[] = {row_bytes.data()}; + jpeg_write_scanlines(&cinfo, row, 1); + } + } + jpeg_finish_compress(&cinfo); + jpeg_destroy_compress(&cinfo); + bytes->resize(size); + // Compressed image data is initialized by 
libjpeg, which we are not + // instrumenting with msan. + msan::UnpoisonMemory(buffer, size); + std::copy_n(buffer, size, bytes->data()); + std::free(buffer); + return true; +} + +#if JPEGXL_ENABLE_SJPEG +struct MySearchHook : public sjpeg::SearchHook { + uint8_t base_tables[2][64]; + float q_start; + float q_precision; + float first_iter_slope; + void ReadBaseTables(const std::string& fn) { + const uint8_t kJPEGAnnexKMatrices[2][64] = { + {16, 11, 10, 16, 24, 40, 51, 61, 12, 12, 14, 19, 26, 58, 60, 55, + 14, 13, 16, 24, 40, 57, 69, 56, 14, 17, 22, 29, 51, 87, 80, 62, + 18, 22, 37, 56, 68, 109, 103, 77, 24, 35, 55, 64, 81, 104, 113, 92, + 49, 64, 78, 87, 103, 121, 120, 101, 72, 92, 95, 98, 112, 100, 103, 99}, + {17, 18, 24, 47, 99, 99, 99, 99, 18, 21, 26, 66, 99, 99, 99, 99, + 24, 26, 56, 99, 99, 99, 99, 99, 47, 66, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, + 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}}; + memcpy(base_tables[0], kJPEGAnnexKMatrices[0], sizeof(base_tables[0])); + memcpy(base_tables[1], kJPEGAnnexKMatrices[1], sizeof(base_tables[1])); + if (!fn.empty()) { + std::ifstream f(fn); + std::string line; + int idx = 0; + while (idx < 128 && std::getline(f, line)) { + if (line.empty() || line[0] == '#') continue; + std::istringstream line_stream(line); + std::string token; + while (idx < 128 && std::getline(line_stream, token, ',')) { + uint8_t val = std::stoi(token); + base_tables[idx / 64][idx % 64] = val; + idx++; + } + } + } + } + bool Setup(const sjpeg::EncoderParam& param) override { + sjpeg::SearchHook::Setup(param); + q = q_start; + return true; + } + void NextMatrix(int idx, uint8_t dst[64]) override { + float factor = (q <= 0) ? 5000.0f + : (q < 50.0f) ? 5000.0f / q + : (q < 100.0f) ? 
2 * (100.0f - q) + : 0.0f; + sjpeg::SetQuantMatrix(base_tables[idx], factor, dst); + } + bool Update(float result) override { + value = result; + if (fabs(value - target) < tolerance * target) { + return true; + } + if (value > target) { + qmax = q; + } else { + qmin = q; + } + if (qmin == qmax) { + return true; + } + const float last_q = q; + if (pass == 0) { + q += first_iter_slope * + (for_size ? 0.1 * std::log(target / value) : (target - value)); + q = std::max(qmin, std::min(qmax, q)); + } else { + q = (qmin + qmax) / 2.; + } + return (pass > 0 && fabs(q - last_q) < q_precision); + } + ~MySearchHook() override {} +}; +#endif + +Status EncodeWithSJpeg(const PackedImage& image, const JxlBasicInfo& info, + const std::vector& icc, + std::vector exif, const JpegParams& params, + std::vector* bytes) { +#if !JPEGXL_ENABLE_SJPEG + return JXL_FAILURE("JPEG XL was built without sjpeg support"); +#else + if (image.format.data_type != JXL_TYPE_UINT8) { + return JXL_FAILURE("Unsupported pixel data type"); + } + if (info.alpha_bits > 0) { + return JXL_FAILURE("alpha is not supported"); + } + sjpeg::EncoderParam param(params.quality); + if (!icc.empty()) { + param.iccp.assign(icc.begin(), icc.end()); + } + if (!exif.empty()) { + ResetExifOrientation(exif); + param.exif.assign(exif.begin(), exif.end()); + } + if (params.chroma_subsampling == "444") { + param.yuv_mode = SJPEG_YUV_444; + } else if (params.chroma_subsampling == "420") { + param.yuv_mode = SJPEG_YUV_420; + } else if (params.chroma_subsampling == "420sharp") { + param.yuv_mode = SJPEG_YUV_SHARP; + } else { + return JXL_FAILURE("sjpeg does not support this chroma subsampling mode"); + } + param.adaptive_quantization = params.enable_adaptive_quant; + std::unique_ptr hook; + if (params.libjpeg_quality > 0) { + JpegParams libjpeg_params; + libjpeg_params.quality = params.libjpeg_quality; + libjpeg_params.chroma_subsampling = params.libjpeg_chroma_subsampling; + std::vector libjpeg_bytes; + 
JXL_RETURN_IF_ERROR(EncodeWithLibJpeg(image, info, icc, exif, + libjpeg_params, &libjpeg_bytes)); + param.target_mode = sjpeg::EncoderParam::TARGET_SIZE; + param.target_value = libjpeg_bytes.size(); + } + if (params.psnr_target > 0) { + param.target_mode = sjpeg::EncoderParam::TARGET_PSNR; + param.target_value = params.psnr_target; + } + if (param.target_mode != sjpeg::EncoderParam::TARGET_NONE) { + param.passes = params.search_max_iters; + param.tolerance = params.search_tolerance; + param.qmin = params.search_q_min; + param.qmax = params.search_q_max; + hook.reset(new MySearchHook()); + hook->ReadBaseTables(params.custom_base_quant_fn); + hook->q_start = params.search_q_start; + hook->q_precision = params.search_q_precision; + hook->first_iter_slope = params.search_first_iter_slope; + param.search_hook = hook.get(); + } + size_t stride = info.xsize * 3; + const uint8_t* pixels = reinterpret_cast(image.pixels()); + std::string output; + JXL_RETURN_IF_ERROR( + sjpeg::Encode(pixels, image.xsize, image.ysize, stride, param, &output)); + bytes->assign( + reinterpret_cast(output.data()), + reinterpret_cast(output.data() + output.size())); + return true; +#endif +} + +Status EncodeImageJPG(const PackedImage& image, const JxlBasicInfo& info, + const std::vector& icc, + std::vector exif, JpegEncoder encoder, + const JpegParams& params, ThreadPool* pool, + std::vector* bytes) { + if (params.quality > 100) { + return JXL_FAILURE("please specify a 0-100 JPEG quality"); + } + + switch (encoder) { + case JpegEncoder::kLibJpeg: + JXL_RETURN_IF_ERROR( + EncodeWithLibJpeg(image, info, icc, std::move(exif), params, bytes)); + break; + case JpegEncoder::kSJpeg: + JXL_RETURN_IF_ERROR( + EncodeWithSJpeg(image, info, icc, std::move(exif), params, bytes)); + break; + default: + return JXL_FAILURE("tried to use an unknown JPEG encoder"); + } + + return true; +} + +class JPEGEncoder : public Encoder { + std::vector AcceptedFormats() const override { + std::vector formats; + for (const 
uint32_t num_channels : {1, 2, 3, 4}) { + for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) { + formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels, + /*data_type=*/JXL_TYPE_UINT8, + /*endianness=*/endianness, + /*align=*/0}); + } + formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels, + /*data_type=*/JXL_TYPE_UINT16, + /*endianness=*/JXL_BIG_ENDIAN, + /*align=*/0}); + } + return formats; + } + Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image, + ThreadPool* pool = nullptr) const override { + JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info)); + JpegEncoder jpeg_encoder = JpegEncoder::kLibJpeg; + JpegParams params; + for (const auto& it : options()) { + if (it.first == "q") { + std::istringstream is(it.second); + JXL_RETURN_IF_ERROR(static_cast(is >> params.quality)); + } else if (it.first == "libjpeg_quality") { + std::istringstream is(it.second); + JXL_RETURN_IF_ERROR(static_cast(is >> params.libjpeg_quality)); + } else if (it.first == "chroma_subsampling") { + params.chroma_subsampling = it.second; + } else if (it.first == "libjpeg_chroma_subsampling") { + params.libjpeg_chroma_subsampling = it.second; + } else if (it.first == "jpeg_encoder") { + if (it.second == "libjpeg") { + jpeg_encoder = JpegEncoder::kLibJpeg; + } else if (it.second == "sjpeg") { + jpeg_encoder = JpegEncoder::kSJpeg; + } else { + return JXL_FAILURE("unknown jpeg encoder \"%s\"", it.second.c_str()); + } + } else if (it.first == "progressive") { + std::istringstream is(it.second); + JXL_RETURN_IF_ERROR(static_cast(is >> params.progressive_id)); + } else if (it.first == "optimize" && it.second == "OFF") { + params.optimize_coding = false; + } else if (it.first == "adaptive_q" && it.second == "OFF") { + params.enable_adaptive_quant = false; + } else if (it.first == "psnr") { + params.psnr_target = std::stof(it.second); + } else if (it.first == "base_quant_fn") { + params.custom_base_quant_fn = it.second; + } else if (it.first == 
"search_q_start") { + params.search_q_start = std::stof(it.second); + } else if (it.first == "search_q_min") { + params.search_q_min = std::stof(it.second); + } else if (it.first == "search_q_max") { + params.search_q_max = std::stof(it.second); + } else if (it.first == "search_max_iters") { + params.search_max_iters = std::stoi(it.second); + } else if (it.first == "search_tolerance") { + params.search_tolerance = std::stof(it.second); + } else if (it.first == "search_q_precision") { + params.search_q_precision = std::stof(it.second); + } else if (it.first == "search_first_iter_slope") { + params.search_first_iter_slope = std::stof(it.second); + } + } + params.is_xyb = (ppf.color_encoding.color_space == JXL_COLOR_SPACE_XYB); + std::vector icc; + if (!IsSRGBEncoding(ppf.color_encoding)) { + icc = ppf.icc; + } + encoded_image->bitstreams.clear(); + encoded_image->bitstreams.reserve(ppf.frames.size()); + for (const auto& frame : ppf.frames) { + JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info)); + encoded_image->bitstreams.emplace_back(); + JXL_RETURN_IF_ERROR(EncodeImageJPG( + frame.color, ppf.info, icc, ppf.metadata.exif, jpeg_encoder, params, + pool, &encoded_image->bitstreams.back())); + } + return true; + } +}; + +} // namespace +#endif + +std::unique_ptr GetJPEGEncoder() { +#if JPEGXL_ENABLE_JPEG + return jxl::make_unique(); +#else + return nullptr; +#endif +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jpg.h b/third-party/libjxl/libjxl/lib/extras/enc/jpg.h new file mode 100644 index 0000000000..20b37cd168 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/jpg.h @@ -0,0 +1,23 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_JPG_H_ +#define LIB_EXTRAS_ENC_JPG_H_ + +// Encodes JPG pixels and metadata in memory. 
+ +#include + +#include "lib/extras/enc/encode.h" + +namespace jxl { +namespace extras { + +std::unique_ptr GetJPEGEncoder(); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_JPG_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jxl.cc b/third-party/libjxl/libjxl/lib/extras/enc/jxl.cc new file mode 100644 index 0000000000..036cd13e5d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/jxl.cc @@ -0,0 +1,318 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/enc/jxl.h" + +#include +#include + +#include "lib/jxl/exif.h" + +namespace jxl { +namespace extras { + +JxlEncoderStatus SetOption(const JXLOption& opt, + JxlEncoderFrameSettings* settings) { + return opt.is_float + ? JxlEncoderFrameSettingsSetFloatOption(settings, opt.id, opt.fval) + : JxlEncoderFrameSettingsSetOption(settings, opt.id, opt.ival); +} + +bool SetFrameOptions(const std::vector& options, size_t frame_index, + size_t* option_idx, JxlEncoderFrameSettings* settings) { + while (*option_idx < options.size()) { + const auto& opt = options[*option_idx]; + if (opt.frame_index > frame_index) { + break; + } + if (JXL_ENC_SUCCESS != SetOption(opt, settings)) { + fprintf(stderr, "Setting option id %d failed.\n", opt.id); + return false; + } + (*option_idx)++; + } + return true; +} + +bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf, + const std::vector* jpeg_bytes, + std::vector* compressed) { + auto encoder = JxlEncoderMake(/*memory_manager=*/nullptr); + JxlEncoder* enc = encoder.get(); + + if (params.allow_expert_options) { + JxlEncoderAllowExpertOptions(enc); + } + + if (params.runner_opaque != nullptr && + JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc, params.runner, + params.runner_opaque)) { + fprintf(stderr, "JxlEncoderSetParallelRunner failed\n"); + return false; + } + + auto 
settings = JxlEncoderFrameSettingsCreate(enc, nullptr); + size_t option_idx = 0; + if (!SetFrameOptions(params.options, 0, &option_idx, settings)) { + return false; + } + if (JXL_ENC_SUCCESS != + JxlEncoderSetFrameDistance(settings, params.distance)) { + fprintf(stderr, "Setting frame distance failed.\n"); + return false; + } + if (params.debug_image) { + JxlEncoderSetDebugImageCallback(settings, params.debug_image, + params.debug_image_opaque); + } + if (params.stats) { + JxlEncoderCollectStats(settings, params.stats); + } + + bool use_boxes = !ppf.metadata.exif.empty() || !ppf.metadata.xmp.empty() || + !ppf.metadata.jumbf.empty() || !ppf.metadata.iptc.empty(); + bool use_container = params.use_container || use_boxes || + (jpeg_bytes && params.jpeg_store_metadata); + + if (JXL_ENC_SUCCESS != + JxlEncoderUseContainer(enc, static_cast(use_container))) { + fprintf(stderr, "JxlEncoderUseContainer failed.\n"); + return false; + } + + if (jpeg_bytes) { + if (params.jpeg_store_metadata && + JXL_ENC_SUCCESS != JxlEncoderStoreJPEGMetadata(enc, JXL_TRUE)) { + fprintf(stderr, "Storing JPEG metadata failed.\n"); + return false; + } + if (!params.jpeg_store_metadata && params.jpeg_strip_exif) { + JxlEncoderFrameSettingsSetOption(settings, + JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF, 0); + } + if (!params.jpeg_store_metadata && params.jpeg_strip_xmp) { + JxlEncoderFrameSettingsSetOption(settings, + JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP, 0); + } + if (params.jpeg_strip_jumbf) { + JxlEncoderFrameSettingsSetOption( + settings, JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF, 0); + } + if (JXL_ENC_SUCCESS != JxlEncoderAddJPEGFrame(settings, jpeg_bytes->data(), + jpeg_bytes->size())) { + JxlEncoderError error = JxlEncoderGetError(enc); + if (error == JXL_ENC_ERR_BAD_INPUT) { + fprintf(stderr, + "Error while decoding the JPEG image. It may be corrupt (e.g. " + "truncated) or of an unsupported type (e.g. 
CMYK).\n"); + } else if (error == JXL_ENC_ERR_JBRD) { + fprintf(stderr, + "JPEG bitstream reconstruction data could not be created. " + "Possibly there is too much tail data.\n" + "Try using --jpeg_store_metadata 0, to losslessly " + "recompress the JPEG image data without bitstream " + "reconstruction data.\n"); + } else { + fprintf(stderr, "JxlEncoderAddJPEGFrame() failed.\n"); + } + return false; + } + } else { + size_t num_alpha_channels = 0; // Adjusted below. + JxlBasicInfo basic_info = ppf.info; + basic_info.xsize *= params.already_downsampled; + basic_info.ysize *= params.already_downsampled; + if (basic_info.alpha_bits > 0) num_alpha_channels = 1; + if (params.intensity_target > 0) { + basic_info.intensity_target = params.intensity_target; + } + basic_info.num_extra_channels = + std::max(num_alpha_channels, ppf.info.num_extra_channels); + basic_info.num_color_channels = ppf.info.num_color_channels; + const bool lossless = params.distance == 0; + basic_info.uses_original_profile = lossless; + if (params.override_bitdepth != 0) { + basic_info.bits_per_sample = params.override_bitdepth; + basic_info.exponent_bits_per_sample = + params.override_bitdepth == 32 ? 
8 : 0; + } + if (JXL_ENC_SUCCESS != + JxlEncoderSetCodestreamLevel(enc, params.codestream_level)) { + fprintf(stderr, "Setting --codestream_level failed.\n"); + return false; + } + if (JXL_ENC_SUCCESS != JxlEncoderSetBasicInfo(enc, &basic_info)) { + fprintf(stderr, "JxlEncoderSetBasicInfo() failed.\n"); + return false; + } + if (JXL_ENC_SUCCESS != + JxlEncoderSetUpsamplingMode(enc, params.already_downsampled, + params.upsampling_mode)) { + fprintf(stderr, "JxlEncoderSetUpsamplingMode() failed.\n"); + return false; + } + if (JXL_ENC_SUCCESS != + JxlEncoderSetFrameBitDepth(settings, ¶ms.input_bitdepth)) { + fprintf(stderr, "JxlEncoderSetFrameBitDepth() failed.\n"); + return false; + } + if (num_alpha_channels != 0 && + JXL_ENC_SUCCESS != JxlEncoderSetExtraChannelDistance( + settings, 0, params.alpha_distance)) { + fprintf(stderr, "Setting alpha distance failed.\n"); + return false; + } + if (lossless && + JXL_ENC_SUCCESS != JxlEncoderSetFrameLossless(settings, JXL_TRUE)) { + fprintf(stderr, "JxlEncoderSetFrameLossless() failed.\n"); + return false; + } + if (!ppf.icc.empty()) { + if (JXL_ENC_SUCCESS != + JxlEncoderSetICCProfile(enc, ppf.icc.data(), ppf.icc.size())) { + fprintf(stderr, "JxlEncoderSetICCProfile() failed.\n"); + return false; + } + } else { + if (JXL_ENC_SUCCESS != + JxlEncoderSetColorEncoding(enc, &ppf.color_encoding)) { + fprintf(stderr, "JxlEncoderSetColorEncoding() failed.\n"); + return false; + } + } + + if (use_boxes) { + if (JXL_ENC_SUCCESS != JxlEncoderUseBoxes(enc)) { + fprintf(stderr, "JxlEncoderUseBoxes() failed.\n"); + return false; + } + // Prepend 4 zero bytes to exif for tiff header offset + std::vector exif_with_offset; + bool bigendian; + if (IsExif(ppf.metadata.exif, &bigendian)) { + exif_with_offset.resize(ppf.metadata.exif.size() + 4); + memcpy(exif_with_offset.data() + 4, ppf.metadata.exif.data(), + ppf.metadata.exif.size()); + } + const struct BoxInfo { + const char* type; + const std::vector& bytes; + } boxes[] = { + {"Exif", 
exif_with_offset}, + {"xml ", ppf.metadata.xmp}, + {"jumb", ppf.metadata.jumbf}, + {"xml ", ppf.metadata.iptc}, + }; + for (size_t i = 0; i < sizeof boxes / sizeof *boxes; ++i) { + const BoxInfo& box = boxes[i]; + if (!box.bytes.empty() && + JXL_ENC_SUCCESS != JxlEncoderAddBox(enc, box.type, box.bytes.data(), + box.bytes.size(), + params.compress_boxes)) { + fprintf(stderr, "JxlEncoderAddBox() failed (%s).\n", box.type); + return false; + } + } + JxlEncoderCloseBoxes(enc); + } + + for (size_t num_frame = 0; num_frame < ppf.frames.size(); ++num_frame) { + const jxl::extras::PackedFrame& pframe = ppf.frames[num_frame]; + const jxl::extras::PackedImage& pimage = pframe.color; + JxlPixelFormat ppixelformat = pimage.format; + if (JXL_ENC_SUCCESS != + JxlEncoderSetFrameHeader(settings, &pframe.frame_info)) { + fprintf(stderr, "JxlEncoderSetFrameHeader() failed.\n"); + return false; + } + if (!SetFrameOptions(params.options, num_frame, &option_idx, settings)) { + return false; + } + if (num_alpha_channels > 0) { + JxlExtraChannelInfo extra_channel_info; + JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &extra_channel_info); + extra_channel_info.bits_per_sample = ppf.info.alpha_bits; + extra_channel_info.exponent_bits_per_sample = + ppf.info.alpha_exponent_bits; + if (params.premultiply != -1) { + if (params.premultiply != 0 && params.premultiply != 1) { + fprintf(stderr, "premultiply must be one of: -1, 0, 1.\n"); + return false; + } + extra_channel_info.alpha_premultiplied = params.premultiply; + } + if (JXL_ENC_SUCCESS != + JxlEncoderSetExtraChannelInfo(enc, 0, &extra_channel_info)) { + fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n"); + return false; + } + // We take the extra channel blend info frame_info, but don't do + // clamping. 
+ JxlBlendInfo extra_channel_blend_info = + pframe.frame_info.layer_info.blend_info; + extra_channel_blend_info.clamp = JXL_FALSE; + JxlEncoderSetExtraChannelBlendInfo(settings, 0, + &extra_channel_blend_info); + } + size_t num_interleaved_alpha = + (ppixelformat.num_channels - ppf.info.num_color_channels); + // Add extra channel info for the rest of the extra channels. + for (size_t i = 0; i < ppf.info.num_extra_channels; ++i) { + if (i < ppf.extra_channels_info.size()) { + const auto& ec_info = ppf.extra_channels_info[i].ec_info; + if (JXL_ENC_SUCCESS != + JxlEncoderSetExtraChannelInfo(enc, num_interleaved_alpha + i, + &ec_info)) { + fprintf(stderr, "JxlEncoderSetExtraChannelInfo() failed.\n"); + return false; + } + } + } + if (JXL_ENC_SUCCESS != JxlEncoderAddImageFrame(settings, &ppixelformat, + pimage.pixels(), + pimage.pixels_size)) { + fprintf(stderr, "JxlEncoderAddImageFrame() failed.\n"); + return false; + } + // Only set extra channel buffer if it is provided non-interleaved. + for (size_t i = 0; i < pframe.extra_channels.size(); ++i) { + if (JXL_ENC_SUCCESS != + JxlEncoderSetExtraChannelBuffer(settings, &ppixelformat, + pframe.extra_channels[i].pixels(), + pframe.extra_channels[i].stride * + pframe.extra_channels[i].ysize, + num_interleaved_alpha + i)) { + fprintf(stderr, "JxlEncoderSetExtraChannelBuffer() failed.\n"); + return false; + } + } + } + } + JxlEncoderCloseInput(enc); + // Reading compressed output + compressed->clear(); + compressed->resize(4096); + uint8_t* next_out = compressed->data(); + size_t avail_out = compressed->size() - (next_out - compressed->data()); + JxlEncoderStatus result = JXL_ENC_NEED_MORE_OUTPUT; + while (result == JXL_ENC_NEED_MORE_OUTPUT) { + result = JxlEncoderProcessOutput(enc, &next_out, &avail_out); + if (result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed->data(); + compressed->resize(compressed->size() * 2); + next_out = compressed->data() + offset; + avail_out = compressed->size() - offset; 
+ } + } + compressed->resize(next_out - compressed->data()); + if (result != JXL_ENC_SUCCESS) { + fprintf(stderr, "JxlEncoderProcessOutput failed.\n"); + return false; + } + return true; +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/jxl.h b/third-party/libjxl/libjxl/lib/extras/enc/jxl.h new file mode 100644 index 0000000000..8c270c4935 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/jxl.h @@ -0,0 +1,85 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_JXL_H_ +#define LIB_EXTRAS_ENC_JXL_H_ + +#include +#include +#include +#include +#include + +#include + +#include "lib/extras/packed_image.h" + +namespace jxl { +namespace extras { + +struct JXLOption { + JXLOption(JxlEncoderFrameSettingId id, int64_t val, size_t frame_index) + : id(id), is_float(false), ival(val), frame_index(frame_index) {} + JXLOption(JxlEncoderFrameSettingId id, float val, size_t frame_index) + : id(id), is_float(true), fval(val), frame_index(frame_index) {} + + JxlEncoderFrameSettingId id; + bool is_float; + union { + int64_t ival; + float fval; + }; + size_t frame_index; +}; + +struct JXLCompressParams { + std::vector options; + // Target butteraugli distance, 0.0 means lossless. + float distance = 1.0f; + float alpha_distance = 1.0f; + // If set to true, forces container mode. + bool use_container = false; + // Whether to enable/disable byte-exact jpeg reconstruction for jpeg inputs. + bool jpeg_store_metadata = true; + bool jpeg_strip_exif = false; + bool jpeg_strip_xmp = false; + bool jpeg_strip_jumbf = false; + // Whether to create brob boxes. + bool compress_boxes = true; + // Upper bound on the intensity level present in the image in nits (zero means + // that the library chooses a default). 
+ float intensity_target = 0; + int already_downsampled = 1; + int upsampling_mode = -1; + // Overrides for bitdepth, codestream level and alpha premultiply. + size_t override_bitdepth = 0; + int32_t codestream_level = -1; + int32_t premultiply = -1; + // Override input buffer interpretation. + JxlBitDepth input_bitdepth = {JXL_BIT_DEPTH_FROM_PIXEL_FORMAT, 0, 0}; + // If runner_opaque is set, the decoder uses this parallel runner. + JxlParallelRunner runner = JxlThreadParallelRunner; + void* runner_opaque = nullptr; + JxlDebugImageCallback debug_image = nullptr; + void* debug_image_opaque = nullptr; + JxlEncoderStats* stats = nullptr; + bool allow_expert_options = false; + + void AddOption(JxlEncoderFrameSettingId id, int64_t val) { + options.emplace_back(JXLOption(id, val, 0)); + } + void AddFloatOption(JxlEncoderFrameSettingId id, float val) { + options.emplace_back(JXLOption(id, val, 0)); + } +}; + +bool EncodeImageJXL(const JXLCompressParams& params, const PackedPixelFile& ppf, + const std::vector* jpeg_bytes, + std::vector* compressed); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_JXL_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/npy.cc b/third-party/libjxl/libjxl/lib/extras/enc/npy.cc new file mode 100644 index 0000000000..e7a659184b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/npy.cc @@ -0,0 +1,322 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/enc/npy.h" + +#include +#include + +#include +#include +#include + +#include "lib/extras/packed_image.h" + +namespace jxl { +namespace extras { +namespace { + +// JSON value writing + +class JSONField { + public: + virtual ~JSONField() = default; + virtual void Write(std::ostream& o, uint32_t indent) const = 0; + + protected: + JSONField() = default; +}; + +class JSONValue : public JSONField { + public: + template + explicit JSONValue(const T& value) : value_(std::to_string(value)) {} + + explicit JSONValue(const std::string& value) : value_("\"" + value + "\"") {} + + explicit JSONValue(bool value) : value_(value ? "true" : "false") {} + + void Write(std::ostream& o, uint32_t indent) const override { o << value_; } + + private: + std::string value_; +}; + +class JSONDict : public JSONField { + public: + JSONDict() = default; + + template + T* AddEmpty(const std::string& key) { + static_assert(std::is_convertible::value, + "T must be a JSONField"); + T* ret = new T(); + values_.emplace_back( + key, std::unique_ptr(static_cast(ret))); + return ret; + } + + template + void Add(const std::string& key, const T& value) { + values_.emplace_back(key, std::unique_ptr(new JSONValue(value))); + } + + void Write(std::ostream& o, uint32_t indent) const override { + std::string indent_str(indent, ' '); + o << "{"; + bool is_first = true; + for (const auto& key_value : values_) { + if (!is_first) { + o << ","; + } + is_first = false; + o << std::endl << indent_str << " \"" << key_value.first << "\": "; + key_value.second->Write(o, indent + 2); + } + if (!values_.empty()) { + o << std::endl << indent_str; + } + o << "}"; + } + + private: + // Dictionary with order. 
+ std::vector>> values_; +}; + +class JSONArray : public JSONField { + public: + JSONArray() = default; + + template + T* AddEmpty() { + static_assert(std::is_convertible::value, + "T must be a JSONField"); + T* ret = new T(); + values_.emplace_back(ret); + return ret; + } + + template + void Add(const T& value) { + values_.emplace_back(new JSONValue(value)); + } + + void Write(std::ostream& o, uint32_t indent) const override { + std::string indent_str(indent, ' '); + o << "["; + bool is_first = true; + for (const auto& value : values_) { + if (!is_first) { + o << ","; + } + is_first = false; + o << std::endl << indent_str << " "; + value->Write(o, indent + 2); + } + if (!values_.empty()) { + o << std::endl << indent_str; + } + o << "]"; + } + + private: + std::vector> values_; +}; + +void GenerateMetadata(const PackedPixelFile& ppf, std::vector* out) { + JSONDict meta; + // Same order as in 18181-3 CD. + + // Frames. + auto* meta_frames = meta.AddEmpty("frames"); + for (size_t i = 0; i < ppf.frames.size(); i++) { + auto* frame_i = meta_frames->AddEmpty(); + if (ppf.info.have_animation) { + frame_i->Add("duration", + JSONValue(ppf.frames[i].frame_info.duration * 1.0f * + ppf.info.animation.tps_denominator / + ppf.info.animation.tps_numerator)); + } + + frame_i->Add("name", JSONValue(ppf.frames[i].name)); + + if (ppf.info.animation.have_timecodes) { + frame_i->Add("timecode", JSONValue(ppf.frames[i].frame_info.timecode)); + } + } + +#define METADATA(FIELD) meta.Add(#FIELD, ppf.info.FIELD) + + METADATA(intensity_target); + METADATA(min_nits); + METADATA(relative_to_max_display); + METADATA(linear_below); + + if (ppf.info.have_preview) { + meta.AddEmpty("preview"); + // TODO(veluca): can we have duration/name/timecode here? 
+ } + + { + auto ectype = meta.AddEmpty("extra_channel_type"); + auto bps = meta.AddEmpty("bits_per_sample"); + auto ebps = meta.AddEmpty("exp_bits_per_sample"); + bps->Add(ppf.info.bits_per_sample); + ebps->Add(ppf.info.exponent_bits_per_sample); + for (size_t i = 0; i < ppf.extra_channels_info.size(); i++) { + switch (ppf.extra_channels_info[i].ec_info.type) { + case JXL_CHANNEL_ALPHA: { + ectype->Add(std::string("Alpha")); + break; + } + case JXL_CHANNEL_DEPTH: { + ectype->Add(std::string("Depth")); + break; + } + case JXL_CHANNEL_SPOT_COLOR: { + ectype->Add(std::string("SpotColor")); + break; + } + case JXL_CHANNEL_SELECTION_MASK: { + ectype->Add(std::string("SelectionMask")); + break; + } + case JXL_CHANNEL_BLACK: { + ectype->Add(std::string("Black")); + break; + } + case JXL_CHANNEL_CFA: { + ectype->Add(std::string("CFA")); + break; + } + case JXL_CHANNEL_THERMAL: { + ectype->Add(std::string("Thermal")); + break; + } + default: { + ectype->Add(std::string("UNKNOWN")); + break; + } + } + bps->Add(ppf.extra_channels_info[i].ec_info.bits_per_sample); + ebps->Add(ppf.extra_channels_info[i].ec_info.exponent_bits_per_sample); + } + } + + std::ostringstream os; + meta.Write(os, 0); + out->resize(os.str().size()); + memcpy(out->data(), os.str().data(), os.str().size()); +} + +void Append(std::vector* out, const void* data, size_t size) { + size_t pos = out->size(); + out->resize(pos + size); + memcpy(out->data() + pos, data, size); +} + +void WriteNPYHeader(size_t xsize, size_t ysize, uint32_t num_channels, + size_t num_frames, std::vector* out) { + const uint8_t header[] = "\x93NUMPY\x01\x00"; + Append(out, header, 8); + std::stringstream ss; + ss << "{'descr': '(ss.str().size() % 256), + static_cast(ss.str().size() / 256)}; + Append(out, header_len, 2); + Append(out, ss.str().data(), ss.str().size()); +} + +bool WriteFrameToNPYArray(size_t xsize, size_t ysize, const PackedFrame& frame, + std::vector* out) { + const auto& color = frame.color; + if (color.xsize != 
xsize || color.ysize != ysize) { + return false; + } + for (const auto& ec : frame.extra_channels) { + if (ec.xsize != xsize || ec.ysize != ysize) { + return false; + } + } + // interleave the samples from color and extra channels + for (size_t y = 0; y < ysize; ++y) { + for (size_t x = 0; x < xsize; ++x) { + { + size_t sample_size = color.pixel_stride(); + size_t offset = y * color.stride + x * sample_size; + uint8_t* pixels = reinterpret_cast(color.pixels()); + JXL_ASSERT(offset + sample_size <= color.pixels_size); + Append(out, pixels + offset, sample_size); + } + for (const auto& ec : frame.extra_channels) { + size_t sample_size = ec.pixel_stride(); + size_t offset = y * ec.stride + x * sample_size; + uint8_t* pixels = reinterpret_cast(ec.pixels()); + JXL_ASSERT(offset + sample_size <= ec.pixels_size); + Append(out, pixels + offset, sample_size); + } + } + } + return true; +} + +// Writes a PackedPixelFile as a numpy 4D ndarray in binary format. +bool WriteNPYArray(const PackedPixelFile& ppf, std::vector* out) { + size_t xsize = ppf.info.xsize; + size_t ysize = ppf.info.ysize; + WriteNPYHeader(xsize, ysize, + ppf.info.num_color_channels + ppf.extra_channels_info.size(), + ppf.frames.size(), out); + for (const auto& frame : ppf.frames) { + if (!WriteFrameToNPYArray(xsize, ysize, frame, out)) { + return false; + } + } + return true; +} + +class NumPyEncoder : public Encoder { + public: + Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image, + ThreadPool* pool = nullptr) const override { + JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info)); + GenerateMetadata(ppf, &encoded_image->metadata); + encoded_image->bitstreams.emplace_back(); + if (!WriteNPYArray(ppf, &encoded_image->bitstreams.back())) { + return false; + } + if (ppf.preview_frame) { + size_t xsize = ppf.info.preview.xsize; + size_t ysize = ppf.info.preview.ysize; + WriteNPYHeader(xsize, ysize, ppf.info.num_color_channels, 1, + &encoded_image->preview_bitstream); + if 
(!WriteFrameToNPYArray(xsize, ysize, *ppf.preview_frame, + &encoded_image->preview_bitstream)) { + return false; + } + } + return true; + } + std::vector AcceptedFormats() const override { + std::vector formats; + for (const uint32_t num_channels : {1, 3}) { + formats.push_back(JxlPixelFormat{num_channels, JXL_TYPE_FLOAT, + JXL_LITTLE_ENDIAN, /*align=*/0}); + } + return formats; + } +}; + +} // namespace + +std::unique_ptr GetNumPyEncoder() { + return jxl::make_unique(); +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/npy.h b/third-party/libjxl/libjxl/lib/extras/enc/npy.h new file mode 100644 index 0000000000..3ee6208ec2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/npy.h @@ -0,0 +1,23 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_NPY_H_ +#define LIB_EXTRAS_ENC_NPY_H_ + +// Encodes pixels to numpy array, used for conformance testing. + +#include + +#include "lib/extras/enc/encode.h" + +namespace jxl { +namespace extras { + +std::unique_ptr GetNumPyEncoder(); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_NPY_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pgx.cc b/third-party/libjxl/libjxl/lib/extras/enc/pgx.cc new file mode 100644 index 0000000000..201c8b4189 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/pgx.cc @@ -0,0 +1,123 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/enc/pgx.h" + +#include +#include +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/byte_order.h" + +namespace jxl { +namespace extras { +namespace { + +constexpr size_t kMaxHeaderSize = 200; + +Status EncodeHeader(const JxlBasicInfo& info, char* header, + int* chars_written) { + if (info.alpha_bits > 0) { + return JXL_FAILURE("PGX: can't store alpha"); + } + if (info.num_color_channels != 1) { + return JXL_FAILURE("PGX: must be grayscale"); + } + // TODO(lode): verify other bit depths: for other bit depths such as 1 or 4 + // bits, have a test case to verify it works correctly. For bits > 16, we may + // need to change the way external_image works. + if (info.bits_per_sample != 8 && info.bits_per_sample != 16) { + return JXL_FAILURE("PGX: bits other than 8 or 16 not yet supported"); + } + + // Use ML (Big Endian), LM may not be well supported by all decoders. + *chars_written = snprintf(header, kMaxHeaderSize, "PG ML + %u %u %u\n", + info.bits_per_sample, info.xsize, info.ysize); + JXL_RETURN_IF_ERROR(static_cast(*chars_written) < + kMaxHeaderSize); + return true; +} + +Status EncodeImagePGX(const PackedFrame& frame, const JxlBasicInfo& info, + std::vector* bytes) { + char header[kMaxHeaderSize]; + int header_size = 0; + JXL_RETURN_IF_ERROR(EncodeHeader(info, header, &header_size)); + + const PackedImage& color = frame.color; + const JxlPixelFormat format = color.format; + const uint8_t* in = reinterpret_cast(color.pixels()); + size_t data_bits_per_sample = PackedImage::BitsPerChannel(format.data_type); + size_t bytes_per_sample = data_bits_per_sample / kBitsPerByte; + size_t num_samples = info.xsize * info.ysize; + + if (info.bits_per_sample != data_bits_per_sample) { + return JXL_FAILURE("Bit depth does not match pixel data type"); + } + + std::vector pixels(num_samples * bytes_per_sample); + + if (format.data_type == JXL_TYPE_UINT8) { + memcpy(&pixels[0], in, num_samples * bytes_per_sample); + } else if 
(format.data_type == JXL_TYPE_UINT16) { + if (format.endianness != JXL_BIG_ENDIAN) { + const uint8_t* p_in = in; + uint8_t* p_out = pixels.data(); + for (size_t i = 0; i < num_samples; ++i, p_in += 2, p_out += 2) { + StoreBE16(LoadLE16(p_in), p_out); + } + } else { + memcpy(&pixels[0], in, num_samples * bytes_per_sample); + } + } else { + return JXL_FAILURE("Unsupported pixel data type"); + } + + bytes->resize(static_cast(header_size) + pixels.size()); + memcpy(bytes->data(), header, static_cast(header_size)); + memcpy(bytes->data() + header_size, pixels.data(), pixels.size()); + + return true; +} + +class PGXEncoder : public Encoder { + public: + std::vector AcceptedFormats() const override { + std::vector formats; + for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) { + for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) { + formats.push_back(JxlPixelFormat{/*num_channels=*/1, + /*data_type=*/data_type, + /*endianness=*/endianness, + /*align=*/0}); + } + } + return formats; + } + Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image, + ThreadPool* pool) const override { + JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info)); + encoded_image->icc.assign(ppf.icc.begin(), ppf.icc.end()); + encoded_image->bitstreams.clear(); + encoded_image->bitstreams.reserve(ppf.frames.size()); + for (const auto& frame : ppf.frames) { + JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info)); + encoded_image->bitstreams.emplace_back(); + JXL_RETURN_IF_ERROR( + EncodeImagePGX(frame, ppf.info, &encoded_image->bitstreams.back())); + } + return true; + } +}; + +} // namespace + +std::unique_ptr GetPGXEncoder() { + return jxl::make_unique(); +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pgx.h b/third-party/libjxl/libjxl/lib/extras/enc/pgx.h new file mode 100644 index 0000000000..f24e391b09 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/pgx.h @@ -0,0 +1,24 @@ +// 
Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_PGX_H_ +#define LIB_EXTRAS_ENC_PGX_H_ + +// Encodes PGX pixels in memory. + +#include +#include + +#include "lib/extras/enc/encode.h" + +namespace jxl { +namespace extras { + +std::unique_ptr GetPGXEncoder(); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_PGX_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pnm.cc b/third-party/libjxl/libjxl/lib/extras/enc/pnm.cc new file mode 100644 index 0000000000..91323692c0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/pnm.cc @@ -0,0 +1,302 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/enc/pnm.h" + +#include +#include + +#include +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/dec_external_image.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_external_image.h" +#include "lib/jxl/enc_image_bundle.h" +#include "lib/jxl/fields.h" // AllDefault +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { +namespace extras { +namespace { + +constexpr size_t kMaxHeaderSize = 200; + +class PNMEncoder : public Encoder { + public: + Status Encode(const PackedPixelFile& ppf, EncodedImage* encoded_image, + ThreadPool* pool = nullptr) const override { + JXL_RETURN_IF_ERROR(VerifyBasicInfo(ppf.info)); + if (!ppf.metadata.exif.empty() || !ppf.metadata.iptc.empty() || + !ppf.metadata.jumbf.empty() || !ppf.metadata.xmp.empty()) { + JXL_WARNING("PNM encoder ignoring metadata - use a 
different codec"); + } + encoded_image->icc = ppf.icc; + encoded_image->bitstreams.clear(); + encoded_image->bitstreams.reserve(ppf.frames.size()); + for (const auto& frame : ppf.frames) { + JXL_RETURN_IF_ERROR(VerifyPackedImage(frame.color, ppf.info)); + encoded_image->bitstreams.emplace_back(); + JXL_RETURN_IF_ERROR( + EncodeFrame(ppf, frame, &encoded_image->bitstreams.back())); + } + for (size_t i = 0; i < ppf.extra_channels_info.size(); ++i) { + const auto& ec_info = ppf.extra_channels_info[i].ec_info; + encoded_image->extra_channel_bitstreams.emplace_back(); + auto& ec_bitstreams = encoded_image->extra_channel_bitstreams.back(); + for (const auto& frame : ppf.frames) { + ec_bitstreams.emplace_back(); + JXL_RETURN_IF_ERROR(EncodeExtraChannel(frame.extra_channels[i], + ec_info.bits_per_sample, + &ec_bitstreams.back())); + } + } + return true; + } + + protected: + virtual Status EncodeFrame(const PackedPixelFile& ppf, + const PackedFrame& frame, + std::vector* bytes) const = 0; + virtual Status EncodeExtraChannel(const PackedImage& image, + size_t bits_per_sample, + std::vector* bytes) const = 0; +}; + +class PPMEncoder : public PNMEncoder { + public: + std::vector AcceptedFormats() const override { + return {JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0}, + JxlPixelFormat{3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}}; + } + Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame, + std::vector* bytes) const override { + return EncodeImage(frame.color, ppf.info.bits_per_sample, bytes); + } + Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample, + std::vector* bytes) const override { + return EncodeImage(image, bits_per_sample, bytes); + } + + private: + Status EncodeImage(const PackedImage& image, size_t bits_per_sample, + std::vector* bytes) const { + uint32_t maxval = (1u << bits_per_sample) - 1; + char type = image.format.num_channels == 1 ? 
'5' : '6'; + char header[kMaxHeaderSize]; + size_t header_size = + snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%u\n", + type, image.xsize, image.ysize, maxval); + JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize); + bytes->resize(header_size + image.pixels_size); + memcpy(bytes->data(), header, header_size); + memcpy(bytes->data() + header_size, + reinterpret_cast(image.pixels()), image.pixels_size); + return true; + } +}; + +class PGMEncoder : public PPMEncoder { + public: + std::vector AcceptedFormats() const override { + return {JxlPixelFormat{1, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0}, + JxlPixelFormat{1, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}}; + } +}; + +class PFMEncoder : public PNMEncoder { + public: + std::vector AcceptedFormats() const override { + std::vector formats; + for (const uint32_t num_channels : {1, 3}) { + for (JxlEndianness endianness : {JXL_BIG_ENDIAN, JXL_LITTLE_ENDIAN}) { + formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels, + /*data_type=*/JXL_TYPE_FLOAT, + /*endianness=*/endianness, + /*align=*/0}); + } + } + return formats; + } + Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame, + std::vector* bytes) const override { + return EncodeImage(frame.color, bytes); + } + Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample, + std::vector* bytes) const override { + return EncodeImage(image, bytes); + } + + private: + Status EncodeImage(const PackedImage& image, + std::vector* bytes) const { + char type = image.format.num_channels == 1 ? 'f' : 'F'; + double scale = image.format.endianness == JXL_LITTLE_ENDIAN ? 
-1.0 : 1.0; + char header[kMaxHeaderSize]; + size_t header_size = + snprintf(header, kMaxHeaderSize, "P%c\n%" PRIuS " %" PRIuS "\n%.1f\n", + type, image.xsize, image.ysize, scale); + JXL_RETURN_IF_ERROR(header_size < kMaxHeaderSize); + bytes->resize(header_size + image.pixels_size); + memcpy(bytes->data(), header, header_size); + const uint8_t* in = reinterpret_cast(image.pixels()); + uint8_t* out = bytes->data() + header_size; + for (size_t y = 0; y < image.ysize; ++y) { + size_t y_out = image.ysize - 1 - y; + const uint8_t* row_in = &in[y * image.stride]; + uint8_t* row_out = &out[y_out * image.stride]; + memcpy(row_out, row_in, image.stride); + } + return true; + } +}; + +class PAMEncoder : public PNMEncoder { + public: + std::vector AcceptedFormats() const override { + std::vector formats; + for (const uint32_t num_channels : {1, 2, 3, 4}) { + for (const JxlDataType data_type : {JXL_TYPE_UINT8, JXL_TYPE_UINT16}) { + formats.push_back(JxlPixelFormat{/*num_channels=*/num_channels, + /*data_type=*/data_type, + /*endianness=*/JXL_BIG_ENDIAN, + /*align=*/0}); + } + } + return formats; + } + Status EncodeFrame(const PackedPixelFile& ppf, const PackedFrame& frame, + std::vector* bytes) const override { + const PackedImage& color = frame.color; + const auto& ec_info = ppf.extra_channels_info; + JXL_RETURN_IF_ERROR(frame.extra_channels.size() == ec_info.size()); + for (const auto& ec : frame.extra_channels) { + if (ec.xsize != color.xsize || ec.ysize != color.ysize) { + return JXL_FAILURE("Extra channel and color size mismatch."); + } + if (ec.format.data_type != color.format.data_type || + ec.format.endianness != color.format.endianness) { + return JXL_FAILURE("Extra channel and color format mismatch."); + } + } + if (ppf.info.bits_per_sample != ppf.info.alpha_bits) { + return JXL_FAILURE("Alpha bit depth does not match image bit depth"); + } + for (const auto& it : ec_info) { + if (it.ec_info.bits_per_sample != ppf.info.bits_per_sample) { + return JXL_FAILURE( + 
"Extra channel bit depth does not match image bit depth"); + } + } + const char* kColorTypes[4] = {"GRAYSCALE", "GRAYSCALE_ALPHA", "RGB", + "RGB_ALPHA"}; + uint32_t maxval = (1u << ppf.info.bits_per_sample) - 1; + uint32_t depth = color.format.num_channels + ec_info.size(); + char header[kMaxHeaderSize]; + size_t pos = 0; + pos += snprintf(header + pos, kMaxHeaderSize - pos, + "P7\nWIDTH %" PRIuS "\nHEIGHT %" PRIuS + "\nDEPTH %u\n" + "MAXVAL %u\nTUPLTYPE %s\n", + color.xsize, color.ysize, depth, maxval, + kColorTypes[color.format.num_channels - 1]); + JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize); + for (const auto& info : ec_info) { + pos += snprintf(header + pos, kMaxHeaderSize - pos, "TUPLTYPE %s\n", + ExtraChannelTypeName(info.ec_info.type).c_str()); + JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize); + } + pos += snprintf(header + pos, kMaxHeaderSize - pos, "ENDHDR\n"); + JXL_RETURN_IF_ERROR(pos < kMaxHeaderSize); + size_t total_size = color.pixels_size; + for (const auto& ec : frame.extra_channels) { + total_size += ec.pixels_size; + } + bytes->resize(pos + total_size); + memcpy(bytes->data(), header, pos); + // If we have no extra channels, just copy color pixel data over. + if (frame.extra_channels.empty()) { + memcpy(bytes->data() + pos, reinterpret_cast(color.pixels()), + color.pixels_size); + return true; + } + // Interleave color and extra channels. 
+ const uint8_t* in = reinterpret_cast(color.pixels()); + std::vector ec_in(frame.extra_channels.size()); + for (size_t i = 0; i < frame.extra_channels.size(); ++i) { + ec_in[i] = + reinterpret_cast(frame.extra_channels[i].pixels()); + } + uint8_t* out = bytes->data() + pos; + size_t pwidth = PackedImage::BitsPerChannel(color.format.data_type) / 8; + for (size_t y = 0; y < color.ysize; ++y) { + for (size_t x = 0; x < color.xsize; ++x) { + memcpy(out, in, color.pixel_stride()); + out += color.pixel_stride(); + in += color.pixel_stride(); + for (auto& p : ec_in) { + memcpy(out, p, pwidth); + out += pwidth; + p += pwidth; + } + } + } + return true; + } + Status EncodeExtraChannel(const PackedImage& image, size_t bits_per_sample, + std::vector* bytes) const override { + return true; + } + + private: + static std::string ExtraChannelTypeName(JxlExtraChannelType type) { + switch (type) { + case JXL_CHANNEL_ALPHA: + return std::string("Alpha"); + case JXL_CHANNEL_DEPTH: + return std::string("Depth"); + case JXL_CHANNEL_SPOT_COLOR: + return std::string("SpotColor"); + case JXL_CHANNEL_SELECTION_MASK: + return std::string("SelectionMask"); + case JXL_CHANNEL_BLACK: + return std::string("Black"); + case JXL_CHANNEL_CFA: + return std::string("CFA"); + case JXL_CHANNEL_THERMAL: + return std::string("Thermal"); + default: + return std::string("UNKNOWN"); + } + } +}; + +} // namespace + +std::unique_ptr GetPPMEncoder() { + return jxl::make_unique(); +} + +std::unique_ptr GetPFMEncoder() { + return jxl::make_unique(); +} + +std::unique_ptr GetPGMEncoder() { + return jxl::make_unique(); +} + +std::unique_ptr GetPAMEncoder() { + return jxl::make_unique(); +} + +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/enc/pnm.h b/third-party/libjxl/libjxl/lib/extras/enc/pnm.h new file mode 100644 index 0000000000..403208cecd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/enc/pnm.h @@ -0,0 +1,28 @@ +// Copyright (c) the JPEG XL Project 
Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_ENC_PNM_H_ +#define LIB_EXTRAS_ENC_PNM_H_ + +// Encodes/decodes PBM/PGM/PPM/PFM pixels in memory. + +// TODO(janwas): workaround for incorrect Win64 codegen (cause unknown) +#include +#include + +#include "lib/extras/enc/encode.h" + +namespace jxl { +namespace extras { + +std::unique_ptr GetPAMEncoder(); +std::unique_ptr GetPGMEncoder(); +std::unique_ptr GetPPMEncoder(); +std::unique_ptr GetPFMEncoder(); + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_ENC_PNM_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/exif.cc b/third-party/libjxl/libjxl/lib/extras/exif.cc new file mode 100644 index 0000000000..aea632732b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/exif.cc @@ -0,0 +1,55 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/exif.h" + +#include "lib/jxl/base/byte_order.h" + +namespace jxl { + +constexpr uint16_t kExifOrientationTag = 274; + +void ResetExifOrientation(std::vector& exif) { + if (exif.size() < 12) return; // not enough bytes for a valid exif blob + bool bigendian; + uint8_t* t = exif.data(); + if (LoadLE32(t) == 0x2A004D4D) { + bigendian = true; + } else if (LoadLE32(t) == 0x002A4949) { + bigendian = false; + } else { + return; // not a valid tiff header + } + t += 4; + uint64_t offset = (bigendian ? LoadBE32(t) : LoadLE32(t)); + if (exif.size() < 12 + offset + 2 || offset < 8) return; + t += offset - 4; + uint16_t nb_tags = (bigendian ? LoadBE16(t) : LoadLE16(t)); + t += 2; + while (nb_tags > 0) { + if (t + 12 >= exif.data() + exif.size()) return; + uint16_t tag = (bigendian ? LoadBE16(t) : LoadLE16(t)); + t += 2; + if (tag == kExifOrientationTag) { + uint16_t type = (bigendian ? 
LoadBE16(t) : LoadLE16(t)); + t += 2; + uint32_t count = (bigendian ? LoadBE32(t) : LoadLE32(t)); + t += 4; + if (type == 3 && count == 1) { + if (bigendian) { + StoreBE16(1, t); + } else { + StoreLE16(1, t); + } + } + return; + } else { + t += 10; + nb_tags--; + } + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/exif.h b/third-party/libjxl/libjxl/lib/extras/exif.h new file mode 100644 index 0000000000..f22b2ccef5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/exif.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_EXIF_H_ +#define LIB_EXTRAS_EXIF_H_ + +#include + +#include + +namespace jxl { + +// Sets the Exif orientation to the identity, to avoid repeated orientation +void ResetExifOrientation(std::vector& exif); + +} // namespace jxl + +#endif // LIB_EXTRAS_EXIF_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/hlg.cc b/third-party/libjxl/libjxl/lib/extras/hlg.cc new file mode 100644 index 0000000000..e39a0807f5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/hlg.cc @@ -0,0 +1,56 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/hlg.h" + +#include + +#include "lib/jxl/enc_color_management.h" + +namespace jxl { + +float GetHlgGamma(const float peak_luminance, const float surround_luminance) { + return 1.2f * std::pow(1.111f, std::log2(peak_luminance / 1000.f)) * + std::pow(0.98f, std::log2(surround_luminance / 5.f)); +} + +Status HlgOOTF(ImageBundle* ib, const float gamma, ThreadPool* pool) { + ColorEncoding linear_rec2020; + linear_rec2020.SetColorSpace(ColorSpace::kRGB); + linear_rec2020.primaries = Primaries::k2100; + linear_rec2020.white_point = WhitePoint::kD65; + linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear); + JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC()); + JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool)); + + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, ib->ysize(), ThreadPool::NoInit, + [&](const int y, const int thread) { + float* const JXL_RESTRICT rows[3] = {ib->color()->PlaneRow(0, y), + ib->color()->PlaneRow(1, y), + ib->color()->PlaneRow(2, y)}; + for (size_t x = 0; x < ib->xsize(); ++x) { + float& red = rows[0][x]; + float& green = rows[1][x]; + float& blue = rows[2][x]; + const float luminance = + 0.2627f * red + 0.6780f * green + 0.0593f * blue; + const float ratio = std::pow(luminance, gamma - 1); + if (std::isfinite(ratio)) { + red *= ratio; + green *= ratio; + blue *= ratio; + } + } + }, + "HlgOOTF")); + return true; +} + +Status HlgInverseOOTF(ImageBundle* ib, const float gamma, ThreadPool* pool) { + return HlgOOTF(ib, 1.f / gamma, pool); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/hlg.h b/third-party/libjxl/libjxl/lib/extras/hlg.h new file mode 100644 index 0000000000..4cfec444f4 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/hlg.h @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_EXTRAS_HLG_H_ +#define LIB_EXTRAS_HLG_H_ + +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +float GetHlgGamma(float peak_luminance, float surround_luminance = 5.f); + +Status HlgOOTF(ImageBundle* ib, float gamma, ThreadPool* pool = nullptr); + +Status HlgInverseOOTF(ImageBundle* ib, float gamma, ThreadPool* pool = nullptr); + +} // namespace jxl + +#endif // LIB_EXTRAS_HLG_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/jpegli_test.cc b/third-party/libjxl/libjxl/lib/extras/jpegli_test.cc new file mode 100644 index 0000000000..a710048e83 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/jpegli_test.cc @@ -0,0 +1,413 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if JPEGXL_ENABLE_JPEGLI + +#include "lib/extras/dec/jpegli.h" + +#include +#include + +#include +#include + +#include "lib/extras/dec/color_hints.h" +#include "lib/extras/dec/decode.h" +#include "lib/extras/dec/jpg.h" +#include "lib/extras/enc/encode.h" +#include "lib/extras/enc/jpegli.h" +#include "lib/extras/enc/jpg.h" +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/test_image.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace extras { +namespace { + +using test::Butteraugli3Norm; +using test::ButteraugliDistance; +using test::TestImage; + +Status ReadTestImage(const std::string& pathname, PackedPixelFile* ppf) { + const PaddedBytes encoded = jxl::test::ReadTestData(pathname); + ColorHints color_hints; + if (pathname.find(".ppm") != std::string::npos) { + color_hints.Add("color_space", "RGB_D65_SRG_Rel_SRG"); + } else if (pathname.find(".pgm") != std::string::npos) { + color_hints.Add("color_space", "Gra_D65_Rel_SRG"); + } + return DecodeBytes(Span(encoded), color_hints, 
ppf); +} + +std::vector GetAppData(const std::vector& compressed) { + std::vector result; + size_t pos = 2; // After SOI + while (pos + 4 < compressed.size()) { + if (compressed[pos] != 0xff || compressed[pos + 1] < 0xe0 || + compressed[pos + 1] > 0xf0) { + break; + } + size_t len = (compressed[pos + 2] << 8) + compressed[pos + 3] + 2; + if (pos + len > compressed.size()) { + break; + } + result.insert(result.end(), &compressed[pos], &compressed[pos] + len); + pos += len; + } + return result; +} + +Status DecodeWithLibjpeg(const std::vector& compressed, + PackedPixelFile* ppf, + const JPGDecompressParams* dparams = nullptr) { + return DecodeImageJPG(Span(compressed), ColorHints(), ppf, + /*constraints=*/nullptr, dparams); +} + +Status EncodeWithLibjpeg(const PackedPixelFile& ppf, int quality, + std::vector* compressed) { + std::unique_ptr encoder = GetJPEGEncoder(); + encoder->SetOption("q", std::to_string(quality)); + EncodedImage encoded; + JXL_RETURN_IF_ERROR(encoder->Encode(ppf, &encoded)); + JXL_RETURN_IF_ERROR(!encoded.bitstreams.empty()); + *compressed = std::move(encoded.bitstreams[0]); + return true; +} + +std::string Description(const JxlColorEncoding& color_encoding) { + ColorEncoding c_enc; + JXL_CHECK(ConvertExternalToInternalColorEncoding(color_encoding, &c_enc)); + return Description(c_enc); +} + +float BitsPerPixel(const PackedPixelFile& ppf, + const std::vector& compressed) { + const size_t num_pixels = ppf.info.xsize * ppf.info.ysize; + return compressed.size() * 8.0 / num_pixels; +} + +TEST(JpegliTest, JpegliSRGBDecodeTest) { + TEST_LIBJPEG_SUPPORT(); + std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm"; + PackedPixelFile ppf0; + ASSERT_TRUE(ReadTestImage(testimage, &ppf0)); + EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf0.color_encoding)); + EXPECT_EQ(8, ppf0.info.bits_per_sample); + + std::vector compressed; + ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed)); + + PackedPixelFile ppf1; + 
ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf1)); + PackedPixelFile ppf2; + JpegDecompressParams dparams; + ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf2)); + EXPECT_LT(ButteraugliDistance(ppf0, ppf2), ButteraugliDistance(ppf0, ppf1)); +} + +TEST(JpegliTest, JpegliGrayscaleDecodeTest) { + TEST_LIBJPEG_SUPPORT(); + std::string testimage = "jxl/flower/flower_small.g.depth8.pgm"; + PackedPixelFile ppf0; + ASSERT_TRUE(ReadTestImage(testimage, &ppf0)); + EXPECT_EQ("Gra_D65_Rel_SRG", Description(ppf0.color_encoding)); + EXPECT_EQ(8, ppf0.info.bits_per_sample); + + std::vector compressed; + ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed)); + + PackedPixelFile ppf1; + ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf1)); + PackedPixelFile ppf2; + JpegDecompressParams dparams; + ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf2)); + EXPECT_LT(ButteraugliDistance(ppf0, ppf2), ButteraugliDistance(ppf0, ppf1)); +} + +TEST(JpegliTest, JpegliXYBEncodeTest) { + TEST_LIBJPEG_SUPPORT(); + std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm"; + PackedPixelFile ppf_in; + ASSERT_TRUE(ReadTestImage(testimage, &ppf_in)); + EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding)); + EXPECT_EQ(8, ppf_in.info.bits_per_sample); + + std::vector compressed; + JpegSettings settings; + settings.xyb = true; + ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + PackedPixelFile ppf_out; + ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out)); + EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(1.45f)); + EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.32f)); +} + +TEST(JpegliTest, JpegliDecodeTestLargeSmoothArea) { + TEST_LIBJPEG_SUPPORT(); + TestImage t; + const size_t xsize = 2070; + const size_t ysize = 1063; + t.SetDimensions(xsize, ysize).SetChannels(3); + t.SetAllBitDepths(8).SetEndianness(JXL_NATIVE_ENDIAN); + TestImage::Frame frame = t.AddFrame(); + frame.RandomFill(); + // Create a large 
smooth area in the top half of the image. This is to test + // that the bias statistics calculation can handle many blocks with all-zero + // AC coefficients. + for (size_t y = 0; y < ysize / 2; ++y) { + for (size_t x = 0; x < xsize; ++x) { + for (size_t c = 0; c < 3; ++c) { + frame.SetValue(y, x, c, 0.5f); + } + } + } + const PackedPixelFile& ppf0 = t.ppf(); + + std::vector compressed; + ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed)); + + PackedPixelFile ppf1; + JpegDecompressParams dparams; + ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf1)); + EXPECT_LT(ButteraugliDistance(ppf0, ppf1), 3.0f); +} + +TEST(JpegliTest, JpegliYUVEncodeTest) { + TEST_LIBJPEG_SUPPORT(); + std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm"; + PackedPixelFile ppf_in; + ASSERT_TRUE(ReadTestImage(testimage, &ppf_in)); + EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding)); + EXPECT_EQ(8, ppf_in.info.bits_per_sample); + + std::vector compressed; + JpegSettings settings; + settings.xyb = false; + ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + PackedPixelFile ppf_out; + ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out)); + EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(1.7f)); + EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.32f)); +} + +TEST(JpegliTest, JpegliYUVChromaSubsamplingEncodeTest) { + TEST_LIBJPEG_SUPPORT(); + std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm"; + PackedPixelFile ppf_in; + ASSERT_TRUE(ReadTestImage(testimage, &ppf_in)); + EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding)); + EXPECT_EQ(8, ppf_in.info.bits_per_sample); + + std::vector compressed; + JpegSettings settings; + for (const char* sampling : {"440", "422", "420"}) { + settings.xyb = false; + settings.chroma_subsampling = std::string(sampling); + ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + PackedPixelFile ppf_out; + 
ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out)); + EXPECT_LE(BitsPerPixel(ppf_in, compressed), 1.55f); + EXPECT_LE(ButteraugliDistance(ppf_in, ppf_out), 1.82f); + } +} + +TEST(JpegliTest, JpegliYUVEncodeTestNoAq) { + TEST_LIBJPEG_SUPPORT(); + std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm"; + PackedPixelFile ppf_in; + ASSERT_TRUE(ReadTestImage(testimage, &ppf_in)); + EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding)); + EXPECT_EQ(8, ppf_in.info.bits_per_sample); + + std::vector compressed; + JpegSettings settings; + settings.xyb = false; + settings.use_adaptive_quantization = false; + ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + PackedPixelFile ppf_out; + ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf_out)); + EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(1.85f)); + EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.25f)); +} + +TEST(JpegliTest, JpegliHDRRoundtripTest) { + std::string testimage = "jxl/hdr_room.png"; + PackedPixelFile ppf_in; + ASSERT_TRUE(ReadTestImage(testimage, &ppf_in)); + EXPECT_EQ("RGB_D65_202_Rel_HLG", Description(ppf_in.color_encoding)); + EXPECT_EQ(16, ppf_in.info.bits_per_sample); + + std::vector compressed; + JpegSettings settings; + settings.xyb = false; + ASSERT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + PackedPixelFile ppf_out; + JpegDecompressParams dparams; + dparams.output_data_type = JXL_TYPE_UINT16; + ASSERT_TRUE(DecodeJpeg(compressed, dparams, nullptr, &ppf_out)); + EXPECT_THAT(BitsPerPixel(ppf_in, compressed), IsSlightlyBelow(2.95f)); + EXPECT_THAT(ButteraugliDistance(ppf_in, ppf_out), IsSlightlyBelow(1.05f)); +} + +TEST(JpegliTest, JpegliSetAppData) { + std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm"; + PackedPixelFile ppf_in; + ASSERT_TRUE(ReadTestImage(testimage, &ppf_in)); + EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf_in.color_encoding)); + EXPECT_EQ(8, ppf_in.info.bits_per_sample); + + 
std::vector compressed; + JpegSettings settings; + settings.app_data = {0xff, 0xe3, 0, 4, 0, 1}; + EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + EXPECT_EQ(settings.app_data, GetAppData(compressed)); + + settings.app_data = {0xff, 0xe3, 0, 6, 0, 1, 2, 3, 0xff, 0xef, 0, 4, 0, 1}; + EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + EXPECT_EQ(settings.app_data, GetAppData(compressed)); + + settings.xyb = true; + EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + EXPECT_EQ(0, memcmp(settings.app_data.data(), GetAppData(compressed).data(), + settings.app_data.size())); + + settings.xyb = false; + settings.app_data = {0}; + EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + settings.app_data = {0xff, 0xe0}; + EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + settings.app_data = {0xff, 0xe0, 0, 2}; + EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + settings.app_data = {0xff, 0xeb, 0, 4, 0}; + EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + settings.app_data = {0xff, 0xeb, 0, 4, 0, 1, 2, 3}; + EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + settings.app_data = {0xff, 0xab, 0, 4, 0, 1}; + EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + + settings.xyb = false; + settings.app_data = { + 0xff, 0xeb, 0, 4, 0, 1, // + 0xff, 0xe2, 0, 20, 0x49, 0x43, 0x43, 0x5F, 0x50, // + 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00, 0, 1, // + 0, 0, 0, 0, // + }; + EXPECT_TRUE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); + EXPECT_EQ(settings.app_data, GetAppData(compressed)); + + settings.xyb = true; + EXPECT_FALSE(EncodeJpeg(ppf_in, settings, nullptr, &compressed)); +} + +struct TestConfig { + int num_colors; + int passes; + int dither; +}; + +class JpegliColorQuantTestParam : public ::testing::TestWithParam { +}; + +TEST_P(JpegliColorQuantTestParam, JpegliColorQuantizeTest) { + TEST_LIBJPEG_SUPPORT(); + TestConfig 
config = GetParam(); + std::string testimage = "jxl/flower/flower_small.rgb.depth8.ppm"; + PackedPixelFile ppf0; + ASSERT_TRUE(ReadTestImage(testimage, &ppf0)); + EXPECT_EQ("RGB_D65_SRG_Rel_SRG", Description(ppf0.color_encoding)); + EXPECT_EQ(8, ppf0.info.bits_per_sample); + + std::vector compressed; + ASSERT_TRUE(EncodeWithLibjpeg(ppf0, 90, &compressed)); + + PackedPixelFile ppf1; + JPGDecompressParams dparams1; + dparams1.two_pass_quant = (config.passes == 2); + dparams1.num_colors = config.num_colors; + dparams1.dither_mode = config.dither; + ASSERT_TRUE(DecodeWithLibjpeg(compressed, &ppf1, &dparams1)); + + PackedPixelFile ppf2; + JpegDecompressParams dparams2; + dparams2.two_pass_quant = (config.passes == 2); + dparams2.num_colors = config.num_colors; + dparams2.dither_mode = config.dither; + ASSERT_TRUE(DecodeJpeg(compressed, dparams2, nullptr, &ppf2)); + + double dist1 = Butteraugli3Norm(ppf0, ppf1); + double dist2 = Butteraugli3Norm(ppf0, ppf2); + printf("distance: %f vs %f\n", dist2, dist1); + if (config.passes == 1) { + if (config.num_colors == 16 && config.dither == 2) { + // TODO(szabadka) Fix this case. + EXPECT_LT(dist2, dist1 * 1.5); + } else { + EXPECT_LT(dist2, dist1 * 1.05); + } + } else if (config.num_colors > 64) { + // TODO(szabadka) Fix 2pass quantization for <= 64 colors. 
+ EXPECT_LT(dist2, dist1 * 1.1); + } else if (config.num_colors > 32) { + EXPECT_LT(dist2, dist1 * 1.2); + } else { + EXPECT_LT(dist2, dist1 * 1.7); + } +} + +std::vector<TestConfig> GenerateTests() { + std::vector<TestConfig> all_tests; + for (int num_colors = 8; num_colors <= 256; num_colors *= 2) { + for (int passes = 1; passes <= 2; ++passes) { + for (int dither = 0; dither < 3; dither += passes) { + TestConfig config; + config.num_colors = num_colors; + config.passes = passes; + config.dither = dither; + all_tests.push_back(config); + } + } + } + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + static constexpr const char* kDitherModeStr[] = {"No", "Ordered", "FS"}; + os << c.passes << "pass"; + os << c.num_colors << "colors"; + os << kDitherModeStr[c.dither] << "dither"; + return os; +} + +std::string TestDescription(const testing::TestParamInfo<TestConfig>& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JXL_GTEST_INSTANTIATE_TEST_SUITE_P(JpegliColorQuantTest, + JpegliColorQuantTestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // namespace extras +} // namespace jxl +#endif // JPEGXL_ENABLE_JPEGLI diff --git a/third-party/libjxl/libjxl/lib/extras/metrics.cc b/third-party/libjxl/libjxl/lib/extras/metrics.cc new file mode 100644 index 0000000000..8d91da6b8f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/metrics.cc @@ -0,0 +1,224 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/metrics.h" + +#include +#include + +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/extras/metrics.cc" +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Rebind; + +double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params, + double p) { + const double onePerPixels = 1.0 / (distmap.ysize() * distmap.xsize()); + if (std::abs(p - 3.0) < 1E-6) { + double sum1[3] = {0.0}; + +// Prefer double if possible, but otherwise use float rather than scalar. +#if HWY_CAP_FLOAT64 + using T = double; + const Rebind df; +#else + using T = float; +#endif + const HWY_FULL(T) d; + constexpr size_t N = MaxLanes(HWY_FULL(T)()); + // Manually aligned storage to avoid asan crash on clang-7 due to + // unaligned spill. 
+ HWY_ALIGN T sum_totals0[N] = {0}; + HWY_ALIGN T sum_totals1[N] = {0}; + HWY_ALIGN T sum_totals2[N] = {0}; + + for (size_t y = 0; y < distmap.ysize(); ++y) { + const float* JXL_RESTRICT row = distmap.ConstRow(y); + + auto sums0 = Zero(d); + auto sums1 = Zero(d); + auto sums2 = Zero(d); + + size_t x = 0; + for (; x + Lanes(d) <= distmap.xsize(); x += Lanes(d)) { +#if HWY_CAP_FLOAT64 + const auto d1 = PromoteTo(d, Load(df, row + x)); +#else + const auto d1 = Load(d, row + x); +#endif + const auto d2 = Mul(d1, Mul(d1, d1)); + sums0 = Add(sums0, d2); + const auto d3 = Mul(d2, d2); + sums1 = Add(sums1, d3); + const auto d4 = Mul(d3, d3); + sums2 = Add(sums2, d4); + } + + Store(Add(sums0, Load(d, sum_totals0)), d, sum_totals0); + Store(Add(sums1, Load(d, sum_totals1)), d, sum_totals1); + Store(Add(sums2, Load(d, sum_totals2)), d, sum_totals2); + + for (; x < distmap.xsize(); ++x) { + const double d1 = row[x]; + double d2 = d1 * d1 * d1; + sum1[0] += d2; + d2 *= d2; + sum1[1] += d2; + d2 *= d2; + sum1[2] += d2; + } + } + double v = 0; + v += pow( + onePerPixels * (sum1[0] + GetLane(SumOfLanes(d, Load(d, sum_totals0)))), + 1.0 / (p * 1.0)); + v += pow( + onePerPixels * (sum1[1] + GetLane(SumOfLanes(d, Load(d, sum_totals1)))), + 1.0 / (p * 2.0)); + v += pow( + onePerPixels * (sum1[2] + GetLane(SumOfLanes(d, Load(d, sum_totals2)))), + 1.0 / (p * 4.0)); + v /= 3.0; + return v; + } else { + static std::atomic once{0}; + if (once.fetch_add(1, std::memory_order_relaxed) == 0) { + JXL_WARNING("WARNING: using slow ComputeDistanceP"); + } + double sum1[3] = {0.0}; + for (size_t y = 0; y < distmap.ysize(); ++y) { + const float* JXL_RESTRICT row = distmap.ConstRow(y); + for (size_t x = 0; x < distmap.xsize(); ++x) { + double d2 = std::pow(row[x], p); + sum1[0] += d2; + d2 *= d2; + sum1[1] += d2; + d2 *= d2; + sum1[2] += d2; + } + } + double v = 0; + for (int i = 0; i < 3; ++i) { + v += pow(onePerPixels * (sum1[i]), 1.0 / (p * (1 << i))); + } + v /= 3.0; + return v; + } +} + +void 
ComputeSumOfSquares(const ImageBundle& ib1, const ImageBundle& ib2, + const JxlCmsInterface& cms, double sum_of_squares[3]) { + // Convert to sRGB - closer to perception than linear. + const Image3F* srgb1 = &ib1.color(); + Image3F copy1; + if (!ib1.IsSRGB()) { + JXL_CHECK( + ib1.CopyTo(Rect(ib1), ColorEncoding::SRGB(ib1.IsGray()), cms, &copy1)); + srgb1 = &copy1; + } + const Image3F* srgb2 = &ib2.color(); + Image3F copy2; + if (!ib2.IsSRGB()) { + JXL_CHECK( + ib2.CopyTo(Rect(ib2), ColorEncoding::SRGB(ib2.IsGray()), cms, &copy2)); + srgb2 = &copy2; + } + + JXL_CHECK(SameSize(*srgb1, *srgb2)); + + // TODO(veluca): SIMD. + float yuvmatrix[3][3] = {{0.299, 0.587, 0.114}, + {-0.14713, -0.28886, 0.436}, + {0.615, -0.51499, -0.10001}}; + for (size_t y = 0; y < srgb1->ysize(); ++y) { + const float* JXL_RESTRICT row1[3]; + const float* JXL_RESTRICT row2[3]; + for (size_t j = 0; j < 3; j++) { + row1[j] = srgb1->ConstPlaneRow(j, y); + row2[j] = srgb2->ConstPlaneRow(j, y); + } + for (size_t x = 0; x < srgb1->xsize(); ++x) { + float cdiff[3] = {}; + // YUV conversion is linear, so we can run it on the difference. 
+ for (size_t j = 0; j < 3; j++) { + cdiff[j] = row1[j][x] - row2[j][x]; + } + float yuvdiff[3] = {}; + for (size_t j = 0; j < 3; j++) { + for (size_t k = 0; k < 3; k++) { + yuvdiff[j] += yuvmatrix[j][k] * cdiff[k]; + } + } + for (size_t j = 0; j < 3; j++) { + sum_of_squares[j] += yuvdiff[j] * yuvdiff[j]; + } + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(ComputeDistanceP); +double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params, + double p) { + return HWY_DYNAMIC_DISPATCH(ComputeDistanceP)(distmap, params, p); +} + +HWY_EXPORT(ComputeSumOfSquares); + +double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2, + const JxlCmsInterface& cms) { + double sum_of_squares[3] = {}; + HWY_DYNAMIC_DISPATCH(ComputeSumOfSquares)(ib1, ib2, cms, sum_of_squares); + // Weighted PSNR as in JPEG-XL: chroma counts 1/8. + const float weights[3] = {6.0f / 8, 1.0f / 8, 1.0f / 8}; + // Avoid squaring the weight - 1/64 is too extreme. + double norm = 0; + for (size_t i = 0; i < 3; i++) { + norm += std::sqrt(sum_of_squares[i]) * weights[i]; + } + // This function returns distance *squared*. + return norm * norm; +} + +double ComputePSNR(const ImageBundle& ib1, const ImageBundle& ib2, + const JxlCmsInterface& cms) { + if (!SameSize(ib1, ib2)) return 0.0; + double sum_of_squares[3] = {}; + HWY_DYNAMIC_DISPATCH(ComputeSumOfSquares)(ib1, ib2, cms, sum_of_squares); + constexpr double kChannelWeights[3] = {6.0 / 8, 1.0 / 8, 1.0 / 8}; + double avg_psnr = 0; + const size_t input_pixels = ib1.xsize() * ib1.ysize(); + for (int i = 0; i < 3; ++i) { + const double rmse = std::sqrt(sum_of_squares[i] / input_pixels); + const double psnr = + sum_of_squares[i] == 0 ? 
99.99 : (20 * std::log10(1 / rmse)); + avg_psnr += kChannelWeights[i] * psnr; + } + return avg_psnr; +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/extras/metrics.h b/third-party/libjxl/libjxl/lib/extras/metrics.h new file mode 100644 index 0000000000..87a69a99ce --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/metrics.h @@ -0,0 +1,28 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_METRICS_H_ +#define LIB_EXTRAS_METRICS_H_ + +#include + +#include "lib/jxl/butteraugli/butteraugli.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +// Computes p-norm given the butteraugli distmap. +double ComputeDistanceP(const ImageF& distmap, const ButteraugliParams& params, + double p); + +double ComputeDistance2(const ImageBundle& ib1, const ImageBundle& ib2, + const JxlCmsInterface& cms); + +double ComputePSNR(const ImageBundle& ib1, const ImageBundle& ib2, + const JxlCmsInterface& cms); + +} // namespace jxl + +#endif // LIB_EXTRAS_METRICS_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/packed_image.h b/third-party/libjxl/libjxl/lib/extras/packed_image.h new file mode 100644 index 0000000000..3eaf5a0c6d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/packed_image.h @@ -0,0 +1,170 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_PACKED_IMAGE_H_ +#define LIB_EXTRAS_PACKED_IMAGE_H_ + +// Helper class for storing external (int or float, interleaved) images. This is +// the common format used by other libraries and in the libjxl API. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "lib/jxl/common.h" + +namespace jxl { +namespace extras { + +// Class representing an interleaved image with a bunch of channels. +class PackedImage { + public: + PackedImage(size_t xsize, size_t ysize, const JxlPixelFormat& format) + : PackedImage(xsize, ysize, format, CalcStride(format, xsize)) {} + + PackedImage Copy() const { + PackedImage copy(xsize, ysize, format); + memcpy(reinterpret_cast(copy.pixels()), + reinterpret_cast(pixels()), pixels_size); + return copy; + } + + // The interleaved pixels as defined in the storage format. + void* pixels() const { return pixels_.get(); } + + // The image size in pixels. + size_t xsize; + size_t ysize; + + // The number of bytes per row. + size_t stride; + + // Pixel storage format and buffer size of the pixels_ pointer. + JxlPixelFormat format; + size_t pixels_size; + + size_t pixel_stride() const { + return (BitsPerChannel(format.data_type) * format.num_channels / + jxl::kBitsPerByte); + } + + static size_t BitsPerChannel(JxlDataType data_type) { + switch (data_type) { + case JXL_TYPE_UINT8: + return 8; + case JXL_TYPE_UINT16: + return 16; + case JXL_TYPE_FLOAT: + return 32; + case JXL_TYPE_FLOAT16: + return 16; + default: + JXL_ABORT("Unhandled JxlDataType"); + } + } + + private: + PackedImage(size_t xsize, size_t ysize, const JxlPixelFormat& format, + size_t stride) + : xsize(xsize), + ysize(ysize), + stride(stride), + format(format), + pixels_size(ysize * stride), + pixels_(malloc(std::max(1, pixels_size)), free) {} + + static size_t CalcStride(const JxlPixelFormat& format, size_t xsize) { + size_t stride = xsize * (BitsPerChannel(format.data_type) * + format.num_channels / jxl::kBitsPerByte); + if (format.align > 1) { + stride = jxl::DivCeil(stride, format.align) * format.align; + } + return stride; + } + + std::unique_ptr pixels_; +}; + +// Helper class representing a frame, as seen from 
the API. Animations will have +// multiple frames, but a single frame can have a color/grayscale channel and +// multiple extra channels. The order of the extra channels should be the same +// as all other frames in the same image. +class PackedFrame { + public: + template + explicit PackedFrame(Args&&... args) : color(std::forward(args)...) {} + + PackedFrame Copy() const { + PackedFrame copy(color.xsize, color.ysize, color.format); + copy.frame_info = frame_info; + copy.name = name; + copy.color = color.Copy(); + for (size_t i = 0; i < extra_channels.size(); ++i) { + PackedImage ec = extra_channels[i].Copy(); + copy.extra_channels.emplace_back(std::move(ec)); + } + return copy; + } + + // The Frame metadata. + JxlFrameHeader frame_info = {}; + std::string name; + + // The pixel data for the color (or grayscale) channels. + PackedImage color; + // Extra channel image data. + std::vector extra_channels; +}; + +// Optional metadata associated with a file +class PackedMetadata { + public: + std::vector exif; + std::vector iptc; + std::vector jumbf; + std::vector xmp; +}; + +// The extra channel metadata information. +struct PackedExtraChannel { + JxlExtraChannelInfo ec_info; + size_t index; + std::string name; +}; + +// Helper class representing a JXL image file as decoded to pixels from the API. +class PackedPixelFile { + public: + JxlBasicInfo info = {}; + + std::vector extra_channels_info; + + // Color information of the decoded pixels. + // If the icc is empty, the JxlColorEncoding should be used instead. + std::vector icc; + JxlColorEncoding color_encoding = {}; + // The icc profile of the original image. 
+ std::vector orig_icc; + + std::unique_ptr preview_frame; + std::vector frames; + + PackedMetadata metadata; + PackedPixelFile() { JxlEncoderInitBasicInfo(&info); }; +}; + +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_PACKED_IMAGE_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/packed_image_convert.cc b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.cc new file mode 100644 index 0000000000..a67510b270 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.cc @@ -0,0 +1,301 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/packed_image_convert.h" + +#include +#include + +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/dec_external_image.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_external_image.h" +#include "lib/jxl/enc_image_bundle.h" +#include "lib/jxl/luminance.h" + +namespace jxl { +namespace extras { + +Status ConvertPackedFrameToImageBundle(const JxlBasicInfo& info, + const PackedFrame& frame, + const CodecInOut& io, ThreadPool* pool, + ImageBundle* bundle) { + JXL_ASSERT(frame.color.pixels() != nullptr); + const bool float_in = frame.color.format.data_type == JXL_TYPE_FLOAT16 || + frame.color.format.data_type == JXL_TYPE_FLOAT; + size_t frame_bits_per_sample = + float_in ? PackedImage::BitsPerChannel(frame.color.format.data_type) + : info.bits_per_sample; + JXL_ASSERT(frame_bits_per_sample != 0); + // It is ok for the frame.color.format.num_channels to not match the + // number of channels on the image. 
+ JXL_ASSERT(1 <= frame.color.format.num_channels && + frame.color.format.num_channels <= 4); + + const Span span( + static_cast(frame.color.pixels()), + frame.color.pixels_size); + JXL_ASSERT(Rect(frame.frame_info.layer_info.crop_x0, + frame.frame_info.layer_info.crop_y0, + frame.frame_info.layer_info.xsize, + frame.frame_info.layer_info.ysize) + .IsInside(Rect(0, 0, info.xsize, info.ysize))); + if (info.have_animation) { + bundle->duration = frame.frame_info.duration; + bundle->blend = frame.frame_info.layer_info.blend_info.blendmode > 0; + bundle->use_for_next_frame = + frame.frame_info.layer_info.save_as_reference > 0; + bundle->origin.x0 = frame.frame_info.layer_info.crop_x0; + bundle->origin.y0 = frame.frame_info.layer_info.crop_y0; + } + bundle->name = frame.name; // frame.frame_info.name_length is ignored here. + JXL_ASSERT(io.metadata.m.color_encoding.IsGray() == + (frame.color.format.num_channels <= 2)); + + JXL_RETURN_IF_ERROR(ConvertFromExternal( + span, frame.color.xsize, frame.color.ysize, io.metadata.m.color_encoding, + frame_bits_per_sample, frame.color.format, pool, bundle)); + + bundle->extra_channels().resize(io.metadata.m.extra_channel_info.size()); + for (size_t i = 0; i < frame.extra_channels.size(); i++) { + const auto& ppf_ec = frame.extra_channels[i]; + bundle->extra_channels()[i] = ImageF(ppf_ec.xsize, ppf_ec.ysize); + JXL_CHECK(BufferToImageF(ppf_ec.format, ppf_ec.xsize, ppf_ec.ysize, + ppf_ec.pixels(), ppf_ec.pixels_size, pool, + &bundle->extra_channels()[i])); + } + return true; +} + +Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf, + ThreadPool* pool, CodecInOut* io) { + const bool has_alpha = ppf.info.alpha_bits != 0; + JXL_ASSERT(!ppf.frames.empty()); + if (has_alpha) { + JXL_ASSERT(ppf.info.alpha_bits == ppf.info.bits_per_sample); + JXL_ASSERT(ppf.info.alpha_exponent_bits == + ppf.info.exponent_bits_per_sample); + } + + const bool is_gray = ppf.info.num_color_channels == 1; + 
JXL_ASSERT(ppf.info.num_color_channels == 1 || + ppf.info.num_color_channels == 3); + + // Convert the image metadata + io->SetSize(ppf.info.xsize, ppf.info.ysize); + io->metadata.m.bit_depth.bits_per_sample = ppf.info.bits_per_sample; + io->metadata.m.bit_depth.exponent_bits_per_sample = + ppf.info.exponent_bits_per_sample; + io->metadata.m.bit_depth.floating_point_sample = + ppf.info.exponent_bits_per_sample != 0; + io->metadata.m.modular_16_bit_buffer_sufficient = + ppf.info.exponent_bits_per_sample == 0 && ppf.info.bits_per_sample <= 12; + + io->metadata.m.SetAlphaBits(ppf.info.alpha_bits, + ppf.info.alpha_premultiplied); + + io->metadata.m.xyb_encoded = !ppf.info.uses_original_profile; + JXL_ASSERT(ppf.info.orientation > 0 && ppf.info.orientation <= 8); + io->metadata.m.orientation = ppf.info.orientation; + + // Convert animation metadata + JXL_ASSERT(ppf.frames.size() == 1 || ppf.info.have_animation); + io->metadata.m.have_animation = ppf.info.have_animation; + io->metadata.m.animation.tps_numerator = ppf.info.animation.tps_numerator; + io->metadata.m.animation.tps_denominator = ppf.info.animation.tps_denominator; + io->metadata.m.animation.num_loops = ppf.info.animation.num_loops; + + // Convert the color encoding. + if (!ppf.icc.empty()) { + PaddedBytes icc; + icc.append(ppf.icc); + const JxlCmsInterface& cms = GetJxlCms(); + if (!io->metadata.m.color_encoding.SetICC(std::move(icc), &cms)) { + fprintf(stderr, "Warning: error setting ICC profile, assuming SRGB\n"); + io->metadata.m.color_encoding = ColorEncoding::SRGB(is_gray); + } else { + if (io->metadata.m.color_encoding.IsGray() != is_gray) { + // E.g. JPG image has 3 channels, but gray ICC. 
+ return JXL_FAILURE("Embedded ICC does not match image color type"); + } + } + } else { + JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding( + ppf.color_encoding, &io->metadata.m.color_encoding)); + if (io->metadata.m.color_encoding.ICC().empty()) { + return JXL_FAILURE("Failed to serialize ICC"); + } + } + + // Convert the extra blobs + io->blobs.exif = ppf.metadata.exif; + io->blobs.iptc = ppf.metadata.iptc; + io->blobs.jumbf = ppf.metadata.jumbf; + io->blobs.xmp = ppf.metadata.xmp; + + // Append all other extra channels. + for (const auto& info : ppf.extra_channels_info) { + ExtraChannelInfo out; + out.type = static_cast(info.ec_info.type); + out.bit_depth.bits_per_sample = info.ec_info.bits_per_sample; + out.bit_depth.exponent_bits_per_sample = + info.ec_info.exponent_bits_per_sample; + out.bit_depth.floating_point_sample = + info.ec_info.exponent_bits_per_sample != 0; + out.dim_shift = info.ec_info.dim_shift; + out.name = info.name; + out.alpha_associated = (info.ec_info.alpha_premultiplied != 0); + out.spot_color[0] = info.ec_info.spot_color[0]; + out.spot_color[1] = info.ec_info.spot_color[1]; + out.spot_color[2] = info.ec_info.spot_color[2]; + out.spot_color[3] = info.ec_info.spot_color[3]; + io->metadata.m.extra_channel_info.push_back(std::move(out)); + } + + // Convert the preview + if (ppf.preview_frame) { + size_t preview_xsize = ppf.preview_frame->color.xsize; + size_t preview_ysize = ppf.preview_frame->color.ysize; + io->metadata.m.have_preview = true; + JXL_RETURN_IF_ERROR( + io->metadata.m.preview_size.Set(preview_xsize, preview_ysize)); + JXL_RETURN_IF_ERROR(ConvertPackedFrameToImageBundle( + ppf.info, *ppf.preview_frame, *io, pool, &io->preview_frame)); + } + + // Convert the pixels + io->frames.clear(); + for (const auto& frame : ppf.frames) { + ImageBundle bundle(&io->metadata.m); + JXL_RETURN_IF_ERROR( + ConvertPackedFrameToImageBundle(ppf.info, frame, *io, pool, &bundle)); + io->frames.push_back(std::move(bundle)); + } + + if 
(ppf.info.exponent_bits_per_sample == 0) { + // uint case. + io->metadata.m.bit_depth.bits_per_sample = io->Main().DetectRealBitdepth(); + } + if (ppf.info.intensity_target != 0) { + io->metadata.m.SetIntensityTarget(ppf.info.intensity_target); + } else { + SetIntensityTarget(&io->metadata.m); + } + io->CheckMetadata(); + return true; +} + +// Allows converting from internal CodecInOut to external PackedPixelFile +Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io, + const JxlPixelFormat& pixel_format, + const ColorEncoding& c_desired, + ThreadPool* pool, + PackedPixelFile* ppf) { + const bool has_alpha = io.metadata.m.HasAlpha(); + bool alpha_premultiplied = false; + JXL_ASSERT(!io.frames.empty()); + + if (has_alpha) { + JXL_ASSERT(io.metadata.m.GetAlphaBits() == + io.metadata.m.bit_depth.bits_per_sample); + const auto* alpha_channel = io.metadata.m.Find(ExtraChannel::kAlpha); + JXL_ASSERT(alpha_channel->bit_depth.exponent_bits_per_sample == + io.metadata.m.bit_depth.exponent_bits_per_sample); + alpha_premultiplied = alpha_channel->alpha_associated; + } + + // Convert the image metadata + ppf->info.xsize = io.metadata.size.xsize(); + ppf->info.ysize = io.metadata.size.ysize(); + ppf->info.num_color_channels = io.metadata.m.color_encoding.Channels(); + ppf->info.bits_per_sample = io.metadata.m.bit_depth.bits_per_sample; + ppf->info.exponent_bits_per_sample = + io.metadata.m.bit_depth.exponent_bits_per_sample; + + ppf->info.intensity_target = io.metadata.m.tone_mapping.intensity_target; + ppf->info.linear_below = io.metadata.m.tone_mapping.linear_below; + ppf->info.min_nits = io.metadata.m.tone_mapping.min_nits; + ppf->info.relative_to_max_display = + io.metadata.m.tone_mapping.relative_to_max_display; + + ppf->info.alpha_bits = io.metadata.m.GetAlphaBits(); + ppf->info.alpha_premultiplied = alpha_premultiplied; + + ppf->info.uses_original_profile = !io.metadata.m.xyb_encoded; + JXL_ASSERT(0 < io.metadata.m.orientation && io.metadata.m.orientation <= 8); 
+ ppf->info.orientation = + static_cast(io.metadata.m.orientation); + ppf->info.num_color_channels = io.metadata.m.color_encoding.Channels(); + + // Convert animation metadata + JXL_ASSERT(io.frames.size() == 1 || io.metadata.m.have_animation); + ppf->info.have_animation = io.metadata.m.have_animation; + ppf->info.animation.tps_numerator = io.metadata.m.animation.tps_numerator; + ppf->info.animation.tps_denominator = io.metadata.m.animation.tps_denominator; + ppf->info.animation.num_loops = io.metadata.m.animation.num_loops; + + // Convert the color encoding + ppf->icc.assign(c_desired.ICC().begin(), c_desired.ICC().end()); + ConvertInternalToExternalColorEncoding(c_desired, &ppf->color_encoding); + + // Convert the extra blobs + ppf->metadata.exif = io.blobs.exif; + ppf->metadata.iptc = io.blobs.iptc; + ppf->metadata.jumbf = io.blobs.jumbf; + ppf->metadata.xmp = io.blobs.xmp; + const bool float_out = pixel_format.data_type == JXL_TYPE_FLOAT || + pixel_format.data_type == JXL_TYPE_FLOAT16; + // Convert the pixels + ppf->frames.clear(); + for (const auto& frame : io.frames) { + JXL_ASSERT(frame.metadata()->bit_depth.bits_per_sample != 0); + // It is ok for the frame.color().kNumPlanes to not match the + // number of channels on the image. + const uint32_t num_channels = + frame.metadata()->color_encoding.Channels() + has_alpha; + JxlPixelFormat format{/*num_channels=*/num_channels, + /*data_type=*/pixel_format.data_type, + /*endianness=*/pixel_format.endianness, + /*align=*/pixel_format.align}; + + PackedFrame packed_frame(frame.oriented_xsize(), frame.oriented_ysize(), + format); + const size_t bits_per_sample = + float_out ? 
packed_frame.color.BitsPerChannel(pixel_format.data_type) + : ppf->info.bits_per_sample; + packed_frame.name = frame.name; + packed_frame.frame_info.name_length = frame.name.size(); + // Color transform + ImageBundle ib = frame.Copy(); + const ImageBundle* to_color_transform = &ib; + ImageMetadata metadata = io.metadata.m; + ImageBundle store(&metadata); + const ImageBundle* transformed; + // TODO(firsching): handle the transform here. + JXL_RETURN_IF_ERROR(TransformIfNeeded(*to_color_transform, c_desired, + GetJxlCms(), pool, &store, + &transformed)); + + JXL_RETURN_IF_ERROR(ConvertToExternal( + *transformed, bits_per_sample, float_out, format.num_channels, + format.endianness, + /* stride_out=*/packed_frame.color.stride, pool, + packed_frame.color.pixels(), packed_frame.color.pixels_size, + /*out_callback=*/{}, frame.metadata()->GetOrientation())); + + // TODO(firsching): Convert the extra channels, beside one potential alpha + // channel. FIXME! + JXL_CHECK(frame.extra_channels().size() <= has_alpha); + ppf->frames.push_back(std::move(packed_frame)); + } + + return true; +} +} // namespace extras +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/packed_image_convert.h b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.h new file mode 100644 index 0000000000..100adccc09 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/packed_image_convert.h @@ -0,0 +1,36 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_ +#define LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_ + +// Helper functions to convert from the external image types to the internal +// CodecInOut to help transitioning to the external types. 
+ +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/codec_in_out.h" + +namespace jxl { +namespace extras { + +// Converts an external PackedPixelFile to the internal CodecInOut for use with +// internal functions directly. +Status ConvertPackedPixelFileToCodecInOut(const PackedPixelFile& ppf, + ThreadPool* pool, CodecInOut* io); + +// Converts an internal CodecInOut for use with internal function to an external +// PackedPixelFile. +Status ConvertCodecInOutToPackedPixelFile(const CodecInOut& io, + const JxlPixelFormat& pixel_format, + const ColorEncoding& c_desired, + ThreadPool* pool, + PackedPixelFile* ppf); +} // namespace extras +} // namespace jxl + +#endif // LIB_EXTRAS_PACKED_IMAGE_CONVERT_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/size_constraints.h b/third-party/libjxl/libjxl/lib/extras/size_constraints.h new file mode 100644 index 0000000000..cf06f8cb22 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/size_constraints.h @@ -0,0 +1,43 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_SIZE_CONSTRAINTS_H_ +#define LIB_JXL_SIZE_CONSTRAINTS_H_ + +#include +#include + +#include "lib/jxl/base/status.h" + +namespace jxl { + +struct SizeConstraints { + // Upper limit on pixel dimensions/area, enforced by VerifyDimensions + // (called from decoders). Fuzzers set smaller values to limit memory use. 
+ uint32_t dec_max_xsize = 0xFFFFFFFFu; + uint32_t dec_max_ysize = 0xFFFFFFFFu; + uint64_t dec_max_pixels = 0xFFFFFFFFu; // Might be up to ~0ull +}; + +template ::value>::type> +Status VerifyDimensions(const SizeConstraints* constraints, T xs, T ys) { + if (!constraints) return true; + + if (xs == 0 || ys == 0) return JXL_FAILURE("Empty image."); + if (xs > constraints->dec_max_xsize) return JXL_FAILURE("Image too wide."); + if (ys > constraints->dec_max_ysize) return JXL_FAILURE("Image too tall."); + + const uint64_t num_pixels = static_cast(xs) * ys; + if (num_pixels > constraints->dec_max_pixels) { + return JXL_FAILURE("Image too big."); + } + + return true; +} + +} // namespace jxl + +#endif // LIB_JXL_SIZE_CONSTRAINTS_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/time.cc b/third-party/libjxl/libjxl/lib/extras/time.cc new file mode 100644 index 0000000000..73d1b8f260 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/time.cc @@ -0,0 +1,60 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/extras/time.h" + +#include +#include +#include + +#include + +#include "lib/jxl/base/os_macros.h" // for JXL_OS_* + +#if JXL_OS_WIN +#ifndef NOMINMAX +#define NOMINMAX +#endif // NOMINMAX +#include +#endif // JXL_OS_WIN + +#if JXL_OS_MAC +#include +#include +#endif // JXL_OS_MAC + +#if JXL_OS_HAIKU +#include +#endif // JXL_OS_HAIKU + +namespace jxl { + +double Now() { +#if JXL_OS_WIN + LARGE_INTEGER counter; + (void)QueryPerformanceCounter(&counter); + LARGE_INTEGER freq; + (void)QueryPerformanceFrequency(&freq); + return double(counter.QuadPart) / freq.QuadPart; +#elif JXL_OS_MAC + const auto t = mach_absolute_time(); + // On OSX/iOS platform the elapsed time is cpu time unit + // We have to query the time base information to convert it back + // See https://developer.apple.com/library/mac/qa/qa1398/_index.html + static mach_timebase_info_data_t timebase; + if (timebase.denom == 0) { + (void)mach_timebase_info(&timebase); + } + return double(t) * timebase.numer / timebase.denom * 1E-9; +#elif JXL_OS_HAIKU + return double(system_time_nsecs()) * 1E-9; +#else + timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + return t.tv_sec + t.tv_nsec * 1E-9; +#endif +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/extras/time.h b/third-party/libjxl/libjxl/lib/extras/time.h new file mode 100644 index 0000000000..c71414b877 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/time.h @@ -0,0 +1,19 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_EXTRAS_TIME_H_ +#define LIB_EXTRAS_TIME_H_ + +// OS-specific function for timing. + +namespace jxl { + +// Returns current time [seconds] from a monotonic clock with unspecified +// starting point - only suitable for computing elapsed time. 
+double Now(); + +} // namespace jxl + +#endif // LIB_EXTRAS_TIME_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/tone_mapping.cc b/third-party/libjxl/libjxl/lib/extras/tone_mapping.cc new file mode 100644 index 0000000000..1cdd6ed826 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/tone_mapping.cc @@ -0,0 +1,132 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/tone_mapping.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/extras/tone_mapping.cc" +#include +#include + +#include "lib/jxl/dec_tone_mapping-inl.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/image_bundle.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +static constexpr float rec2020_luminances[3] = {0.2627f, 0.6780f, 0.0593f}; + +Status ToneMapFrame(const std::pair display_nits, + ImageBundle* const ib, ThreadPool* const pool) { + // Perform tone mapping as described in Report ITU-R BT.2390-8, section 5.4 + // (pp. 23-25). 
+ // https://www.itu.int/pub/R-REP-BT.2390-8-2020 + + HWY_FULL(float) df; + using V = decltype(Zero(df)); + + ColorEncoding linear_rec2020; + linear_rec2020.SetColorSpace(ColorSpace::kRGB); + linear_rec2020.primaries = Primaries::k2100; + linear_rec2020.white_point = WhitePoint::kD65; + linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear); + JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC()); + JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool)); + + Rec2408ToneMapper tone_mapper( + {ib->metadata()->tone_mapping.min_nits, + ib->metadata()->IntensityTarget()}, + display_nits, rec2020_luminances); + + return RunOnPool( + pool, 0, ib->ysize(), ThreadPool::NoInit, + [&](const uint32_t y, size_t /* thread */) { + float* const JXL_RESTRICT row_r = ib->color()->PlaneRow(0, y); + float* const JXL_RESTRICT row_g = ib->color()->PlaneRow(1, y); + float* const JXL_RESTRICT row_b = ib->color()->PlaneRow(2, y); + for (size_t x = 0; x < ib->xsize(); x += Lanes(df)) { + V red = Load(df, row_r + x); + V green = Load(df, row_g + x); + V blue = Load(df, row_b + x); + tone_mapper.ToneMap(&red, &green, &blue); + Store(red, df, row_r + x); + Store(green, df, row_g + x); + Store(blue, df, row_b + x); + } + }, + "ToneMap"); +} + +Status GamutMapFrame(ImageBundle* const ib, float preserve_saturation, + ThreadPool* const pool) { + HWY_FULL(float) df; + using V = decltype(Zero(df)); + + ColorEncoding linear_rec2020; + linear_rec2020.SetColorSpace(ColorSpace::kRGB); + linear_rec2020.primaries = Primaries::k2100; + linear_rec2020.white_point = WhitePoint::kD65; + linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear); + JXL_RETURN_IF_ERROR(linear_rec2020.CreateICC()); + JXL_RETURN_IF_ERROR(ib->TransformTo(linear_rec2020, GetJxlCms(), pool)); + + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, ib->ysize(), ThreadPool::NoInit, + [&](const uint32_t y, size_t /* thread*/) { + float* const JXL_RESTRICT row_r = ib->color()->PlaneRow(0, y); + float* const JXL_RESTRICT 
row_g = ib->color()->PlaneRow(1, y); + float* const JXL_RESTRICT row_b = ib->color()->PlaneRow(2, y); + for (size_t x = 0; x < ib->xsize(); x += Lanes(df)) { + V red = Load(df, row_r + x); + V green = Load(df, row_g + x); + V blue = Load(df, row_b + x); + GamutMap(&red, &green, &blue, rec2020_luminances, + preserve_saturation); + Store(red, df, row_r + x); + Store(green, df, row_g + x); + Store(blue, df, row_b + x); + } + }, + "GamutMap")); + + return true; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +namespace { +HWY_EXPORT(ToneMapFrame); +HWY_EXPORT(GamutMapFrame); +} // namespace + +Status ToneMapTo(const std::pair display_nits, + CodecInOut* const io, ThreadPool* const pool) { + const auto tone_map_frame = HWY_DYNAMIC_DISPATCH(ToneMapFrame); + for (ImageBundle& ib : io->frames) { + JXL_RETURN_IF_ERROR(tone_map_frame(display_nits, &ib, pool)); + } + io->metadata.m.SetIntensityTarget(display_nits.second); + return true; +} + +Status GamutMap(CodecInOut* const io, float preserve_saturation, + ThreadPool* const pool) { + const auto gamut_map_frame = HWY_DYNAMIC_DISPATCH(GamutMapFrame); + for (ImageBundle& ib : io->frames) { + JXL_RETURN_IF_ERROR(gamut_map_frame(&ib, preserve_saturation, pool)); + } + return true; +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/extras/tone_mapping.h b/third-party/libjxl/libjxl/lib/extras/tone_mapping.h new file mode 100644 index 0000000000..1f474101eb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/tone_mapping.h @@ -0,0 +1,30 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_EXTRAS_TONE_MAPPING_H_ +#define LIB_EXTRAS_TONE_MAPPING_H_ + +#include "lib/jxl/codec_in_out.h" + +namespace jxl { + +// Important: after calling this, the result will contain many out-of-gamut +// colors. It is very strongly recommended to call GamutMap afterwards to +// rectify this. +Status ToneMapTo(std::pair display_nits, CodecInOut* io, + ThreadPool* pool = nullptr); + +// `preserve_saturation` indicates to what extent to favor saturation over +// luminance when mapping out-of-gamut colors to Rec. 2020. 0 preserves +// luminance at the complete expense of saturation, while 1 gives the most +// saturated color with the same hue that Rec. 2020 can represent even if it +// means lowering the luminance. Values in between correspond to linear mixtures +// of those two extremes. +Status GamutMap(CodecInOut* io, float preserve_saturation, + ThreadPool* pool = nullptr); + +} // namespace jxl + +#endif // LIB_EXTRAS_TONE_MAPPING_H_ diff --git a/third-party/libjxl/libjxl/lib/extras/tone_mapping_gbench.cc b/third-party/libjxl/libjxl/lib/extras/tone_mapping_gbench.cc new file mode 100644 index 0000000000..720d2ad0a9 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/extras/tone_mapping_gbench.cc @@ -0,0 +1,42 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "benchmark/benchmark.h" +#include "lib/extras/codec.h" +#include "lib/extras/tone_mapping.h" +#include "lib/jxl/enc_color_management.h" + +namespace jxl { + +static void BM_ToneMapping(benchmark::State& state) { + Image3F color(2268, 1512); + FillImage(0.5f, &color); + + // Use linear Rec. 2020 so that `ToneMapTo` doesn't have to convert to it and + // we mainly measure the tone mapping itself. 
+ ColorEncoding linear_rec2020; + linear_rec2020.SetColorSpace(ColorSpace::kRGB); + linear_rec2020.primaries = Primaries::k2100; + linear_rec2020.white_point = WhitePoint::kD65; + linear_rec2020.tf.SetTransferFunction(TransferFunction::kLinear); + JXL_CHECK(linear_rec2020.CreateICC()); + + for (auto _ : state) { + state.PauseTiming(); + CodecInOut tone_mapping_input; + Image3F color2(color.xsize(), color.ysize()); + CopyImageTo(color, &color2); + tone_mapping_input.SetFromImage(std::move(color2), linear_rec2020); + tone_mapping_input.metadata.m.SetIntensityTarget(255); + state.ResumeTiming(); + + JXL_CHECK(ToneMapTo({0.1, 100}, &tone_mapping_input)); + } + + state.SetItemsProcessed(state.iterations() * color.xsize() * color.ysize()); +} +BENCHMARK(BM_ToneMapping); + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/gbench_main.cc b/third-party/libjxl/libjxl/lib/gbench_main.cc new file mode 100644 index 0000000000..1cc1772017 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/gbench_main.cc @@ -0,0 +1,8 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "benchmark/benchmark.h" + +BENCHMARK_MAIN(); diff --git a/third-party/libjxl/libjxl/lib/include/jxl/cms_interface.h b/third-party/libjxl/libjxl/lib/include/jxl/cms_interface.h new file mode 100644 index 0000000000..491f373829 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/cms_interface.h @@ -0,0 +1,252 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_common + * @{ + * @file cms_interface.h + * @brief Interface to allow the injection of different color management systems + * (CMSes, also called color management modules, or CMMs) in JPEG XL. 
+ * + * A CMS is needed by the JPEG XL encoder and decoder to perform colorspace + * conversions. This defines an interface that can be implemented for different + * CMSes and then passed to the library. + */ + +#ifndef JXL_CMS_INTERFACE_H_ +#define JXL_CMS_INTERFACE_H_ + +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** Parses an ICC profile and populates @p c and @p cmyk with the data. + * + * @param user_data JxlCmsInterface::set_fields_data passed as-is. + * @param icc_data the ICC data to parse. + * @param icc_size how many bytes of icc_data are valid. + * @param c a JxlColorEncoding to populate if applicable. + * @param cmyk a boolean to set to whether the colorspace is a CMYK colorspace. + * @return Whether the relevant fields in @p c were successfully populated. + */ +typedef JXL_BOOL (*jpegxl_cms_set_fields_from_icc_func)(void* user_data, + const uint8_t* icc_data, + size_t icc_size, + JxlColorEncoding* c, + JXL_BOOL* cmyk); + +/** Represents an input or output colorspace to a color transform, as a + * serialized ICC profile. */ +typedef struct { + /** The serialized ICC profile. This is guaranteed to be present and valid. */ + struct { + const uint8_t* data; + size_t size; + } icc; + + /** Structured representation of the colorspace, if applicable. If all fields + * are different from their "unknown" value, then this is equivalent to the + * ICC representation of the colorspace. If some are "unknown", those that are + * not are still valid and can still be used on their own if they are useful. + */ + JxlColorEncoding color_encoding; + + /** Number of components per pixel. This can be deduced from the other + * representations of the colorspace but is provided for convenience and + * validation. 
*/ + size_t num_channels; +} JxlColorProfile; + +/** Allocates and returns the data needed for @p num_threads parallel transforms + * from the @p input colorspace to @p output, with up to @p pixels_per_thread + * pixels to transform per call to JxlCmsInterface::run. @p init_data comes + * directly from the JxlCmsInterface instance. Since @c run only receives the + * data returned by @c init, a reference to @p init_data should be kept there + * if access to it is desired in @c run. Likewise for JxlCmsInterface::destroy. + * + * The ICC data in @p input and @p output is guaranteed to outlive the @c init / + * @c run / @c destroy cycle. + * + * @param init_data JxlCmsInterface::init_data passed as-is. + * @param num_threads the maximum number of threads from which + * JxlCmsInterface::run will be called. + * @param pixels_per_thread the maximum number of pixels that each call to + * JxlCmsInterface::run will have to transform. + * @param input_profile the input colorspace for the transform. + * @param output_profile the colorspace to which JxlCmsInterface::run should + * convert the input data. + * @param intensity_target for colorspaces where luminance is relative + * (essentially: not PQ), indicates the luminance at which (1, 1, 1) will + * be displayed. This is useful for conversions between PQ and a relative + * luminance colorspace, in either direction: @p intensity_target cd/m² + * in PQ should map to and from (1, 1, 1) in the relative one.\n + * It is also used for conversions to and from HLG, as it is + * scene-referred while other colorspaces are assumed to be + * display-referred. That is, conversions from HLG should apply the OOTF + * for a peak display luminance of @p intensity_target, and conversions + * to HLG should undo it. 
The OOTF is a gamma function applied to the + * luminance channel (https://www.itu.int/rec/R-REC-BT.2100-2-201807-I + * page 7), with the gamma value computed as + * 1.2 * 1.111^log2(intensity_target / 1000) (footnote 2 page 8 + * of the same document). + * @return The data needed for the transform, or @c NULL in case of failure. + * This will be passed to the other functions as @c user_data. + */ +typedef void* (*jpegxl_cms_init_func)(void* init_data, size_t num_threads, + size_t pixels_per_thread, + const JxlColorProfile* input_profile, + const JxlColorProfile* output_profile, + float intensity_target); + +/** Returns a buffer that can be used by callers of the interface to store the + * input of the conversion or read its result, if they pass it as the input or + * output of the @c run function. + * @param user_data the data returned by @c init. + * @param thread the index of the thread for which to return a buffer. + * @return A buffer that can be used by the caller for passing to @c run. + */ +typedef float* (*jpegxl_cms_get_buffer_func)(void* user_data, size_t thread); + +/** Executes one transform and returns true on success or false on error. It + * must be possible to call this from different threads with different values + * for @p thread, all between 0 (inclusive) and the value of @p num_threads + * passed to @c init (exclusive). It is allowed to implement this by locking + * such that the transforms are essentially performed sequentially, if such a + * performance profile is acceptable. @p user_data is the data returned by + * @c init. + * The buffers each contain @p num_pixels × @c num_channels interleaved floating + * point (0..1) samples where @c num_channels is the number of color channels of + * their respective color profiles. It is guaranteed that the only case in which + * they might overlap is if the output has fewer channels than the input, in + * which case the pointers may be identical. 
+ * For CMYK data, 0 represents the maximum amount of ink while 1 represents no + * ink. + * @param user_data the data returned by @c init. + * @param thread the index of the thread from which the function is being + * called. + * @param input_buffer the buffer containing the pixel data to be transformed. + * @param output_buffer the buffer receiving the transformed pixel data. + * @param num_pixels the number of pixels to transform from @p input to + * @p output. + * @return JXL_TRUE on success, JXL_FALSE on failure. + */ +typedef JXL_BOOL (*jpegxl_cms_run_func)(void* user_data, size_t thread, + const float* input_buffer, + float* output_buffer, + size_t num_pixels); + +/** Performs the necessary clean-up and frees the memory allocated for user + * data. + */ +typedef void (*jpegxl_cms_destroy_func)(void*); + +/** + * Interface for performing colorspace transforms. The @c init function can be + * called several times to instantiate several transforms, including before + * other transforms have been destroyed. 
+ * + * The call sequence for a given colorspace transform could look like the + * following: + * @dot + * digraph calls { + * newrank = true + * node [shape = box, fontname = monospace] + * init [label = "user_data <- init(\l\ + * init_data = data,\l\ + * num_threads = 3,\l\ + * pixels_per_thread = 20,\l\ + * input = (sRGB, 3 channels),\l\ + * output = (Display-P3, 3 channels),\l\ + * intensity_target = 255\l\ + * )\l"] + * subgraph cluster_0 { + * color = lightgrey + * label = "thread 1" + * labeljust = "c" + * run_1_1 [label = "run(\l\ + * user_data,\l\ + * thread = 1,\l\ + * input = in[0],\l\ + * output = out[0],\l\ + * num_pixels = 20\l\ + * )\l"] + * run_1_2 [label = "run(\l\ + * user_data,\l\ + * thread = 1,\l\ + * input = in[3],\l\ + * output = out[3],\l\ + * num_pixels = 20\l\ + * )\l"] + * } + * subgraph cluster_1 { + * color = lightgrey + * label = "thread 2" + * labeljust = "l" + * run_2_1 [label = "run(\l\ + * user_data,\l\ + * thread = 2,\l\ + * input = in[1],\l\ + * output = out[1],\l\ + * num_pixels = 20\l\ + * )\l"] + * run_2_2 [label = "run(\l\ + * user_data,\l\ + * thread = 2,\l\ + * input = in[4],\l\ + * output = out[4],\l\ + * num_pixels = 13\l\ + * )\l"] + * } + * subgraph cluster_3 { + * color = lightgrey + * label = "thread 3" + * labeljust = "c" + * run_3_1 [label = "run(\l\ + * user_data,\l\ + * thread = 3,\l\ + * input = in[2],\l\ + * output = out[2],\l\ + * num_pixels = 20\l\ + * )\l"] + * } + * init -> {run_1_1; run_2_1; run_3_1; rank = same} + * run_1_1 -> run_1_2 + * run_2_1 -> run_2_2 + * {run_1_2; run_2_2, run_3_1} -> "destroy(user_data)" + * } + * @enddot + */ +typedef struct { + /** CMS-specific data that will be passed to @ref set_fields_from_icc. */ + void* set_fields_data; + /** Populates a JxlColorEncoding from an ICC profile. */ + jpegxl_cms_set_fields_from_icc_func set_fields_from_icc; + + /** CMS-specific data that will be passed to @ref init. 
*/ + void* init_data; + /** Prepares a colorspace transform as described in the documentation of @ref + * jpegxl_cms_init_func. */ + jpegxl_cms_init_func init; + /** Returns a buffer that can be used as input to @c run. */ + jpegxl_cms_get_buffer_func get_src_buf; + /** Returns a buffer that can be used as output from @c run. */ + jpegxl_cms_get_buffer_func get_dst_buf; + /** Executes the transform on a batch of pixels, per @ref jpegxl_cms_run_func. + */ + jpegxl_cms_run_func run; + /** Cleans up the transform. */ + jpegxl_cms_destroy_func destroy; +} JxlCmsInterface; + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_CMS_INTERFACE_H_ */ + +/** @} */ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/codestream_header.h b/third-party/libjxl/libjxl/lib/include/jxl/codestream_header.h new file mode 100644 index 0000000000..66dd7df4ce --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/codestream_header.h @@ -0,0 +1,430 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_common + * @{ + * @file codestream_header.h + * @brief Definitions of structs and enums for the metadata from the JPEG XL + * codestream headers (signature, metadata, preview dimensions, ...), excluding + * color encoding which is in color_encoding.h. + */ + +#ifndef JXL_CODESTREAM_HEADER_H_ +#define JXL_CODESTREAM_HEADER_H_ + +#include +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** Image orientation metadata. + * Values 1..8 match the EXIF definitions. + * The name indicates the operation to perform to transform from the encoded + * image to the display image. 
+ */ +typedef enum { + JXL_ORIENT_IDENTITY = 1, + JXL_ORIENT_FLIP_HORIZONTAL = 2, + JXL_ORIENT_ROTATE_180 = 3, + JXL_ORIENT_FLIP_VERTICAL = 4, + JXL_ORIENT_TRANSPOSE = 5, + JXL_ORIENT_ROTATE_90_CW = 6, + JXL_ORIENT_ANTI_TRANSPOSE = 7, + JXL_ORIENT_ROTATE_90_CCW = 8, +} JxlOrientation; + +/** Given type of an extra channel. + */ +typedef enum { + JXL_CHANNEL_ALPHA, + JXL_CHANNEL_DEPTH, + JXL_CHANNEL_SPOT_COLOR, + JXL_CHANNEL_SELECTION_MASK, + JXL_CHANNEL_BLACK, + JXL_CHANNEL_CFA, + JXL_CHANNEL_THERMAL, + JXL_CHANNEL_RESERVED0, + JXL_CHANNEL_RESERVED1, + JXL_CHANNEL_RESERVED2, + JXL_CHANNEL_RESERVED3, + JXL_CHANNEL_RESERVED4, + JXL_CHANNEL_RESERVED5, + JXL_CHANNEL_RESERVED6, + JXL_CHANNEL_RESERVED7, + JXL_CHANNEL_UNKNOWN, + JXL_CHANNEL_OPTIONAL +} JxlExtraChannelType; + +/** The codestream preview header */ +typedef struct { + /** Preview width in pixels */ + uint32_t xsize; + + /** Preview height in pixels */ + uint32_t ysize; +} JxlPreviewHeader; + +/** The codestream animation header, optionally present in the beginning of + * the codestream, and if it is it applies to all animation frames, unlike + * JxlFrameHeader which applies to an individual frame. + */ +typedef struct { + /** Numerator of ticks per second of a single animation frame time unit */ + uint32_t tps_numerator; + + /** Denominator of ticks per second of a single animation frame time unit */ + uint32_t tps_denominator; + + /** Amount of animation loops, or 0 to repeat infinitely */ + uint32_t num_loops; + + /** Whether animation time codes are present at animation frames in the + * codestream */ + JXL_BOOL have_timecodes; +} JxlAnimationHeader; + +/** Basic image information. This information is available from the file + * signature and first part of the codestream header. + */ +typedef struct { + /* TODO(lode): need additional fields for (transcoded) JPEG? For reusable + * fields orientation must be read from Exif APP1. 
For has_icc_profile: must + * look up where ICC profile is guaranteed to be in a JPEG file to be able to + * indicate this. */ + + /* TODO(lode): make struct packed, and/or make this opaque struct with getter + * functions (still separate struct from opaque decoder) */ + + /** Whether the codestream is embedded in the container format. If true, + * metadata information and extensions may be available in addition to the + * codestream. + */ + JXL_BOOL have_container; + + /** Width of the image in pixels, before applying orientation. + */ + uint32_t xsize; + + /** Height of the image in pixels, before applying orientation. + */ + uint32_t ysize; + + /** Original image color channel bit depth. + */ + uint32_t bits_per_sample; + + /** Original image color channel floating point exponent bits, or 0 if they + * are unsigned integer. For example, if the original data is half-precision + * (binary16) floating point, bits_per_sample is 16 and + * exponent_bits_per_sample is 5, and so on for other floating point + * precisions. + */ + uint32_t exponent_bits_per_sample; + + /** Upper bound on the intensity level present in the image in nits. For + * unsigned integer pixel encodings, this is the brightness of the largest + * representable value. The image does not necessarily contain a pixel + * actually this bright. An encoder is allowed to set 255 for SDR images + * without computing a histogram. + * Leaving this set to its default of 0 lets libjxl choose a sensible default + * value based on the color encoding. + */ + float intensity_target; + + /** Lower bound on the intensity level present in the image. This may be + * loose, i.e. lower than the actual darkest pixel. When tone mapping, a + * decoder will map [min_nits, intensity_target] to the display range. + */ + float min_nits; + + /** See the description of @see linear_below. 
+ */ + JXL_BOOL relative_to_max_display; + + /** The tone mapping will leave unchanged (linear mapping) any pixels whose + * brightness is strictly below this. The interpretation depends on + * relative_to_max_display. If true, this is a ratio [0, 1] of the maximum + * display brightness [nits], otherwise an absolute brightness [nits]. + */ + float linear_below; + + /** Whether the data in the codestream is encoded in the original color + * profile that is attached to the codestream metadata header, or is + * encoded in an internally supported absolute color space (which the decoder + * can always convert to linear or non-linear sRGB or to XYB). If the original + * profile is used, the decoder outputs pixel data in the color space matching + * that profile, but doesn't convert it to any other color space. If the + * original profile is not used, the decoder only outputs the data as sRGB + * (linear if outputting to floating point, nonlinear with standard sRGB + * transfer function if outputting to unsigned integers) but will not convert + * it to the original color profile. The decoder also does not convert to + * the target display color profile. To convert the pixel data produced by + * the decoder to the original color profile, one of the JxlDecoderGetColor* + * functions needs to be called with @ref JXL_COLOR_PROFILE_TARGET_DATA to get + * the color profile of the decoder output, and then an external CMS can be + * used for conversion. + * Note that for lossy compression, this should be set to false for most use + * cases, and if needed, the image should be converted to the original color + * profile after decoding, as described above. + */ + JXL_BOOL uses_original_profile; + + /** Indicates a preview image exists near the beginning of the codestream. + * The preview itself or its dimensions are not included in the basic info. + */ + JXL_BOOL have_preview; + + /** Indicates animation frames exist in the codestream.
The animation + * information is not included in the basic info. + */ + JXL_BOOL have_animation; + + /** Image orientation, value 1-8 matching the values used by JEITA CP-3451C + * (Exif version 2.3). + */ + JxlOrientation orientation; + + /** Number of color channels encoded in the image, this is either 1 for + * grayscale data, or 3 for colored data. This count does not include + * the alpha channel or other extra channels. To check presence of an alpha + * channel, such as in the case of RGBA color, check alpha_bits != 0. + * If and only if this is 1, the JxlColorSpace in the JxlColorEncoding is + * JXL_COLOR_SPACE_GRAY. + */ + uint32_t num_color_channels; + + /** Number of additional image channels. This includes the main alpha channel, + * but can also include additional channels such as depth, additional alpha + * channels, spot colors, and so on. Information about the extra channels + * can be queried with JxlDecoderGetExtraChannelInfo. The main alpha channel, + * if it exists, also has its information available in the alpha_bits, + * alpha_exponent_bits and alpha_premultiplied fields in this JxlBasicInfo. + */ + uint32_t num_extra_channels; + + /** Bit depth of the encoded alpha channel, or 0 if there is no alpha channel. + * If present, matches the bits_per_sample value of the JxlExtraChannelInfo + * associated with this alpha channel. + */ + uint32_t alpha_bits; + + /** Alpha channel floating point exponent bits, or 0 if they are unsigned + * integer. If present, matches the exponent_bits_per_sample value of the + * JxlExtraChannelInfo associated with this alpha channel. + */ + uint32_t alpha_exponent_bits; + + /** Whether the alpha channel is premultiplied. Only used if there is a main + * alpha channel. Matches the alpha_premultiplied value of the + * JxlExtraChannelInfo associated with this alpha channel. + */ + JXL_BOOL alpha_premultiplied; + + /** Dimensions of encoded preview image, only used if have_preview is + * JXL_TRUE.
+ */ + JxlPreviewHeader preview; + + /** Animation header with global animation properties for all frames, only + * used if have_animation is JXL_TRUE. + */ + JxlAnimationHeader animation; + + /** Intrinsic width of the image. + * The intrinsic size can be different from the actual size in pixels + * (as given by xsize and ysize) and it denotes the recommended dimensions + * for displaying the image, i.e. applications are advised to resample the + * decoded image to the intrinsic dimensions. + */ + uint32_t intrinsic_xsize; + + /** Intrinsic height of the image. + * The intrinsic size can be different from the actual size in pixels + * (as given by xsize and ysize) and it denotes the recommended dimensions + * for displaying the image, i.e. applications are advised to resample the + * decoded image to the intrinsic dimensions. + */ + uint32_t intrinsic_ysize; + + /** Padding for forwards-compatibility, in case more fields are exposed + * in a future version of the library. + */ + uint8_t padding[100]; +} JxlBasicInfo; + +/** Information for a single extra channel. + */ +typedef struct { + /** Given type of an extra channel. + */ + JxlExtraChannelType type; + + /** Total bits per sample for this channel. + */ + uint32_t bits_per_sample; + + /** Floating point exponent bits per channel, or 0 if they are unsigned + * integer. + */ + uint32_t exponent_bits_per_sample; + + /** The exponent the channel is downsampled by on each axis. + * TODO(lode): expand this comment to match the JPEG XL specification, + * specify how to upscale, how to round the size computation, and to which + * extra channels this field applies. + */ + uint32_t dim_shift; + + /** Length of the extra channel name in bytes, or 0 if no name. + * Excludes null termination character. + */ + uint32_t name_length; + + /** Whether alpha channel uses premultiplied alpha. Only applicable if + * type is JXL_CHANNEL_ALPHA. 
+ */ + JXL_BOOL alpha_premultiplied; + + /** Spot color of the current spot channel in linear RGBA. Only applicable if + * type is JXL_CHANNEL_SPOT_COLOR. + */ + float spot_color[4]; + + /** Only applicable if type is JXL_CHANNEL_CFA. + * TODO(lode): add comment about the meaning of this field. + */ + uint32_t cfa_channel; +} JxlExtraChannelInfo; + +/* TODO(lode): add API to get the codestream header extensions. */ +/** Extensions in the codestream header. */ +typedef struct { + /** Extension bits. */ + uint64_t extensions; +} JxlHeaderExtensions; + +/** Frame blend modes. + * When decoding, if coalescing is enabled (default), this can be ignored. + */ +typedef enum { + JXL_BLEND_REPLACE = 0, + JXL_BLEND_ADD = 1, + JXL_BLEND_BLEND = 2, + JXL_BLEND_MULADD = 3, + JXL_BLEND_MUL = 4, +} JxlBlendMode; + +/** The information about blending the color channels or a single extra channel. + * When decoding, if coalescing is enabled (default), this can be ignored and + * the blend mode is considered to be JXL_BLEND_REPLACE. + * When encoding, these settings apply to the pixel data given to the encoder. + */ +typedef struct { + /** Blend mode. + */ + JxlBlendMode blendmode; + /** Reference frame ID to use as the 'bottom' layer (0-3). + */ + uint32_t source; + /** Which extra channel to use as the 'alpha' channel for blend modes + * JXL_BLEND_BLEND and JXL_BLEND_MULADD. + */ + uint32_t alpha; + /** Clamp values to [0,1] for the purpose of blending. + */ + JXL_BOOL clamp; +} JxlBlendInfo; + +/** The information about layers. + * When decoding, if coalescing is enabled (default), this can be ignored. + * When encoding, these settings apply to the pixel data given to the encoder, + * the encoder could choose an internal representation that differs. + */ +typedef struct { + /** Whether cropping is applied for this frame. When decoding, if false, + * crop_x0 and crop_y0 are set to zero, and xsize and ysize to the main + * image dimensions. 
When encoding and this is false, those fields are + * ignored. When decoding, if coalescing is enabled (default), this is always + * false, regardless of the internal encoding in the JPEG XL codestream. + */ + JXL_BOOL have_crop; + + /** Horizontal offset of the frame (can be negative). + */ + int32_t crop_x0; + + /** Vertical offset of the frame (can be negative). + */ + int32_t crop_y0; + + /** Width of the frame (number of columns). + */ + uint32_t xsize; + + /** Height of the frame (number of rows). + */ + uint32_t ysize; + + /** The blending info for the color channels. Blending info for extra channels + * has to be retrieved separately using JxlDecoderGetExtraChannelBlendInfo. + */ + JxlBlendInfo blend_info; + + /** After blending, save the frame as reference frame with this ID (0-3). + * Special case: if the frame duration is nonzero, ID 0 means "will not be + * referenced in the future". This value is not used for the last frame. + * When encoding, ID 3 is reserved to frames that are generated internally by + * the encoder, and should not be used by applications. + */ + uint32_t save_as_reference; +} JxlLayerInfo; + +/** The header of one displayed frame or non-coalesced layer. */ +typedef struct { + /** How long to wait after rendering in ticks. The duration in seconds of a + * tick is given by tps_numerator and tps_denominator in JxlAnimationHeader. + */ + uint32_t duration; + + /** SMPTE timecode of the current frame in form 0xHHMMSSFF, or 0. The bits are + * interpreted from most-significant to least-significant as hour, minute, + * second, and frame. If timecode is nonzero, it is strictly larger than that + * of a previous frame with nonzero duration. These values are only available + * if have_timecodes in JxlAnimationHeader is JXL_TRUE. + * This value is only used if have_timecodes in JxlAnimationHeader is + * JXL_TRUE. + */ + uint32_t timecode; + + /** Length of the frame name in bytes, or 0 if no name. + * Excludes null termination character. 
This value is set by the decoder. + * For the encoder, this value is ignored and @ref JxlEncoderSetFrameName is + * used instead to set the name and the length. + */ + uint32_t name_length; + + /** Indicates this is the last animation frame. This value is set by the + * decoder to indicate no further frames follow. For the encoder, it is not + * required to set this value and it is ignored, @ref JxlEncoderCloseFrames is + * used to indicate the last frame to the encoder instead. + */ + JXL_BOOL is_last; + + /** Information about the layer in case of no coalescing. + */ + JxlLayerInfo layer_info; +} JxlFrameHeader; + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_CODESTREAM_HEADER_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/color_encoding.h b/third-party/libjxl/libjxl/lib/include/jxl/color_encoding.h new file mode 100644 index 0000000000..b16f6a01ee --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/color_encoding.h @@ -0,0 +1,162 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_common + * @{ + * @file color_encoding.h + * @brief Color Encoding definitions used by JPEG XL. + * All CIE units are for the standard 1931 2 degree observer. + */ + +#ifndef JXL_COLOR_ENCODING_H_ +#define JXL_COLOR_ENCODING_H_ + +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** Color space of the image data. */ +typedef enum { + /** Tristimulus RGB */ + JXL_COLOR_SPACE_RGB, + /** Luminance based, the primaries in JxlColorEncoding must be ignored. This + * value implies that num_color_channels in JxlBasicInfo is 1, any other value + * implies num_color_channels is 3. 
*/ + JXL_COLOR_SPACE_GRAY, + /** XYB (opsin) color space */ + JXL_COLOR_SPACE_XYB, + /** None of the other table entries describe the color space appropriately */ + JXL_COLOR_SPACE_UNKNOWN, +} JxlColorSpace; + +/** Built-in whitepoints for color encoding. When decoding, the numerical xy + * whitepoint value can be read from the JxlColorEncoding white_point field + * regardless of the enum value. When encoding, enum values except + * JXL_WHITE_POINT_CUSTOM override the numerical fields. Some enum values match + * a subset of CICP (Rec. ITU-T H.273 | ISO/IEC 23091-2:2019(E)), however the + * white point and RGB primaries are separate enums here. + */ +typedef enum { + /** CIE Standard Illuminant D65: 0.3127, 0.3290 */ + JXL_WHITE_POINT_D65 = 1, + /** White point must be read from the JxlColorEncoding white_point field, or + * as ICC profile. This enum value is not an exact match of the corresponding + * CICP value. */ + JXL_WHITE_POINT_CUSTOM = 2, + /** CIE Standard Illuminant E (equal-energy): 1/3, 1/3 */ + JXL_WHITE_POINT_E = 10, + /** DCI-P3 from SMPTE RP 431-2: 0.314, 0.351 */ + JXL_WHITE_POINT_DCI = 11, +} JxlWhitePoint; + +/** Built-in primaries for color encoding. When decoding, the primaries can be + * read from the JxlColorEncoding primaries_red_xy, primaries_green_xy and + * primaries_blue_xy fields regardless of the enum value. When encoding, the + * enum values except JXL_PRIMARIES_CUSTOM override the numerical fields. Some + * enum values match a subset of CICP (Rec. ITU-T H.273 | ISO/IEC + * 23091-2:2019(E)), however the white point and RGB primaries are separate + * enums here. + */ +typedef enum { + /** The CIE xy values of the red, green and blue primaries are: 0.639998686, + 0.330010138; 0.300003784, 0.600003357; 0.150002046, 0.059997204 */ + JXL_PRIMARIES_SRGB = 1, + /** Primaries must be read from the JxlColorEncoding primaries_red_xy, + * primaries_green_xy and primaries_blue_xy fields, or as ICC profile. 
This + * enum value is not an exact match of the corresponding CICP value. */ + JXL_PRIMARIES_CUSTOM = 2, + /** As specified in Rec. ITU-R BT.2100-1 */ + JXL_PRIMARIES_2100 = 9, + /** As specified in SMPTE RP 431-2 */ + JXL_PRIMARIES_P3 = 11, +} JxlPrimaries; + +/** Built-in transfer functions for color encoding. Enum values match a subset + * of CICP (Rec. ITU-T H.273 | ISO/IEC 23091-2:2019(E)) unless specified + * otherwise. */ +typedef enum { + /** As specified in Rec. ITU-R BT.709-6 */ + JXL_TRANSFER_FUNCTION_709 = 1, + /** None of the other table entries describe the transfer function. */ + JXL_TRANSFER_FUNCTION_UNKNOWN = 2, + /** The gamma exponent is 1 */ + JXL_TRANSFER_FUNCTION_LINEAR = 8, + /** As specified in IEC 61966-2-1 sRGB */ + JXL_TRANSFER_FUNCTION_SRGB = 13, + /** As specified in SMPTE ST 2084 */ + JXL_TRANSFER_FUNCTION_PQ = 16, + /** As specified in SMPTE ST 428-1 */ + JXL_TRANSFER_FUNCTION_DCI = 17, + /** As specified in Rec. ITU-R BT.2100-1 (HLG) */ + JXL_TRANSFER_FUNCTION_HLG = 18, + /** Transfer function follows power law given by the gamma value in + JxlColorEncoding. Not a CICP value. */ + JXL_TRANSFER_FUNCTION_GAMMA = 65535, +} JxlTransferFunction; + +/** Rendering intent for color encoding, as specified in ISO 15076-1:2010 */ +typedef enum { + /** vendor-specific */ + JXL_RENDERING_INTENT_PERCEPTUAL = 0, + /** media-relative */ + JXL_RENDERING_INTENT_RELATIVE, + /** vendor-specific */ + JXL_RENDERING_INTENT_SATURATION, + /** ICC-absolute */ + JXL_RENDERING_INTENT_ABSOLUTE, +} JxlRenderingIntent; + +/** Color encoding of the image as structured information. + */ +typedef struct { + /** Color space of the image data. + */ + JxlColorSpace color_space; + + /** Built-in white point. If this value is JXL_WHITE_POINT_CUSTOM, must + * use the numerical whitepoint values from white_point_xy. + */ + JxlWhitePoint white_point; + + /** Numerical whitepoint values in CIE xy space. */ + double white_point_xy[2]; + + /** Built-in RGB primaries.
If this value is JXL_PRIMARIES_CUSTOM, must + * use the numerical primaries values below. This field and the custom values + * below are unused and must be ignored if the color space is + * JXL_COLOR_SPACE_GRAY or JXL_COLOR_SPACE_XYB. + */ + JxlPrimaries primaries; + + /** Numerical red primary values in CIE xy space. */ + double primaries_red_xy[2]; + + /** Numerical green primary values in CIE xy space. */ + double primaries_green_xy[2]; + + /** Numerical blue primary values in CIE xy space. */ + double primaries_blue_xy[2]; + + /** Transfer function if have_gamma is 0 */ + JxlTransferFunction transfer_function; + + /** Gamma value used when transfer_function is JXL_TRANSFER_FUNCTION_GAMMA + */ + double gamma; + + /** Rendering intent defined for the color profile. */ + JxlRenderingIntent rendering_intent; +} JxlColorEncoding; + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_COLOR_ENCODING_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/decode.h b/third-party/libjxl/libjxl/lib/include/jxl/decode.h new file mode 100644 index 0000000000..5922728b07 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/decode.h @@ -0,0 +1,1415 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_decoder + * @{ + * @file decode.h + * @brief Decoding API for JPEG XL. + */ + +#ifndef JXL_DECODE_H_ +#define JXL_DECODE_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** + * Decoder library version. + * + * @return the decoder library version as an integer: + * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example, + * version 1.2.3 would return 1002003. 
+ */ +JXL_EXPORT uint32_t JxlDecoderVersion(void); + +/** The result of @ref JxlSignatureCheck. + */ +typedef enum { + /** Not enough bytes were passed to determine if a valid signature was found. + */ + JXL_SIG_NOT_ENOUGH_BYTES = 0, + + /** No valid JPEG XL header was found. */ + JXL_SIG_INVALID = 1, + + /** A valid JPEG XL codestream signature was found, that is a JPEG XL image + * without container. + */ + JXL_SIG_CODESTREAM = 2, + + /** A valid container signature was found, that is a JPEG XL image embedded + * in a box format container. + */ + JXL_SIG_CONTAINER = 3, +} JxlSignature; + +/** + * JPEG XL signature identification. + * + * Checks if the passed buffer contains a valid JPEG XL signature. The passed @p + * buf of size + * @p len doesn't need to be a full image, only the beginning of the file. + * + * @return a flag indicating if a JPEG XL signature was found and what type. + * - @ref JXL_SIG_NOT_ENOUGH_BYTES if not enough bytes were passed to + * determine if a valid signature is there. + * - @ref JXL_SIG_INVALID if no valid signature found for JPEG XL decoding. + * - @ref JXL_SIG_CODESTREAM if a valid JPEG XL codestream signature was + * found. + * - @ref JXL_SIG_CONTAINER if a valid JPEG XL container signature was found. + */ +JXL_EXPORT JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len); + +/** + * Opaque structure that holds the JPEG XL decoder. + * + * Allocated and initialized with @ref JxlDecoderCreate(). + * Cleaned up and deallocated with @ref JxlDecoderDestroy(). + */ +typedef struct JxlDecoderStruct JxlDecoder; + +/** + * Creates an instance of @ref JxlDecoder and initializes it. + * + * @p memory_manager will be used for all the library dynamic allocations made + * from this instance. The parameter may be NULL, in which case the default + * allocator will be used. See jxl/memory_manager.h for details. + * + * @param memory_manager custom allocator function. It may be NULL. The memory + * manager will be copied internally.
+ * @return @c NULL if the instance can not be allocated or initialized + * @return pointer to initialized @ref JxlDecoder otherwise + */ +JXL_EXPORT JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager); + +/** + * Re-initializes a @ref JxlDecoder instance, so it can be re-used for decoding + * another image. All state and settings are reset as if the object was + * newly created with @ref JxlDecoderCreate, but the memory manager is kept. + * + * @param dec instance to be re-initialized. + */ +JXL_EXPORT void JxlDecoderReset(JxlDecoder* dec); + +/** + * Deinitializes and frees @ref JxlDecoder instance. + * + * @param dec instance to be cleaned up and deallocated. + */ +JXL_EXPORT void JxlDecoderDestroy(JxlDecoder* dec); + +/** + * Return value for @ref JxlDecoderProcessInput. + * The values from @ref JXL_DEC_BASIC_INFO onwards are optional informative + * events that can be subscribed to, they are never returned if they + * have not been registered with @ref JxlDecoderSubscribeEvents. + */ +typedef enum { + /** Function call finished successfully, or decoding is finished and there is + * nothing more to be done. + * + * Note that @ref JxlDecoderProcessInput will return JXL_DEC_SUCCESS if all + * events that were registered with @ref JxlDecoderSubscribeEvents were + * processed, even before the end of the JPEG XL codestream. + * + * In this case, the return value @ref JxlDecoderReleaseInput will be the same + * as it was at the last signaled event. E.g. if JXL_DEC_FULL_IMAGE was + * subscribed to, then all bytes from the end of the JPEG XL codestream + * (including possible boxes needed for jpeg reconstruction) will be returned + * as unprocessed. + */ + JXL_DEC_SUCCESS = 0, + + /** An error occurred, for example invalid input file or out of memory. + * TODO(lode): add function to get error information from decoder. + */ + JXL_DEC_ERROR = 1, + + /** The decoder needs more input bytes to continue. 
Before the next @ref + * JxlDecoderProcessInput call, more input data must be set, by calling @ref + * JxlDecoderReleaseInput (if input was set previously) and then calling @ref + * JxlDecoderSetInput. @ref JxlDecoderReleaseInput returns how many bytes + * are not yet processed, before a next call to @ref JxlDecoderProcessInput + * all unprocessed bytes must be provided again (the address need not match, + * but the contents must), and more bytes must be concatenated after the + * unprocessed bytes. + * In most cases, @ref JxlDecoderReleaseInput will return no unprocessed bytes + * at this event, the only exceptions are if the previously set input ended + * within (a) the raw codestream signature, (b) the signature box, (c) a box + * header, or (d) the first 4 bytes of a brob, ftyp, or jxlp box. In any of + * these cases the number of unprocessed bytes is less than 20. + */ + JXL_DEC_NEED_MORE_INPUT = 2, + + /** The decoder is able to decode a preview image and requests setting a + * preview output buffer using @ref JxlDecoderSetPreviewOutBuffer. This occurs + * if @ref JXL_DEC_PREVIEW_IMAGE is requested and it is possible to decode a + * preview image from the codestream and the preview out buffer was not yet + * set. There is maximum one preview image in a codestream. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the frame header (including ToC) of the preview frame as + * unprocessed. + */ + JXL_DEC_NEED_PREVIEW_OUT_BUFFER = 3, + + /** The decoder requests an output buffer to store the full resolution image, + * which can be set with @ref JxlDecoderSetImageOutBuffer or with @ref + * JxlDecoderSetImageOutCallback. This event re-occurs for new frames if + * there are multiple animation frames and requires setting an output again. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the frame header (including ToC) as unprocessed. 
+ */ + JXL_DEC_NEED_IMAGE_OUT_BUFFER = 5, + + /** The JPEG reconstruction buffer is too small for reconstructed JPEG + * codestream to fit. @ref JxlDecoderSetJPEGBuffer must be called again to + * make room for remaining bytes. This event may occur multiple times + * after @ref JXL_DEC_JPEG_RECONSTRUCTION. + */ + JXL_DEC_JPEG_NEED_MORE_OUTPUT = 6, + + /** The box contents output buffer is too small. @ref JxlDecoderSetBoxBuffer + * must be called again to make room for remaining bytes. This event may occur + * multiple times after @ref JXL_DEC_BOX. + */ + JXL_DEC_BOX_NEED_MORE_OUTPUT = 7, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": Basic information such as image dimensions and + * extra channels. This event occurs max once per image. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the basic info as unprocessed (including the last byte of basic info + * if it did not end on a byte boundary). + */ + JXL_DEC_BASIC_INFO = 0x40, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": Color encoding or ICC profile from the + * codestream header. This event occurs max once per image and always later + * than @ref JXL_DEC_BASIC_INFO and earlier than any pixel data. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the image header (which is the start of the first frame) as + * unprocessed. + */ + JXL_DEC_COLOR_ENCODING = 0x100, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": Preview image, a small frame, decoded. This + * event can only happen if the image has a preview frame encoded. This event + * occurs max once for the codestream and always later than @ref + * JXL_DEC_COLOR_ENCODING and before @ref JXL_DEC_FRAME. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the preview frame as unprocessed. 
+ */ + JXL_DEC_PREVIEW_IMAGE = 0x200, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": Beginning of a frame. @ref + * JxlDecoderGetFrameHeader can be used at this point. A note on frames: + * a JPEG XL image can have internal frames that are not intended to be + * displayed (e.g. used for compositing a final frame), but this only returns + * displayed frames, unless @ref JxlDecoderSetCoalescing was set to JXL_FALSE: + * in that case, the individual layers are returned, without blending. Note + * that even when coalescing is disabled, only frames of type kRegularFrame + * are returned; frames of type kReferenceOnly and kLfFrame are always for + * internal purposes only and cannot be accessed. A displayed frame either has + * an animation duration or is the only or last frame in the image. This event + * occurs max once per displayed frame, always later than @ref + * JXL_DEC_COLOR_ENCODING, and always earlier than any pixel data. While + * JPEG XL supports encoding a single frame as the composition of multiple + * internal sub-frames also called frames, this event is not indicated for the + * internal frames. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the frame header (including ToC) as unprocessed. + */ + JXL_DEC_FRAME = 0x400, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": full frame (or layer, in case coalescing is + * disabled) is decoded. @ref JxlDecoderSetImageOutBuffer must be used after + * getting the basic image information to be able to get the image pixels, if + * not this return status only indicates we're past this point in the + * codestream. This event occurs max once per frame. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the frame (or if @ref JXL_DEC_JPEG_RECONSTRUCTION is subscribed to, + * from the end of the last box that is needed for jpeg reconstruction) as + * unprocessed. 
+ */ + JXL_DEC_FULL_IMAGE = 0x1000, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": JPEG reconstruction data decoded. @ref + * JxlDecoderSetJPEGBuffer may be used to set a JPEG reconstruction buffer + * after getting the JPEG reconstruction data. If a JPEG reconstruction buffer + * is set a byte stream identical to the JPEG codestream used to encode the + * image will be written to the JPEG reconstruction buffer instead of pixels + * to the image out buffer. This event occurs max once per image and always + * before @ref JXL_DEC_FULL_IMAGE. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the 'jbrd' box as unprocessed. + */ + JXL_DEC_JPEG_RECONSTRUCTION = 0x2000, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": The header of a box of the container format + * (BMFF) is decoded. The following API functions related to boxes can be used + * after this event: + * - @ref JxlDecoderSetBoxBuffer and @ref JxlDecoderReleaseBoxBuffer + * "JxlDecoderReleaseBoxBuffer": set and release a buffer to get the box + * data. + * - @ref JxlDecoderGetBoxType get the 4-character box typename. + * - @ref JxlDecoderGetBoxSizeRaw get the size of the box as it appears in + * the container file, not decompressed. + * - @ref JxlDecoderSetDecompressBoxes to configure whether to get the box + * data decompressed, or possibly compressed. + * + * Boxes can be compressed. This is so when their box type is + * "brob". In that case, they have an underlying decompressed box + * type and decompressed data. @ref JxlDecoderSetDecompressBoxes allows + * configuring which data to get. Decompressing requires + * Brotli. @ref JxlDecoderGetBoxType has a flag to get the compressed box + * type, which can be "brob", or the decompressed box type. 
If a box + * is not compressed (its compressed type is not "brob"), then + * the output decompressed box type and data is independent of what + * setting is configured. + * + * The buffer set with @ref JxlDecoderSetBoxBuffer must be set again for each + * next box to be obtained, or can be left unset to skip outputting this box. + * The output buffer contains the full box data when the next @ref JXL_DEC_BOX + * event or @ref JXL_DEC_SUCCESS occurs. @ref JXL_DEC_BOX occurs for all + * boxes, including non-metadata boxes such as the signature box or codestream + * boxes. To check whether the box is a metadata type for respectively EXIF, + * XMP or JUMBF, use @ref JxlDecoderGetBoxType and check for types "Exif", + * "xml " and "jumb" respectively. + * + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * start of the box header as unprocessed. + */ + JXL_DEC_BOX = 0x4000, + + /** Informative event by @ref JxlDecoderProcessInput + * "JxlDecoderProcessInput": a progressive step in decoding the frame is + * reached. When calling @ref JxlDecoderFlushImage at this point, the flushed + * image will correspond exactly to this point in decoding, and not yet + * contain partial results (such as partially more fine detail) of a next + * step. By default, this event will trigger maximum once per frame, when a + * 8x8th resolution (DC) image is ready (the image data is still returned at + * full resolution, giving upscaled DC). Use @ref + * JxlDecoderSetProgressiveDetail to configure more fine-grainedness. The + * event is not guaranteed to trigger, not all images have progressive steps + * or DC encoded. + * In this case, @ref JxlDecoderReleaseInput will return all bytes from the + * end of the section that was needed to produce this progressive event as + * unprocessed. + */ + JXL_DEC_FRAME_PROGRESSION = 0x8000, +} JxlDecoderStatus; + +/** Rewinds decoder to the beginning. 
The same input must be given again from + * the beginning of the file and the decoder will emit events from the beginning + * again. When rewinding (as opposed to @ref JxlDecoderReset), the decoder can + * keep state about the image, which it can use to skip to a requested frame + * more efficiently with @ref JxlDecoderSkipFrames. Settings such as parallel + * runner or subscribed events are kept. After rewind, @ref + * JxlDecoderSubscribeEvents can be used again, and it is feasible to leave out + * events that were already handled before, such as @ref JXL_DEC_BASIC_INFO + * and @ref JXL_DEC_COLOR_ENCODING, since they will provide the same information + * as before. + * The difference to @ref JxlDecoderReset is that some state is kept, namely + * settings set by a call to + * - @ref JxlDecoderSetCoalescing, + * - @ref JxlDecoderSetDesiredIntensityTarget, + * - @ref JxlDecoderSetDecompressBoxes, + * - @ref JxlDecoderSetKeepOrientation, + * - @ref JxlDecoderSetUnpremultiplyAlpha, + * - @ref JxlDecoderSetParallelRunner, + * - @ref JxlDecoderSetRenderSpotcolors, and + * - @ref JxlDecoderSubscribeEvents. + * + * @param dec decoder object + */ +JXL_EXPORT void JxlDecoderRewind(JxlDecoder* dec); + +/** Makes the decoder skip the next `amount` frames. It still needs to process + * the input, but will not output the frame events. It can be more efficient + * when skipping frames, and even more so when using this after @ref + * JxlDecoderRewind. If the decoder is already processing a frame (could + * have emitted @ref JXL_DEC_FRAME but not yet @ref JXL_DEC_FULL_IMAGE), it + * starts skipping from the next frame. If the amount is larger than the amount + * of frames remaining in the image, all remaining frames are skipped. Calling + * this function multiple times adds the amount to skip to the already existing + * amount. 
+ * + * A frame here is defined as a frame that without skipping emits events such + * as @ref JXL_DEC_FRAME and @ref JXL_DEC_FULL_IMAGE, frames that are internal + * to the file format but are not rendered as part of an animation, or are not + * the final still frame of a still image, are not counted. + * + * @param dec decoder object + * @param amount the amount of frames to skip + */ +JXL_EXPORT void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount); + +/** + * Skips processing the current frame. Can be called after frame processing + * already started, signaled by a @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event, + * but before the corresponding @ref JXL_DEC_FULL_IMAGE event. The next signaled + * event will be another @ref JXL_DEC_FRAME, or @ref JXL_DEC_SUCCESS if there + * are no more frames. If pixel data is required from the already processed part + * of the frame, @ref JxlDecoderFlushImage must be called before this. + * + * @param dec decoder object + * @return @ref JXL_DEC_SUCCESS if there is a frame to skip, and @ref + * JXL_DEC_ERROR if the function was not called during frame processing. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSkipCurrentFrame(JxlDecoder* dec); + +/** + * Set the parallel runner for multithreading. May only be set before starting + * decoding. + * + * @param dec decoder object + * @param parallel_runner function pointer to runner for multithreading. It may + * be NULL to use the default, single-threaded, runner. A multithreaded + * runner should be set to reach fast performance. + * @param parallel_runner_opaque opaque pointer for parallel_runner. + * @return @ref JXL_DEC_SUCCESS if the runner was set, @ref JXL_DEC_ERROR + * otherwise (the previous runner remains set). 
+ */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner, + void* parallel_runner_opaque); + +/** + * Returns a hint indicating how many more bytes the decoder is expected to + * need to make @ref JxlDecoderGetBasicInfo available after the next @ref + * JxlDecoderProcessInput call. This is a suggested large enough value for + * the amount of bytes to provide in the next @ref JxlDecoderSetInput call, but + * it is not guaranteed to be an upper bound nor a lower bound. This number does + * not include bytes that have already been released from the input. Can be used + * before the first @ref JxlDecoderProcessInput call, and is correct the first + * time in most cases. If not, @ref JxlDecoderSizeHintBasicInfo can be called + * again to get an updated hint. + * + * @param dec decoder object + * @return the size hint in bytes if the basic info is not yet fully decoded. + * @return 0 when the basic info is already available. + */ +JXL_EXPORT size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec); + +/** Select for which informative events, i.e. @ref JXL_DEC_BASIC_INFO, etc., the + * decoder should return with a status. It is not required to subscribe to any + * events, data can still be requested from the decoder as soon as it is available. + * By default, the decoder is subscribed to no events (events_wanted == 0), and + * the decoder will then only return when it cannot continue because it needs + * more input data or more output buffer. This function may only be called + * before using @ref JxlDecoderProcessInput. + * + * @param dec decoder object + * @param events_wanted bitfield of desired events. + * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec, + int events_wanted); + +/** Enables or disables preserving of as-in-bitstream pixeldata + * orientation.
Some images are encoded with an Orientation tag + * indicating that the decoder must perform a rotation and/or + * mirroring to the encoded image data. + * + * - If skip_reorientation is JXL_FALSE (the default): the decoder + * will apply the transformation from the orientation setting, hence + * rendering the image according to its specified intent. When + * producing a JxlBasicInfo, the decoder will always set the + * orientation field to JXL_ORIENT_IDENTITY (matching the returned + * pixel data) and also align xsize and ysize so that they correspond + * to the width and the height of the returned pixel data. + * - If skip_reorientation is JXL_TRUE: the decoder will skip + * applying the transformation from the orientation setting, returning + * the image in the as-in-bitstream pixeldata orientation. + * This may be faster to decode since the decoder doesn't have to apply the + * transformation, but can cause wrong display of the image if the + * orientation tag is not correctly taken into account by the user. + * + * By default, this option is disabled, and the returned pixel data is + * re-oriented according to the image's Orientation setting. + * + * This function must be called at the beginning, before decoding is performed. + * + * @see JxlBasicInfo for the orientation field, and @ref JxlOrientation for the + * possible values. + * + * @param dec decoder object + * @param skip_reorientation JXL_TRUE to enable, JXL_FALSE to disable. + * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise. + */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetKeepOrientation(JxlDecoder* dec, JXL_BOOL skip_reorientation); + +/** + * Enables or disables preserving of associated alpha channels. If + * unpremul_alpha is set to JXL_FALSE then for associated alpha channel, the + * pixel data is returned with premultiplied colors. If it is set to JXL_TRUE, + * The colors will be unpremultiplied based on the alpha channel. 
This function + * has no effect if the image does not have an associated alpha channel. + * + * By default, this option is disabled, and the pixel data is returned "as is". + * + * This function must be called at the beginning, before decoding is performed. + * + * @param dec decoder object + * @param unpremul_alpha JXL_TRUE to enable, JXL_FALSE to disable. + * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise. + */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetUnpremultiplyAlpha(JxlDecoder* dec, JXL_BOOL unpremul_alpha); + +/** Enables or disables rendering spot colors. By default, spot colors + * are rendered, which is OK for viewing the decoded image. If render_spotcolors + * is JXL_FALSE, then spot colors are not rendered, and have to be retrieved + * separately using @ref JxlDecoderSetExtraChannelBuffer. This is useful for + * e.g. printing applications. + * + * @param dec decoder object + * @param render_spotcolors JXL_TRUE to enable (default), JXL_FALSE to disable. + * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise. + */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetRenderSpotcolors(JxlDecoder* dec, JXL_BOOL render_spotcolors); + +/** Enables or disables coalescing of zero-duration frames. By default, frames + * are returned with coalescing enabled, i.e. all frames have the image + * dimensions, and are blended if needed. When coalescing is disabled, frames + * can have arbitrary dimensions, a non-zero crop offset, and blending is not + * performed. For display, coalescing is recommended. For loading a multi-layer + * still image as separate layers (as opposed to the merged image), coalescing + * has to be disabled. + * + * @param dec decoder object + * @param coalescing JXL_TRUE to enable coalescing (default), JXL_FALSE to + * disable it. + * @return @ref JXL_DEC_SUCCESS if no error, @ref JXL_DEC_ERROR otherwise.
+ */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetCoalescing(JxlDecoder* dec, + JXL_BOOL coalescing); + +/** + * Decodes JPEG XL file using the available bytes. Requires input has been + * set with @ref JxlDecoderSetInput. After @ref JxlDecoderProcessInput, input + * can optionally be released with @ref JxlDecoderReleaseInput and then set + * again to next bytes in the stream. @ref JxlDecoderReleaseInput returns how + * many bytes are not yet processed, before a next call to @ref + * JxlDecoderProcessInput all unprocessed bytes must be provided again (the + * address need not match, but the contents must), and more bytes may be + * concatenated after the unprocessed bytes. + * + * The returned status indicates whether the decoder needs more input bytes, or + * more output buffer for a certain type of output data. No matter what the + * returned status is (other than @ref JXL_DEC_ERROR), new information, such + * as @ref JxlDecoderGetBasicInfo, may have become available after this call. + * When the return value is not @ref JXL_DEC_ERROR or @ref JXL_DEC_SUCCESS, the + * decoding requires more @ref JxlDecoderProcessInput calls to continue. + * + * @param dec decoder object + * @return @ref JXL_DEC_SUCCESS when decoding finished and all events handled. + * If you still have more unprocessed input data anyway, then you can still + * continue by using @ref JxlDecoderSetInput and calling @ref + * JxlDecoderProcessInput again, similar to handling @ref + * JXL_DEC_NEED_MORE_INPUT. @ref JXL_DEC_SUCCESS can occur instead of @ref + * JXL_DEC_NEED_MORE_INPUT when, for example, the input data ended right at + * the boundary of a box of the container format, all essential codestream + * boxes were already decoded, but extra metadata boxes are still present in + * the next data. @ref JxlDecoderProcessInput cannot return success if all + * codestream boxes have not been seen yet. + * @return @ref JXL_DEC_ERROR when decoding failed, e.g. invalid codestream. 
+ * TODO(lode): document the input data mechanism + * @return @ref JXL_DEC_NEED_MORE_INPUT when more input data is necessary. + * @return @ref JXL_DEC_BASIC_INFO when basic info such as image dimensions is + * available and this informative event is subscribed to. + * @return @ref JXL_DEC_COLOR_ENCODING when color profile information is + * available and this informative event is subscribed to. + * @return @ref JXL_DEC_PREVIEW_IMAGE when preview pixel information is + * available and output in the preview buffer. + * @return @ref JXL_DEC_FULL_IMAGE when all pixel information at highest detail + * is available and has been output in the pixel buffer. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec); + +/** + * Sets input data for @ref JxlDecoderProcessInput. The data is owned by the + * caller and may be used by the decoder until @ref JxlDecoderReleaseInput is + * called or the decoder is destroyed or reset so must be kept alive until then. + * Cannot be called if @ref JxlDecoderSetInput was already called and @ref + * JxlDecoderReleaseInput was not yet called, and cannot be called after @ref + * JxlDecoderCloseInput indicating the end of input was called. + * + * @param dec decoder object + * @param data pointer to next bytes to read from + * @param size amount of bytes available starting from data + * @return @ref JXL_DEC_ERROR if input was already set without releasing or @ref + * JxlDecoderCloseInput was already called, @ref JXL_DEC_SUCCESS otherwise. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec, + const uint8_t* data, + size_t size); + +/** + * Releases input which was provided with @ref JxlDecoderSetInput. Between @ref + * JxlDecoderProcessInput and @ref JxlDecoderReleaseInput, the user may not + * alter the data in the buffer. 
Calling @ref JxlDecoderReleaseInput is required + * whenever any input is already set and new input needs to be added with @ref + * JxlDecoderSetInput, but is not required before @ref JxlDecoderDestroy or @ref + * JxlDecoderReset. Calling @ref JxlDecoderReleaseInput when no input is set is + * not an error and returns 0. + * + * @param dec decoder object + * @return The amount of bytes the decoder has not yet processed that are still + * remaining in the data set by @ref JxlDecoderSetInput, or 0 if no input is + * set or @ref JxlDecoderReleaseInput was already called. For a next call + * to @ref JxlDecoderProcessInput, the buffer must start with these + * unprocessed bytes. From this value it is possible to infer the position + * of certain JPEG XL codestream elements (e.g. end of headers, frame + * start/end). See the documentation of individual values of @ref + * JxlDecoderStatus for more information. + */ +JXL_EXPORT size_t JxlDecoderReleaseInput(JxlDecoder* dec); + +/** + * Marks the input as finished, indicates that no more @ref JxlDecoderSetInput + * will be called. This function allows the decoder to determine correctly if it + * should return success, need more input or error in certain cases. For + * backwards compatibility with a previous version of the API, using this + * function is optional when not using the @ref JXL_DEC_BOX event (the decoder + * is able to determine the end of the image frames without marking the end), + * but using this function is required when using @ref JXL_DEC_BOX for getting + * metadata box contents. This function does not replace @ref + * JxlDecoderReleaseInput, that function should still be called if its return + * value is needed. + * + * @ref JxlDecoderCloseInput should be called as soon as all known input bytes + * are set (e.g. at the beginning when not streaming but setting all input + * at once), before the final @ref JxlDecoderProcessInput calls. 
+ * + * @param dec decoder object + */ +JXL_EXPORT void JxlDecoderCloseInput(JxlDecoder* dec); + +/** + * Outputs the basic image information, such as image dimensions, bit depth and + * all other JxlBasicInfo fields, if available. + * + * @param dec decoder object + * @param info struct to copy the information into, or NULL to only check + * whether the information is available through the return value. + * @return @ref JXL_DEC_SUCCESS if the value is available, @ref + * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR + * in case of other error conditions. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec, + JxlBasicInfo* info); + +/** + * Outputs information for extra channel at the given index. The index must be + * smaller than num_extra_channels in the associated JxlBasicInfo. + * + * @param dec decoder object + * @param index index of the extra channel to query. + * @param info struct to copy the information into, or NULL to only check + * whether the information is available through the return value. + * @return @ref JXL_DEC_SUCCESS if the value is available, @ref + * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR + * in case of other error conditions. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelInfo( + const JxlDecoder* dec, size_t index, JxlExtraChannelInfo* info); + +/** + * Outputs name for extra channel at the given index in UTF-8. The index must be + * smaller than num_extra_channels in the associated JxlBasicInfo. The buffer + * for name must have at least name_length + 1 bytes allocated, gotten from + * the associated JxlExtraChannelInfo. + * + * @param dec decoder object + * @param index index of the extra channel to query. 
+ * @param name buffer to copy the name into + * @param size size of the name buffer in bytes + * @return @ref JXL_DEC_SUCCESS if the value is available, @ref + * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR + * in case of other error conditions. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec, + size_t index, + char* name, + size_t size); + +/** Defines which color profile to get: the profile from the codestream + * metadata header, which represents the color profile of the original image, + * or the color profile from the pixel data produced by the decoder. Both are + * the same if the JxlBasicInfo has uses_original_profile set. + */ +typedef enum { + /** Get the color profile of the original image from the metadata. + */ + JXL_COLOR_PROFILE_TARGET_ORIGINAL = 0, + + /** Get the color profile of the pixel data the decoder outputs. */ + JXL_COLOR_PROFILE_TARGET_DATA = 1, +} JxlColorProfileTarget; + +/** + * Outputs the color profile as JPEG XL encoded structured data, if available. + * This is an alternative to an ICC Profile, which can represent a more limited + * amount of color spaces, but represents them exactly through enum values. + * + * It is often possible to use @ref JxlDecoderGetColorAsICCProfile as an + * alternative anyway. The following scenarios are possible: + * - The JPEG XL image has an attached ICC Profile, in that case, the encoded + * structured data is not available, this function will return an error + * status. @ref JxlDecoderGetColorAsICCProfile should be called instead. + * - The JPEG XL image has an encoded structured color profile, and it + * represents an RGB or grayscale color space. This function will return it. + * You can still use @ref JxlDecoderGetColorAsICCProfile as well as an + * alternative if desired, though depending on which RGB color space is + * represented, the ICC profile may be a close approximation. 
It is also not + * always feasible to deduce from an ICC profile which named color space it + * exactly represents, if any, as it can represent any arbitrary space. + * HDR color spaces such as those using PQ and HLG are also potentially + * problematic, in that: while ICC profiles can encode a transfer function + * that happens to approximate those of PQ and HLG (HLG for only one given + * system gamma at a time, and necessitating a 3D LUT if gamma is to be + * different from 1), they cannot (before ICCv4.4) semantically signal that + * this is the color space that they represent. Therefore, they will + * typically not actually be interpreted as representing an HDR color space. + * This is especially detrimental to PQ which will then be interpreted as if + * the maximum signal value represented SDR white instead of 10000 cd/m^2, + * meaning that the image will be displayed two orders of magnitude (5-7 EV) + * too dim. + * - The JPEG XL image has an encoded structured color profile, and it + * indicates an unknown or xyb color space. In that case, @ref + * JxlDecoderGetColorAsICCProfile is not available. + * + * When rendering an image on a system where ICC-based color management is used, + * @ref JxlDecoderGetColorAsICCProfile should generally be used first as it will + * return a ready-to-use profile (with the aforementioned caveat about HDR). + * When knowledge about the nominal color space is desired if available, @ref + * JxlDecoderGetColorAsEncodedProfile should be used first. + * + * @param dec decoder object + * @param target whether to get the original color profile from the metadata + * or the color profile of the decoded pixels. + * @param color_encoding struct to copy the information into, or NULL to only + * check whether the information is available through the return value. 
+ * @return @ref JXL_DEC_SUCCESS if the data is available and returned, @ref + * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in + * case the encoded structured color profile does not exist in the + * codestream. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile( + const JxlDecoder* dec, JxlColorProfileTarget target, + JxlColorEncoding* color_encoding); + +/** + * Outputs the size in bytes of the ICC profile returned by @ref + * JxlDecoderGetColorAsICCProfile, if available, or indicates there is none + * available. In most cases, the image will have an ICC profile available, but + * if it does not, @ref JxlDecoderGetColorAsEncodedProfile must be used instead. + * + * @see JxlDecoderGetColorAsEncodedProfile for more information. The ICC + * profile is either the exact ICC profile attached to the codestream metadata, + * or a close approximation generated from JPEG XL encoded structured data, + * depending on what is encoded in the codestream. + * + * @param dec decoder object + * @param target whether to get the original color profile from the metadata + * or the color profile of the decoded pixels. + * @param size variable to output the size into, or NULL to only check the + * return status. + * @return @ref JXL_DEC_SUCCESS if the ICC profile is available, @ref + * JXL_DEC_NEED_MORE_INPUT if the decoder has not yet received enough + * input data to determine whether an ICC profile is available or what its + * size is, @ref JXL_DEC_ERROR in case the ICC profile is not available and + * cannot be generated. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetICCProfileSize( + const JxlDecoder* dec, JxlColorProfileTarget target, size_t* size); + +/** + * Outputs ICC profile if available. The profile is only available if @ref + * JxlDecoderGetICCProfileSize returns success. The output buffer must have + * at least as many bytes as given by @ref JxlDecoderGetICCProfileSize.
+ * + * @param dec decoder object + * @param target whether to get the original color profile from the metadata + * or the color profile of the decoded pixels. + * @param icc_profile buffer to copy the ICC profile into + * @param size size of the icc_profile buffer in bytes + * @return @ref JXL_DEC_SUCCESS if the profile was successfully + * returned, @ref JXL_DEC_NEED_MORE_INPUT if not yet available, @ref + * JXL_DEC_ERROR if the profile doesn't exist or the output size is not + * large enough. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetColorAsICCProfile( + const JxlDecoder* dec, JxlColorProfileTarget target, uint8_t* icc_profile, + size_t size); + +/** Sets the desired output color profile of the decoded image by calling + * @ref JxlDecoderSetOutputColorProfile, passing on @c color_encoding and + * setting @c icc_data to NULL. See @ref JxlDecoderSetOutputColorProfile for + * details. + * + * @param dec decoder object + * @param color_encoding the default color encoding to set + * @return @ref JXL_DEC_SUCCESS if the preference was set successfully, @ref + * JXL_DEC_ERROR otherwise. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreferredColorProfile( + JxlDecoder* dec, const JxlColorEncoding* color_encoding); + +/** Requests that the decoder perform tone mapping to the peak display luminance + * passed as @c desired_intensity_target, if appropriate. + * @note This is provided for convenience and the exact tone mapping that is + * performed is not meant to be considered authoritative in any way. It may + * change from version to version. + * @param dec decoder object + * @param desired_intensity_target the intended target peak luminance + * @return @ref JXL_DEC_SUCCESS if the preference was set successfully, @ref + * JXL_DEC_ERROR otherwise.
+ */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetDesiredIntensityTarget( + JxlDecoder* dec, float desired_intensity_target); + +/** + * Sets the desired output color profile of the decoded image either from a + * color encoding or an ICC profile. Valid calls of this function have either @c + * color_encoding or @c icc_data set to NULL and @c icc_size must be 0 if and + * only if @c icc_data is NULL. + * + * Depending on whether a color management system (CMS) has been set the + * behavior is as follows: + * + * If a color management system (CMS) has been set with @ref JxlDecoderSetCms, + * and the CMS supports output to the desired color encoding or ICC profile, + * then it will provide the output in that color encoding or ICC profile. If the + * desired color encoding or the ICC is not supported, then an error will be + * returned. + * + * If no CMS has been set with @ref JxlDecoderSetCms, there are two cases: + * + * (1) Calling this function with a color encoding will convert XYB images to + * the desired color encoding. In this case, if the requested color encoding has + * a narrower gamut, or the white points differ, then the resulting image can + * have significant color distortion. Non-XYB images will not be converted to + * the desired color space. + * + * (2) Calling this function with an ICC profile will result in an error. + * + * If called with an ICC profile (after a call to @ref JxlDecoderSetCms), the + * ICC profile has to be a valid RGB or grayscale color profile. + * + * Can only be set after the @ref JXL_DEC_COLOR_ENCODING event occurred and + * before any other event occurred, and should be used before getting + * JXL_COLOR_PROFILE_TARGET_DATA. + * + * This function must not be called before JxlDecoderSetCms. 
+ * + * @param dec decoder object + * @param color_encoding the output color encoding + * @param icc_data bytes of the icc profile + * @param icc_size size of the icc profile in bytes + * @return @ref JXL_DEC_SUCCESS if the color profile was set successfully, @ref + * JXL_DEC_ERROR otherwise. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetOutputColorProfile( + JxlDecoder* dec, const JxlColorEncoding* color_encoding, + const uint8_t* icc_data, size_t icc_size); + +/** + * Sets the color management system (CMS) that will be used for color + * conversion (if applicable) during decoding. May only be set before starting + * decoding and must not be called after @ref JxlDecoderSetOutputColorProfile. + * + * See @ref JxlDecoderSetOutputColorProfile for how color conversions are done + * depending on whether or not a CMS has been set with @ref JxlDecoderSetCms. + * + * @param dec decoder object. + * @param cms structure representing a CMS implementation. See @ref + * JxlCmsInterface for more details. + */ +JXL_EXPORT void JxlDecoderSetCms(JxlDecoder* dec, JxlCmsInterface cms); +// TODO(firsching): add a function JxlDecoderSetDefaultCms() for setting a +// default in case libjxl is built with a CMS. + +/** + * Returns the minimum size in bytes of the preview image output pixel buffer + * for the given format. This is the buffer for @ref + * JxlDecoderSetPreviewOutBuffer. Requires the preview header information is + * available in the decoder. + * + * @param dec decoder object + * @param format format of pixels + * @param size output value, buffer size in bytes + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * information not available yet. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize( + const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size); + +/** + * Sets the buffer to write the small resolution preview image + * to.
The size of the buffer must be at least as large as given by @ref + * JxlDecoderPreviewOutBufferSize. The buffer follows the format described + * by JxlPixelFormat. The preview image dimensions are given by the + * JxlPreviewHeader. The buffer is owned by the caller. + * + * @param dec decoder object + * @param format format of pixels. Object owned by user and its contents are + * copied internally. + * @param buffer buffer type to output the pixel data to + * @param size size of buffer in bytes + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * size too small. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetPreviewOutBuffer( + JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size); + +/** + * Outputs the information from the frame, such as duration when have_animation. + * This function can be called when @ref JXL_DEC_FRAME occurred for the current + * frame, even when have_animation in the JxlBasicInfo is JXL_FALSE. + * + * @param dec decoder object + * @param header struct to copy the information into, or NULL to only check + * whether the information is available through the return value. + * @return @ref JXL_DEC_SUCCESS if the value is available, @ref + * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in + * case of other error conditions. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec, + JxlFrameHeader* header); + +/** + * Outputs name for the current frame. The buffer for name must have at least + * name_length + 1 bytes allocated, gotten from the associated JxlFrameHeader. + * + * @param dec decoder object + * @param name buffer to copy the name into + * @param size size of the name buffer in bytes, including zero termination + * character, so this must be at least JxlFrameHeader.name_length + 1. 
+ * @return @ref JXL_DEC_SUCCESS if the value is available, @ref + * JXL_DEC_NEED_MORE_INPUT if not yet available, @ref JXL_DEC_ERROR in + * case of other error conditions. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec, + char* name, size_t size); + +/** + * Outputs the blend information for the current frame for a specific extra + * channel. This function can be called when @ref JXL_DEC_FRAME occurred for the + * current frame, even when have_animation in the JxlBasicInfo is JXL_FALSE. + * This information is only useful if coalescing is disabled; otherwise the + * decoder will have performed blending already. + * + * @param dec decoder object + * @param index the index of the extra channel + * @param blend_info struct to copy the information into + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetExtraChannelBlendInfo( + const JxlDecoder* dec, size_t index, JxlBlendInfo* blend_info); + +/** + * Returns the minimum size in bytes of the image output pixel buffer for the + * given format. This is the buffer for @ref JxlDecoderSetImageOutBuffer. + * Requires that the basic image information is available in the decoder in the + * case of coalescing enabled (default). In case coalescing is disabled, this + * can only be called after the @ref JXL_DEC_FRAME event occurs. In that case, + * it will return the size required to store the possibly cropped frame (which + * can be larger or smaller than the image dimensions). + * + * @param dec decoder object + * @param format format of the pixels. + * @param size output value, buffer size in bytes + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * information not available yet. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize( + const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size); + +/** + * Sets the buffer to write the full resolution image to. 
This can be set when + * the @ref JXL_DEC_FRAME event occurs, must be set when the @ref + * JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs, and applies only for the + * current frame. The size of the buffer must be at least as large as given + * by @ref JxlDecoderImageOutBufferSize. The buffer follows the format described + * by JxlPixelFormat. The buffer is owned by the caller. + * + * @param dec decoder object + * @param format format of the pixels. Object owned by user and its contents + * are copied internally. + * @param buffer buffer type to output the pixel data to + * @param size size of buffer in bytes + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * size too small. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetImageOutBuffer( + JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size); + +/** + * Function type for @ref JxlDecoderSetImageOutCallback. + * + * The callback may be called simultaneously by different threads when using a + * threaded parallel runner, on different pixels. + * + * @param opaque optional user data, as given to @ref + * JxlDecoderSetImageOutCallback. + * @param x horizontal position of leftmost pixel of the pixel data. + * @param y vertical position of the pixel data. + * @param num_pixels amount of pixels included in the pixel data, horizontally. + * This is not the same as xsize of the full image, it may be smaller. + * @param pixels pixel data as a horizontal stripe, in the format passed to @ref + * JxlDecoderSetImageOutCallback. The memory is not owned by the user, and + * is only valid during the time the callback is running. + */ +typedef void (*JxlImageOutCallback)(void* opaque, size_t x, size_t y, + size_t num_pixels, const void* pixels); + +/** + * Initialization callback for @ref JxlDecoderSetMultithreadedImageOutCallback. + * + * @param init_opaque optional user data, as given to @ref + * JxlDecoderSetMultithreadedImageOutCallback. 
+ * @param num_threads maximum number of threads that will call the @c run + * callback concurrently. + * @param num_pixels_per_thread maximum number of pixels that will be passed in + * one call to @c run. + * @return a pointer to data that will be passed to the @c run callback, or + * @c NULL if initialization failed. + */ +typedef void* (*JxlImageOutInitCallback)(void* init_opaque, size_t num_threads, + size_t num_pixels_per_thread); + +/** + * Worker callback for @ref JxlDecoderSetMultithreadedImageOutCallback. + * + * @param run_opaque user data returned by the @c init callback. + * @param thread_id number in `[0, num_threads)` identifying the thread of the + * current invocation of the callback. + * @param x horizontal position of the first (leftmost) pixel of the pixel data. + * @param y vertical position of the pixel data. + * @param num_pixels number of pixels in the pixel data. May be less than the + * full @c xsize of the image, and will be at most equal to the @c + * num_pixels_per_thread that was passed to @c init. + * @param pixels pixel data as a horizontal stripe, in the format passed to @ref + * JxlDecoderSetMultithreadedImageOutCallback. The data pointed to + * remains owned by the caller and is only guaranteed to outlive the current + * callback invocation. + */ +typedef void (*JxlImageOutRunCallback)(void* run_opaque, size_t thread_id, + size_t x, size_t y, size_t num_pixels, + const void* pixels); + +/** + * Destruction callback for @ref JxlDecoderSetMultithreadedImageOutCallback, + * called after all invocations of the @c run callback to perform any + * appropriate clean-up of the @c run_opaque data returned by @c init. + * + * @param run_opaque user data returned by the @c init callback. + */ +typedef void (*JxlImageOutDestroyCallback)(void* run_opaque); + +/** + * Sets pixel output callback. This is an alternative to @ref + * JxlDecoderSetImageOutBuffer. 
This can be set when the @ref JXL_DEC_FRAME + * event occurs, must be set when the @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event + * occurs, and applies only for the current frame. Only one of @ref + * JxlDecoderSetImageOutBuffer or @ref JxlDecoderSetImageOutCallback may be used + * for the same frame, not both at the same time. + * + * The callback will be called multiple times, to receive the image + * data in small chunks. The callback receives a horizontal stripe of pixel + * data, 1 pixel high, xsize pixels wide, called a scanline. The xsize here is + * not the same as the full image width, the scanline may be a partial section, + * and xsize may differ between calls. The user can then process and/or copy the + * partial scanline to an image buffer. The callback may be called + * simultaneously by different threads when using a threaded parallel runner, on + * different pixels. + * + * If @ref JxlDecoderFlushImage is not used, then each pixel will be visited + * exactly once by the different callback calls, during processing with one or + * more @ref JxlDecoderProcessInput calls. These pixels are decoded to full + * detail, they are not part of a lower resolution or lower quality progressive + * pass, but the final pass. + * + * If @ref JxlDecoderFlushImage is used, then in addition each pixel will be + * visited zero or one times during the blocking @ref JxlDecoderFlushImage call. + * Pixels visited as a result of @ref JxlDecoderFlushImage may represent a lower + * resolution or lower quality intermediate progressive pass of the image. Any + * visited pixel will be of a quality at least as good or better than previous + * visits of this pixel. A pixel may be visited zero times if it cannot be + * decoded yet or if it was already decoded to full precision (this behavior is + * not guaranteed). + * + * @param dec decoder object + * @param format format of the pixels. Object owned by user; its contents are + * copied internally. 
+ * @param callback the callback function receiving partial scanlines of pixel + * data. + * @param opaque optional user data, which will be passed on to the callback, + * may be NULL. + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such + * as @ref JxlDecoderSetImageOutBuffer already set. + */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetImageOutCallback(JxlDecoder* dec, const JxlPixelFormat* format, + JxlImageOutCallback callback, void* opaque); + +/** Similar to @ref JxlDecoderSetImageOutCallback except that the callback is + * allowed an initialization phase during which it is informed of how many + * threads will call it concurrently, and those calls are further informed of + * which thread they are occurring in. + * + * @param dec decoder object + * @param format format of the pixels. Object owned by user; its contents are + * copied internally. + * @param init_callback initialization callback. + * @param run_callback the callback function receiving partial scanlines of + * pixel data. + * @param destroy_callback clean-up callback invoked after all calls to @c + * run_callback. May be NULL if no clean-up is necessary. + * @param init_opaque optional user data passed to @c init_callback, may be NULL + * (unlike the return value from @c init_callback which may only be NULL if + * initialization failed). + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such + * as @ref JxlDecoderSetImageOutBuffer having already been called. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetMultithreadedImageOutCallback( + JxlDecoder* dec, const JxlPixelFormat* format, + JxlImageOutInitCallback init_callback, JxlImageOutRunCallback run_callback, + JxlImageOutDestroyCallback destroy_callback, void* init_opaque); + +/** + * Returns the minimum size in bytes of an extra channel pixel buffer for the + * given format. This is the buffer for @ref JxlDecoderSetExtraChannelBuffer. 
+ * Requires the basic image information is available in the decoder. + * + * @param dec decoder object + * @param format format of the pixels. The num_channels value is ignored and is + * always treated to be 1. + * @param size output value, buffer size in bytes + * @param index which extra channel to get, matching the index used in @ref + * JxlDecoderGetExtraChannelInfo. Must be smaller than num_extra_channels in + * the associated JxlBasicInfo. + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * information not available yet or invalid index. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderExtraChannelBufferSize( + const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size, + uint32_t index); + +/** + * Sets the buffer to write an extra channel to. This can be set when + * the @ref JXL_DEC_FRAME or @ref JXL_DEC_NEED_IMAGE_OUT_BUFFER event occurs, + * and applies only for the current frame. The size of the buffer must be at + * least as large as given by @ref JxlDecoderExtraChannelBufferSize. The buffer + * follows the format described by JxlPixelFormat, but where num_channels is 1. + * The buffer is owned by the caller. The amount of extra channels is given by + * the num_extra_channels field in the associated JxlBasicInfo, and the + * information of individual extra channels can be queried with @ref + * JxlDecoderGetExtraChannelInfo. To get multiple extra channels, this function + * must be called multiple times, once for each wanted index. Not all images + * have extra channels. The alpha channel is an extra channel and can be gotten + * as part of the color channels when using an RGBA pixel buffer with @ref + * JxlDecoderSetImageOutBuffer, but additionally also can be gotten + * separately as extra channel. The color channels themselves cannot be gotten + * this way. + * + * + * @param dec decoder object + * @param format format of the pixels. Object owned by user and its contents + * are copied internally. 
The num_channels value is ignored and is always + * treated to be 1. + * @param buffer buffer type to output the pixel data to + * @param size size of buffer in bytes + * @param index which extra channel to get, matching the index used in @ref + * JxlDecoderGetExtraChannelInfo. Must be smaller than num_extra_channels in + * the associated JxlBasicInfo. + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * size too small or invalid index. + */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetExtraChannelBuffer(JxlDecoder* dec, const JxlPixelFormat* format, + void* buffer, size_t size, uint32_t index); + +/** + * Sets output buffer for reconstructed JPEG codestream. + * + * The data is owned by the caller and may be used by the decoder until @ref + * JxlDecoderReleaseJPEGBuffer is called or the decoder is destroyed or + * reset so must be kept alive until then. + * + * If a JPEG buffer was set before and released with @ref + * JxlDecoderReleaseJPEGBuffer, bytes that the decoder has already output + * should not be included, only the remaining bytes output must be set. + * + * @param dec decoder object + * @param data pointer to next bytes to write to + * @param size amount of bytes available starting from data + * @return @ref JXL_DEC_ERROR if output buffer was already set and @ref + * JxlDecoderReleaseJPEGBuffer was not called on it, @ref JXL_DEC_SUCCESS + * otherwise + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec, + uint8_t* data, size_t size); + +/** + * Releases buffer which was provided with @ref JxlDecoderSetJPEGBuffer. + * + * Calling @ref JxlDecoderReleaseJPEGBuffer is required whenever + * a buffer is already set and a new buffer needs to be added with @ref + * JxlDecoderSetJPEGBuffer, but is not required before @ref + * JxlDecoderDestroy or @ref JxlDecoderReset. + * + * Calling @ref JxlDecoderReleaseJPEGBuffer when no buffer is set is + * not an error and returns 0. 
+ * + * @param dec decoder object + * @return the amount of bytes the decoder has not yet written to of the data + * set by @ref JxlDecoderSetJPEGBuffer, or 0 if no buffer is set or @ref + * JxlDecoderReleaseJPEGBuffer was already called. + */ +JXL_EXPORT size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec); + +/** + * Sets output buffer for box output codestream. + * + * The data is owned by the caller and may be used by the decoder until @ref + * JxlDecoderReleaseBoxBuffer is called or the decoder is destroyed or + * reset so must be kept alive until then. + * + * If for the current box a box buffer was set before and released with @ref + * JxlDecoderReleaseBoxBuffer, bytes that the decoder has already output + * should not be included, only the remaining bytes output must be set. + * + * The @ref JxlDecoderReleaseBoxBuffer must be used at the next @ref JXL_DEC_BOX + * event or final @ref JXL_DEC_SUCCESS event to compute the size of the output + * box bytes. + * + * @param dec decoder object + * @param data pointer to next bytes to write to + * @param size amount of bytes available starting from data + * @return @ref JXL_DEC_ERROR if output buffer was already set and @ref + * JxlDecoderReleaseBoxBuffer was not called on it, @ref JXL_DEC_SUCCESS + * otherwise + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetBoxBuffer(JxlDecoder* dec, + uint8_t* data, size_t size); + +/** + * Releases buffer which was provided with @ref JxlDecoderSetBoxBuffer. + * + * Calling @ref JxlDecoderReleaseBoxBuffer is required whenever + * a buffer is already set and a new buffer needs to be added with @ref + * JxlDecoderSetBoxBuffer, but is not required before @ref + * JxlDecoderDestroy or @ref JxlDecoderReset. + * + * Calling @ref JxlDecoderReleaseBoxBuffer when no buffer is set is + * not an error and returns 0. 
+ * + * @param dec decoder object + * @return the amount of bytes the decoder has not yet written to of the data + * set by @ref JxlDecoderSetBoxBuffer, or 0 if no buffer is set or @ref + * JxlDecoderReleaseBoxBuffer was already called. + */ +JXL_EXPORT size_t JxlDecoderReleaseBoxBuffer(JxlDecoder* dec); + +/** + * Configures whether to get boxes in raw mode or in decompressed mode. In raw + * mode, boxes are output as their bytes appear in the container file, which may + * be decompressed, or compressed if their type is "brob". In decompressed mode, + * "brob" boxes are decompressed with Brotli before outputting them. The size of + * the decompressed stream is not known before the decompression has already + * finished. + * + * The default mode is raw. This setting can only be changed before decoding, or + * directly after a @ref JXL_DEC_BOX event, and is remembered until the decoder + * is reset or destroyed. + * + * Enabling decompressed mode requires Brotli support from the library. + * + * @param dec decoder object + * @param decompress JXL_TRUE to transparently decompress, JXL_FALSE to get + * boxes in raw mode. + * @return @ref JXL_DEC_ERROR if decompressed mode is set and Brotli is not + * available, @ref JXL_DEC_SUCCESS otherwise. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderSetDecompressBoxes(JxlDecoder* dec, + JXL_BOOL decompress); + +/** + * Outputs the type of the current box, after a @ref JXL_DEC_BOX event occurred, + * as 4 characters without null termination character. In case of a compressed + * "brob" box, this will return "brob" if the decompressed argument is + * JXL_FALSE, or the underlying box type if the decompressed argument is + * JXL_TRUE. + * + * The following box types are currently described in ISO/IEC 18181-2: + * - "Exif": a box with EXIF metadata. Starts with a 4-byte tiff header offset + * (big-endian uint32) that indicates the start of the actual EXIF data + * (which starts with a tiff header). 
Usually the offset will be zero and the + * EXIF data starts immediately after the offset field. The Exif orientation + * should be ignored by applications; the JPEG XL codestream orientation + * takes precedence and libjxl will by default apply the correct orientation + * automatically (see @ref JxlDecoderSetKeepOrientation). + * - "xml ": a box with XML data, in particular XMP metadata. + * - "jumb": a JUMBF superbox (JPEG Universal Metadata Box Format, ISO/IEC + * 19566-5). + * - "JXL ": mandatory signature box, must come first, 12 bytes long including + * the box header + * - "ftyp": a second mandatory signature box, must come second, 20 bytes long + * including the box header + * - "jxll": a JXL level box. This indicates if the codestream is level 5 or + * level 10 compatible. If not present, it is level 5. Level 10 allows more + * features such as very high image resolution and bit-depths above 16 bits + * per channel. Added automatically by the encoder when + * JxlEncoderSetCodestreamLevel is used + * - "jxlc": a box with the image codestream, in case the codestream is not + * split across multiple boxes. The codestream contains the JPEG XL image + * itself, including the basic info such as image dimensions, ICC color + * profile, and all the pixel data of all the image frames. + * - "jxlp": a codestream box in case it is split across multiple boxes. + * The contents are the same as in case of a jxlc box, when concatenated. + * - "brob": a Brotli-compressed box, which otherwise represents an existing + * type of box such as Exif or "xml ". When @ref JxlDecoderSetDecompressBoxes + * is set to JXL_TRUE, these boxes will be transparently decompressed by the + * decoder. + * - "jxli": frame index box, can list the keyframes in case of a JPEG XL + * animation allowing the decoder to jump to individual frames more + * efficiently. + * - "jbrd": JPEG reconstruction box, contains the information required to + * byte-for-byte losslessly reconstruct a JPEG-1 image.
The JPEG DCT + * coefficients (pixel content) themselves as well as the ICC profile are + * encoded in the JXL codestream (jxlc or jxlp) itself. EXIF, XMP and JUMBF + * metadata are encoded in the corresponding boxes. The jbrd box itself + * contains information such as the remaining app markers of the JPEG-1 file + * and everything else required to fit the information together into the + * exact original JPEG file. + * + * Other application-specific boxes can exist. Their typename should not begin + * with "jxl" or "JXL" or conflict with other existing typenames. + * + * The signature, jxl* and jbrd boxes are processed by the decoder and would + * typically be ignored by applications. The typical way to use this function is + * to check if an encountered box contains metadata that the application is + * interested in (e.g. EXIF or XMP metadata), in order to conditionally set a + * box buffer. + * + * @param dec decoder object + * @param type buffer to copy the type into + * @param decompressed which box type to get: JXL_FALSE to get the raw box type, + * which can be "brob", JXL_TRUE to get the underlying box type. + * @return @ref JXL_DEC_SUCCESS if the value is available, @ref JXL_DEC_ERROR if + * not, for example if the JXL file does not use the container format. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetBoxType(JxlDecoder* dec, + JxlBoxType type, + JXL_BOOL decompressed); + +/** + * Returns the size of a box as it appears in the container file, after the @ref + * JXL_DEC_BOX event. For a non-compressed box, this is the size of the + * contents, excluding the 4 bytes indicating the box type. For a compressed + * "brob" box, this is the size of the compressed box contents plus the + * additional 4 bytes indicating the underlying box type, but excluding the 4 + * bytes indicating "brob". This function gives the size of the data that will + * be written in the output buffer when getting boxes in the default raw + * compressed mode.
When @ref JxlDecoderSetDecompressBoxes is enabled, the + * return value of this function does not change, and the decompressed size is not + * known before it has already been decompressed and output. + * + * @param dec decoder object + * @param size raw size of the box in bytes + * @return @ref JXL_DEC_ERROR if no box size is available, @ref JXL_DEC_SUCCESS + * otherwise. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderGetBoxSizeRaw(const JxlDecoder* dec, + uint64_t* size); + +/** + * Configures at which progressive steps in frame decoding the @ref + * JXL_DEC_FRAME_PROGRESSION event occurs. The default value for the level + * of detail if this function is never called is `kDC`. + * + * @param dec decoder object + * @param detail at which level of detail to trigger @ref + * JXL_DEC_FRAME_PROGRESSION + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * an invalid value for the progressive detail. + */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetProgressiveDetail(JxlDecoder* dec, JxlProgressiveDetail detail); + +/** + * Returns the intended downsampling ratio for the progressive frame produced + * by @ref JxlDecoderFlushImage after the latest @ref JXL_DEC_FRAME_PROGRESSION + * event. + * + * @param dec decoder object + * @return The intended downsampling ratio, can be 1, 2, 4 or 8. + */ +JXL_EXPORT size_t JxlDecoderGetIntendedDownsamplingRatio(JxlDecoder* dec); + +/** + * Outputs progressive step towards the decoded image so far when only partial + * input was received. If the flush was successful, the buffer set with @ref + * JxlDecoderSetImageOutBuffer will contain partial image data. + * + * Can be called when @ref JxlDecoderProcessInput returns @ref + * JXL_DEC_NEED_MORE_INPUT, after the @ref JXL_DEC_FRAME event already occurred + * and before the @ref JXL_DEC_FULL_IMAGE event occurred for a frame.
+ * + * @param dec decoder object + * @return @ref JXL_DEC_SUCCESS if image data was flushed to the output buffer, + * or @ref JXL_DEC_ERROR when no flush was done, e.g. if not enough image + * data was available yet even for flush, or no output buffer was set yet. + * This error is not fatal, it only indicates no flushed image is available + * right now. Regular decoding can still be performed. + */ +JXL_EXPORT JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec); + +/** + * Sets the bit depth of the output buffer or callback. + * + * Can be called after @ref JxlDecoderSetImageOutBuffer or @ref + * JxlDecoderSetImageOutCallback. For float pixel data types, only the default + * @ref JXL_BIT_DEPTH_FROM_PIXEL_FORMAT setting is supported. + * + * @param dec decoder object + * @param bit_depth the bit depth setting of the pixel output + * @return @ref JXL_DEC_SUCCESS on success, @ref JXL_DEC_ERROR on error, such as + * incompatible custom bit depth and pixel data type. + */ +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetImageOutBitDepth(JxlDecoder* dec, const JxlBitDepth* bit_depth); + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_DECODE_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/decode_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/decode_cxx.h new file mode 100644 index 0000000000..bc6e8a3789 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/decode_cxx.h @@ -0,0 +1,57 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/// @addtogroup libjxl_decoder +/// @{ +/// +/// @file decode_cxx.h +/// @brief C++ header-only helper for @ref decode.h. +/// +/// There's no binary library associated with the header since this is a header +/// only library. 
+ +#ifndef JXL_DECODE_CXX_H_ +#define JXL_DECODE_CXX_H_ + +#include + +#include + +#if !(defined(__cplusplus) || defined(c_plusplus)) +#error "This a C++ only header. Use jxl/decode.h from C sources." +#endif + +/// Struct to call JxlDecoderDestroy from the JxlDecoderPtr unique_ptr. +struct JxlDecoderDestroyStruct { + /// Calls @ref JxlDecoderDestroy() on the passed decoder. + void operator()(JxlDecoder* decoder) { JxlDecoderDestroy(decoder); } +}; + +/// std::unique_ptr<> type that calls JxlDecoderDestroy() when releasing the +/// decoder. +/// +/// Use this helper type from C++ sources to ensure the decoder is destroyed and +/// their internal resources released. +typedef std::unique_ptr JxlDecoderPtr; + +/// Creates an instance of JxlDecoder into a JxlDecoderPtr and initializes it. +/// +/// This function returns a unique_ptr that will call JxlDecoderDestroy() when +/// releasing the pointer. See @ref JxlDecoderCreate for details on the +/// instance creation. +/// +/// @param memory_manager custom allocator function. It may be NULL. The memory +/// manager will be copied internally. +/// @return a @c NULL JxlDecoderPtr if the instance can not be allocated or +/// initialized +/// @return initialized JxlDecoderPtr instance otherwise. +static inline JxlDecoderPtr JxlDecoderMake( + const JxlMemoryManager* memory_manager) { + return JxlDecoderPtr(JxlDecoderCreate(memory_manager)); +} + +#endif // JXL_DECODE_CXX_H_ + +/// @} diff --git a/third-party/libjxl/libjxl/lib/include/jxl/encode.h b/third-party/libjxl/libjxl/lib/include/jxl/encode.h new file mode 100644 index 0000000000..7501f9db55 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/encode.h @@ -0,0 +1,1313 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_encoder + * @{ + * @file encode.h + * @brief Encoding API for JPEG XL. 
+ */ + +#ifndef JXL_ENCODE_H_ +#define JXL_ENCODE_H_ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** + * Encoder library version. + * + * @return the encoder library version as an integer: + * MAJOR_VERSION * 1000000 + MINOR_VERSION * 1000 + PATCH_VERSION. For example, + * version 1.2.3 would return 1002003. + */ +JXL_EXPORT uint32_t JxlEncoderVersion(void); + +/** + * Opaque structure that holds the JPEG XL encoder. + * + * Allocated and initialized with JxlEncoderCreate(). + * Cleaned up and deallocated with JxlEncoderDestroy(). + */ +typedef struct JxlEncoderStruct JxlEncoder; + +/** + * Settings and metadata for a single image frame. This includes encoder options + * for a frame such as compression quality and speed. + * + * Allocated and initialized with JxlEncoderFrameSettingsCreate(). + * Cleaned up and deallocated when the encoder is destroyed with + * JxlEncoderDestroy(). + */ +typedef struct JxlEncoderFrameSettingsStruct JxlEncoderFrameSettings; + +/** + * Return value for multiple encoder functions. + */ +typedef enum { + /** Function call finished successfully, or encoding is finished and there is + * nothing more to be done. + */ + JXL_ENC_SUCCESS = 0, + + /** An error occurred, for example out of memory. + */ + JXL_ENC_ERROR = 1, + + /** The encoder needs more output buffer to continue encoding. + */ + JXL_ENC_NEED_MORE_OUTPUT = 2, + +} JxlEncoderStatus; + +/** + * Error conditions: + * API usage errors have the 0x80 bit set to 1 + * Other errors have the 0x80 bit set to 0 + */ +typedef enum { + /** No error + */ + JXL_ENC_ERR_OK = 0, + + /** Generic encoder error due to unspecified cause + */ + JXL_ENC_ERR_GENERIC = 1, + + /** Out of memory + * TODO(jon): actually catch this and return this error + */ + JXL_ENC_ERR_OOM = 2, + + /** JPEG bitstream reconstruction data could not be + * represented (e.g. 
too much tail data) + */ + JXL_ENC_ERR_JBRD = 3, + + /** Input is invalid (e.g. corrupt JPEG file or ICC profile) + */ + JXL_ENC_ERR_BAD_INPUT = 4, + + /** The encoder doesn't (yet) support this. Either no version of libjxl + * supports this, and the API is used incorrectly, or the libjxl version + * should have been checked before trying to do this. + */ + JXL_ENC_ERR_NOT_SUPPORTED = 0x80, + + /** The encoder API is used in an incorrect way. + * In this case, a debug build of libjxl should output a specific error + * message. (if not, please open an issue about it) + */ + JXL_ENC_ERR_API_USAGE = 0x81, + +} JxlEncoderError; + +/** + * Id of encoder options for a frame. This includes options such as setting + * encoding effort/speed or overriding the use of certain coding tools, for this + * frame. This does not include non-frame related encoder options such as for + * boxes. + */ +typedef enum { + /** Sets encoder effort/speed level without affecting decoding speed. Valid + * values are, from faster to slower speed: 1:lightning 2:thunder 3:falcon + * 4:cheetah 5:hare 6:wombat 7:squirrel 8:kitten 9:tortoise. + * Default: squirrel (7). + */ + JXL_ENC_FRAME_SETTING_EFFORT = 0, + + /** Sets the decoding speed tier for the provided options. Minimum is 0 + * (slowest to decode, best quality/density), and maximum is 4 (fastest to + * decode, at the cost of some quality/density). Default is 0. + */ + JXL_ENC_FRAME_SETTING_DECODING_SPEED = 1, + + /** Sets resampling option. If enabled, the image is downsampled before + * compression, and upsampled to original size in the decoder. Integer option, + * use -1 for the default behavior (resampling only applied for low quality), + * 1 for no downsampling (1x1), 2 for 2x2 downsampling, 4 for 4x4 + * downsampling, 8 for 8x8 downsampling. + */ + JXL_ENC_FRAME_SETTING_RESAMPLING = 2, + + /** Similar to JXL_ENC_FRAME_SETTING_RESAMPLING, but for extra channels. 
+ * Integer option, use -1 for the default behavior (depends on encoder + * implementation), 1 for no downsampling (1x1), 2 for 2x2 downsampling, 4 for + * 4x4 downsampling, 8 for 8x8 downsampling. + */ + JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING = 3, + + /** Indicates the frame added with @ref JxlEncoderAddImageFrame is already + * downsampled by the downsampling factor set with @ref + * JXL_ENC_FRAME_SETTING_RESAMPLING. The input frame must then be given in the + * downsampled resolution, not the full image resolution. The downsampled + * resolution is given by ceil(xsize / resampling), ceil(ysize / resampling) + * with xsize and ysize the dimensions given in the basic info, and resampling + * the factor set with @ref JXL_ENC_FRAME_SETTING_RESAMPLING. + * Use 0 to disable, 1 to enable. Default value is 0. + */ + JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED = 4, + + /** Adds noise to the image emulating photographic film noise, the higher the + * given number, the grainier the image will be. As an example, a value of 100 + * gives low noise whereas a value of 3200 gives a lot of noise. The default + * value is 0. + */ + JXL_ENC_FRAME_SETTING_PHOTON_NOISE = 5, + + /** Enables adaptive noise generation. This setting is not recommended for + * use, please use JXL_ENC_FRAME_SETTING_PHOTON_NOISE instead. Use -1 for the + * default (encoder chooses), 0 to disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_NOISE = 6, + + /** Enables or disables dots generation. Use -1 for the default (encoder + * chooses), 0 to disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_DOTS = 7, + + /** Enables or disables patches generation. Use -1 for the default (encoder + * chooses), 0 to disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_PATCHES = 8, + + /** Edge preserving filter level, -1 to 3. Use -1 for the default (encoder + * chooses), 0 to 3 to set a strength. + */ + JXL_ENC_FRAME_SETTING_EPF = 9, + + /** Enables or disables the gaborish filter. 
Use -1 for the default (encoder + * chooses), 0 to disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_GABORISH = 10, + + /** Enables modular encoding. Use -1 for default (encoder + * chooses), 0 to enforce VarDCT mode (e.g. for photographic images), 1 to + * enforce modular mode (e.g. for lossless images). + */ + JXL_ENC_FRAME_SETTING_MODULAR = 11, + + /** Enables or disables preserving color of invisible pixels. Use -1 for the + * default (1 if lossless, 0 if lossy), 0 to disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE = 12, + + /** Determines the order in which 256x256 regions are stored in the codestream + * for progressive rendering. Use -1 for the encoder + * default, 0 for scanline order, 1 for center-first order. + */ + JXL_ENC_FRAME_SETTING_GROUP_ORDER = 13, + + /** Determines the horizontal position of center for the center-first group + * order. Use -1 to automatically use the middle of the image, 0..xsize to + * specifically set it. + */ + JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X = 14, + + /** Determines the center for the center-first group order. Use -1 to + * automatically use the middle of the image, 0..ysize to specifically set it. + */ + JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y = 15, + + /** Enables or disables progressive encoding for modular mode. Use -1 for the + * encoder default, 0 to disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_RESPONSIVE = 16, + + /** Set the progressive mode for the AC coefficients of VarDCT, using spectral + * progression from the DCT coefficients. Use -1 for the encoder default, 0 to + * disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC = 17, + + /** Set the progressive mode for the AC coefficients of VarDCT, using + * quantization of the least significant bits. Use -1 for the encoder default, + * 0 to disable, 1 to enable. + */ + JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC = 18, + + /** Set the progressive mode using lower-resolution DC images for VarDCT. 
Use + * -1 for the encoder default, 0 to disable, 1 to have an extra 64x64 lower + * resolution pass, 2 to have a 512x512 and 64x64 lower resolution pass. + */ + JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC = 19, + + /** Use Global channel palette if the amount of colors is smaller than this + * percentage of range. Use 0-100 to set an explicit percentage, -1 to use the + * encoder default. Used for modular encoding. + */ + JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT = 20, + + /** Use Local (per-group) channel palette if the amount of colors is smaller + * than this percentage of range. Use 0-100 to set an explicit percentage, -1 + * to use the encoder default. Used for modular encoding. + */ + JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT = 21, + + /** Use color palette if amount of colors is smaller than or equal to this + * amount, or -1 to use the encoder default. Used for modular encoding. + */ + JXL_ENC_FRAME_SETTING_PALETTE_COLORS = 22, + + /** Enables or disables delta palette. Use -1 for the default (encoder + * chooses), 0 to disable, 1 to enable. Used in modular mode. + */ + JXL_ENC_FRAME_SETTING_LOSSY_PALETTE = 23, + + /** Color transform for internal encoding: -1 = default, 0=XYB, 1=none (RGB), + * 2=YCbCr. The XYB setting performs the forward XYB transform. None and + * YCbCr both perform no transform, but YCbCr is used to indicate that the + * encoded data losslessly represents YCbCr values. + */ + JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM = 24, + + /** Reversible color transform for modular encoding: -1=default, 0-41=RCT + * index, e.g. index 0 = none, index 6 = YCoCg. + * If this option is set to a non-default value, the RCT will be globally + * applied to the whole frame. + * The default behavior is to try several RCTs locally per modular group, + * depending on the speed and distance setting. + */ + JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE = 25, + + /** Group size for modular encoding: -1=default, 0=128, 1=256, 2=512, 3=1024. 
+ */ + JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE = 26, + + /** Predictor for modular encoding. -1 = default, 0=zero, 1=left, 2=top, + * 3=avg0, 4=select, 5=gradient, 6=weighted, 7=topright, 8=topleft, + * 9=leftleft, 10=avg1, 11=avg2, 12=avg3, 13=toptop predictive average 14=mix + * 5 and 6, 15=mix everything. + */ + JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR = 27, + + /** Fraction of pixels used to learn MA trees as a percentage. -1 = default, + * 0 = no MA and fast decode, 50 = default value, 100 = all, values above + * 100 are also permitted. Higher values use more encoder memory. + */ + JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT = 28, + + /** Number of extra (previous-channel) MA tree properties to use. -1 = + * default, 0-11 = valid values. Recommended values are in the range 0 to 3, + * or 0 to amount of channels minus 1 (including all extra channels, and + * excluding color channels when using VarDCT mode). Higher value gives slower + * encoding and slower decoding. + */ + JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS = 29, + + /** Enable or disable CFL (chroma-from-luma) for lossless JPEG recompression. + * -1 = default, 0 = disable CFL, 1 = enable CFL. + */ + JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL = 30, + + /** Prepare the frame for indexing in the frame index box. + * 0 = ignore this frame (same as not setting a value), + * 1 = index this frame within the Frame Index Box. + * If any frames are indexed, the first frame needs to + * be indexed, too. If the first frame is not indexed, and + * a later frame is attempted to be indexed, JXL_ENC_ERROR will occur. + * If non-keyframes, i.e., frames with cropping, blending or patches are + * attempted to be indexed, JXL_ENC_ERROR will occur. + */ + JXL_ENC_FRAME_INDEX_BOX = 31, + + /** Sets brotli encode effort for use in JPEG recompression and compressed + * metadata boxes (brob). Can be -1 (default) or 0 (fastest) to 11 (slowest). 
+ * Default is based on the general encode effort in case of JPEG + * recompression, and 4 for brob boxes. + */ + JXL_ENC_FRAME_SETTING_BROTLI_EFFORT = 32, + + /** Enables or disables brotli compression of metadata boxes derived from + * a JPEG frame when using JxlEncoderAddJPEGFrame. This has no effect on boxes + * added using JxlEncoderAddBox. + * -1 = default, 0 = disable compression, 1 = enable compression. + */ + JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES = 33, + + /** Control what kind of buffering is used, when using chunked image frames. + * 0 = buffers everything, basically the same as non-streamed code path + (mainly for testing) + * 1 = can buffer internal data (the tokens) + * 2 = can buffer the output + * 3 = minimize buffer usage: streamed input and chunked output, writing TOC + last (will not work with progressive) + + When the image dimensions are smaller than 2048 x 2048, all the options are the + same. Using 1, 2 or 3 can result in increasingly lower compression density. + */ + JXL_ENC_FRAME_SETTING_BUFFERING = 34, + + /** Keep or discard Exif metadata boxes derived from a JPEG frame when using + * JxlEncoderAddJPEGFrame. This has no effect on boxes added using + * JxlEncoderAddBox. When JxlEncoderStoreJPEGMetadata is set to 1, this option + * cannot be set to 0. Even when Exif metadata is discarded, the orientation + * will still be applied. 0 = discard Exif metadata, 1 = keep Exif metadata + * (default). + */ + JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF = 35, + + /** Keep or discard XMP metadata boxes derived from a JPEG frame when using + * JxlEncoderAddJPEGFrame. This has no effect on boxes added using + * JxlEncoderAddBox. When JxlEncoderStoreJPEGMetadata is set to 1, this option + * cannot be set to 0. 0 = discard XMP metadata, 1 = keep XMP metadata + * (default). + */ + JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP = 36, + + /** Keep or discard JUMBF metadata boxes derived from a JPEG frame when using + * JxlEncoderAddJPEGFrame.
This has no effect on boxes added using + * JxlEncoderAddBox. 0 = discard JUMBF metadata, 1 = keep JUMBF metadata + * (default). + */ + JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF = 37, + + /** Enum value not to be used as an option. This value is added to force the + * C compiler to have the enum to take a known size. + */ + JXL_ENC_FRAME_SETTING_FILL_ENUM = 65535, + +} JxlEncoderFrameSettingId; + +/** + * Creates an instance of JxlEncoder and initializes it. + * + * @p memory_manager will be used for all the library dynamic allocations made + * from this instance. The parameter may be NULL, in which case the default + * allocator will be used. See jpegxl/memory_manager.h for details. + * + * @param memory_manager custom allocator function. It may be NULL. The memory + * manager will be copied internally. + * @return @c NULL if the instance can not be allocated or initialized + * @return pointer to initialized JxlEncoder otherwise + */ +JXL_EXPORT JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager); + +/** + * Re-initializes a JxlEncoder instance, so it can be re-used for encoding + * another image. All state and settings are reset as if the object was + * newly created with JxlEncoderCreate, but the memory manager is kept. + * + * @param enc instance to be re-initialized. + */ +JXL_EXPORT void JxlEncoderReset(JxlEncoder* enc); + +/** + * Deinitializes and frees JxlEncoder instance. + * + * @param enc instance to be cleaned up and deallocated. + */ +JXL_EXPORT void JxlEncoderDestroy(JxlEncoder* enc); + +/** + * Sets the color management system (CMS) that will be used for color conversion + * (if applicable) during encoding. May only be set before starting encoding. If + * left unset, the default CMS implementation will be used. + * + * @param enc encoder object. + * @param cms structure representing a CMS implementation. See JxlCmsInterface + * for more details. 
+ */ +JXL_EXPORT void JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms); + +/** + * Set the parallel runner for multithreading. May only be set before starting + * encoding. + * + * @param enc encoder object. + * @param parallel_runner function pointer to runner for multithreading. It may + * be NULL to use the default, single-threaded, runner. A multithreaded + * runner should be set to reach fast performance. + * @param parallel_runner_opaque opaque pointer for parallel_runner. + * @return JXL_ENC_SUCCESS if the runner was set, JXL_ENC_ERROR + * otherwise (the previous runner remains set). + */ +JXL_EXPORT JxlEncoderStatus +JxlEncoderSetParallelRunner(JxlEncoder* enc, JxlParallelRunner parallel_runner, + void* parallel_runner_opaque); + +/** + * Get the (last) error code in case JXL_ENC_ERROR was returned. + * + * @param enc encoder object. + * @return the JxlEncoderError that caused the (last) JXL_ENC_ERROR to be + * returned. + */ +JXL_EXPORT JxlEncoderError JxlEncoderGetError(JxlEncoder* enc); + +/** + * Encodes JPEG XL file using the available bytes. @p *avail_out indicates how + * many output bytes are available, and @p *next_out points to where the next + * output bytes will be written. + * *avail_out will be decremented by the amount of bytes that have been + * processed by the encoder and *next_out will be incremented by the same + * amount, so *next_out will now point at the remaining *avail_out unprocessed + * bytes. + * + * The returned status indicates whether the encoder needs more output bytes. + * When the return value is not JXL_ENC_ERROR or JXL_ENC_SUCCESS, the encoding + * requires more JxlEncoderProcessOutput calls to continue. + * + * The caller must guarantee that *avail_out >= 32 when calling + * JxlEncoderProcessOutput; otherwise, JXL_ENC_NEED_MORE_OUTPUT will be + * returned. It is guaranteed that, if *avail_out >= 32, at least one byte of + * output will be written. + * + * This encodes the frames and/or boxes added so far.
If the last frame or last + * box has been added, @ref JxlEncoderCloseInput, @ref JxlEncoderCloseFrames + * and/or @ref JxlEncoderCloseBoxes must be called before the next + * @ref JxlEncoderProcessOutput call, or the codestream won't be encoded + * correctly. + * + * @param enc encoder object. + * @param next_out pointer to next bytes to write to. + * @param avail_out amount of bytes available starting from *next_out. + * @return JXL_ENC_SUCCESS when encoding finished and all events handled. + * @return JXL_ENC_ERROR when encoding failed, e.g. invalid input. + * @return JXL_ENC_NEED_MORE_OUTPUT more output buffer is necessary. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc, + uint8_t** next_out, + size_t* avail_out); + +/** + * Sets the frame information for this frame to the encoder. This includes + * animation information such as frame duration to store in the frame header. + * The frame header fields represent the frame as passed to the encoder, but not + * necessarily the exact values as they will be encoded in the file format: the encoder + * could change crop and blending options of a frame for more efficient encoding + * or introduce additional internal frames. Animation duration and time code + * information is not altered since those are immutable metadata of the frame. + * + * It is not required to use this function, however if have_animation is set + * to true in the basic info, then this function should be used to set the + * time duration of this individual frame. By default individual frames have a + * time duration of 0, making them form a composite still. See @ref + * JxlFrameHeader for more information. + * + * This information is stored in the JxlEncoderFrameSettings and so is used for + * any frame encoded with these JxlEncoderFrameSettings.
It is ok to change + * between @ref JxlEncoderAddImageFrame calls, each added image frame will have + * the frame header that was set in the options at the time of calling + * JxlEncoderAddImageFrame. + * + * The is_last and name_length fields of the JxlFrameHeader are ignored, use + * @ref JxlEncoderCloseFrames to indicate last frame, and @ref + * JxlEncoderSetFrameName to indicate the name and its length instead. + * Calling this function will clear any name that was previously set with @ref + * JxlEncoderSetFrameName. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param frame_header frame header data to set. Object owned by the caller and + * does not need to be kept in memory, its information is copied internally. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus +JxlEncoderSetFrameHeader(JxlEncoderFrameSettings* frame_settings, + const JxlFrameHeader* frame_header); + +/** + * Sets blend info of an extra channel. The blend info of extra channels is set + * separately from that of the color channels, the color channels are set with + * @ref JxlEncoderSetFrameHeader. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param index index of the extra channel to use. + * @param blend_info blend info to set for the extra channel + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBlendInfo( + JxlEncoderFrameSettings* frame_settings, size_t index, + const JxlBlendInfo* blend_info); + +/** + * Sets the name of the animation frame. This function is optional, frames are + * not required to have a name. This setting is a part of the frame header, and + * the same principles as for @ref JxlEncoderSetFrameHeader apply. 
The + * name_length field of JxlFrameHeader is ignored by the encoder, this function + * determines the name length instead as the length in bytes of the C string. + * + * The maximum possible name length is 1071 bytes (excluding terminating null + * character). + * + * Calling @ref JxlEncoderSetFrameHeader clears any name that was + * previously set. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param frame_name name of the next frame to be encoded, as a UTF-8 encoded C + * string (zero terminated). Owned by the caller, and copied internally. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameName( + JxlEncoderFrameSettings* frame_settings, const char* frame_name); + +/** + * Sets the bit depth of the input buffer. + * + * For float pixel formats, only the default JXL_BIT_DEPTH_FROM_PIXEL_FORMAT + * setting is allowed, while for unsigned pixel formats, + * JXL_BIT_DEPTH_FROM_CODESTREAM setting is also allowed. See the comment on + * @ref JxlEncoderAddImageFrame for the effects of the bit depth setting. + + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param bit_depth the bit depth setting of the pixel input + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameBitDepth( + JxlEncoderFrameSettings* frame_settings, const JxlBitDepth* bit_depth); + +/** + * Sets the buffer to read JPEG encoded bytes from for the next frame to encode. + * + * If JxlEncoderSetBasicInfo has not yet been called, calling + * JxlEncoderAddJPEGFrame will implicitly call it with the parameters of the + * added JPEG frame. 
+ * + * If JxlEncoderSetColorEncoding or JxlEncoderSetICCProfile has not yet been + * called, calling JxlEncoderAddJPEGFrame will implicitly call it with the + * parameters of the added JPEG frame. + * + * If the encoder is set to store JPEG reconstruction metadata using @ref + * JxlEncoderStoreJPEGMetadata and a single JPEG frame is added, it will be + * possible to losslessly reconstruct the JPEG codestream. + * + * If this is the last frame, @ref JxlEncoderCloseInput or @ref + * JxlEncoderCloseFrames must be called before the next + * @ref JxlEncoderProcessOutput call. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param buffer bytes to read JPEG from. Owned by the caller and its contents + * are copied internally. + * @param size size of buffer in bytes. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus +JxlEncoderAddJPEGFrame(const JxlEncoderFrameSettings* frame_settings, + const uint8_t* buffer, size_t size); + +/** + * Sets the buffer to read pixels from for the next image to encode. Must call + * JxlEncoderSetBasicInfo before JxlEncoderAddImageFrame. + * + * Currently only some data types for pixel formats are supported: + * - JXL_TYPE_UINT8, with range 0..255 + * - JXL_TYPE_UINT16, with range 0..65535 + * - JXL_TYPE_FLOAT16, with nominal range 0..1 + * - JXL_TYPE_FLOAT, with nominal range 0..1 + * + * Note: the sample data type in pixel_format is allowed to be different from + * what is described in the JxlBasicInfo. The type in pixel_format, together + * with an optional @ref JxlBitDepth parameter set by @ref + * JxlEncoderSetFrameBitDepth describes the format of the uncompressed pixel + * buffer. The bits_per_sample and exponent_bits_per_sample in the JxlBasicInfo + * describes what will actually be encoded in the JPEG XL codestream. 
+ * For example, to encode a 12-bit image, you would set bits_per_sample to 12, + * while the input frame buffer can be in the following formats: + * - if pixel format is in JXL_TYPE_UINT16 with default bit depth setting + * (i.e. JXL_BIT_DEPTH_FROM_PIXEL_FORMAT), input sample values are rescaled + * to 16-bit, i.e. multiplied by 65535/4095; + * - if pixel format is in JXL_TYPE_UINT16 with JXL_BIT_DEPTH_FROM_CODESTREAM + * bit depth setting, input sample values are provided unscaled; + * - if pixel format is in JXL_TYPE_FLOAT, input sample values are rescaled + * to 0..1, i.e. multiplied by 1.f/4095.f. + * While it is allowed, it is obviously not recommended to use a pixel_format + * with lower precision than what is specified in the JxlBasicInfo. + * + * We support interleaved channels as described by the JxlPixelFormat: + * - single-channel data, e.g. grayscale + * - single-channel + alpha + * - trichromatic, e.g. RGB + * - trichromatic + alpha + * + * Extra channels not handled here need to be set by @ref + * JxlEncoderSetExtraChannelBuffer. + * If the image has alpha, and alpha is not passed here, it will implicitly be + * set to all-opaque (an alpha value of 1.0 everywhere). + * + * The pixels are assumed to be encoded in the original profile that is set with + * JxlEncoderSetColorEncoding or JxlEncoderSetICCProfile. If none of these + * functions were used, the pixels are assumed to be nonlinear sRGB for integer + * data types (JXL_TYPE_UINT8, JXL_TYPE_UINT16), and linear sRGB for floating + * point data types (JXL_TYPE_FLOAT16, JXL_TYPE_FLOAT). + * + * Sample values in floating-point pixel formats are allowed to be outside the + * nominal range, e.g. to represent out-of-sRGB-gamut colors in the + * uses_original_profile=false case. They are however not allowed to be NaN or + * +-infinity. + * + * If this is the last frame, @ref JxlEncoderCloseInput or @ref + * JxlEncoderCloseFrames must be called before the next + * @ref JxlEncoderProcessOutput call. 
+ * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param pixel_format format for pixels. Object owned by the caller and its + * contents are copied internally. + * @param buffer buffer type to input the pixel data from. Owned by the caller + * and its contents are copied internally. + * @param size size of buffer in bytes. This size should match what is implied + * by the frame dimensions and the pixel format. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderAddImageFrame( + const JxlEncoderFrameSettings* frame_settings, + const JxlPixelFormat* pixel_format, const void* buffer, size_t size); + +/** + * TODO(firsching): add documentation + * + */ +typedef void (*JxlEncoderOutputCallback)(void* run_opaque, size_t pos, + size_t num_bytes); + +/** + * TODO(firsching): add documentation + * + */ +JXL_EXPORT JxlEncoderStatus +JxlEncoderSetOutputCallback(JxlEncoderOutputCallback callback); + +/** + * TODO(firsching): add documentation + * + * @param frame_settings + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus +JxlEncoderChunkedImageFrameStart(const JxlEncoderFrameSettings* frame_settings); + +/** + * TODO(firsching): add documentation + * We process exactly one 2048x2048 DC-group. + * + * @param frame_settings + * @param x horizontal position of the top-left corner of the processed group. + * Must be divisible by 2048. + * @param y vertical position of the top-left corner of the processed group. + * Must be divisible by 2048. + * @param pixel_format for pixels. Object owned by the caller and its contents + * are copied internally. + * @param input_data the input buffer. + * @param input_size size of the input data in bytes. 
+ * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderChunkedImageFrameAddPart( + const JxlEncoderFrameSettings* frame_settings, size_t x, size_t y, + const JxlPixelFormat* pixel_format, const void* input_data, + size_t input_size); + +/** + * Sets the buffer to read pixels from for an extra channel at a given index. + * The index must be smaller than the num_extra_channels in the associated + * JxlBasicInfo. Must call @ref JxlEncoderSetExtraChannelInfo before + * JxlEncoderSetExtraChannelBuffer. + * + * TODO(firsching): mention what data types in pixel formats are supported. + * + * It is required to call this function for every extra channel, except for the + * alpha channel if that was already set through @ref JxlEncoderAddImageFrame. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param pixel_format format for pixels. Object owned by the caller and its + * contents are copied internally. The num_channels value is ignored, since the + * number of channels for an extra channel is always assumed to be one. + * @param buffer buffer type to input the pixel data from. Owned by the caller + * and its contents are copied internally. + * @param size size of buffer in bytes. This size should match what is implied + * by the frame dimensions and the pixel format. + * @param index index of the extra channel to use. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBuffer( + const JxlEncoderFrameSettings* frame_settings, + const JxlPixelFormat* pixel_format, const void* buffer, size_t size, + uint32_t index); + +/** Adds a metadata box to the file format. JxlEncoderProcessOutput must be used + * to effectively write the box to the output. @ref JxlEncoderUseBoxes must + * be enabled before using this function.
+ * + * Boxes allow inserting application-specific data and metadata (Exif, XML/XMP, + * JUMBF and user defined boxes). + * + * The box format follows ISO BMFF and shares features and box types with other + * image and video formats, including the Exif, XML and JUMBF boxes. The box + * format for JPEG XL is specified in ISO/IEC 18181-2. + * + * Boxes in general don't contain other boxes inside, except a JUMBF superbox. + * Boxes follow each other sequentially and are byte-aligned. If the container + * format is used, the JXL stream consists of concatenated boxes. + * It is also possible to use a direct codestream without boxes, but in that + * case metadata cannot be added. + * + * Each box generally has the following byte structure in the file: + * - 4 bytes: box size including box header (Big endian. If set to 0, an + * 8-byte 64-bit size follows instead). + * - 4 bytes: type, e.g. "JXL " for the signature box, "jxlc" for a codestream + * box. + * - N bytes: box contents. + * + * Only the box contents are provided to the contents argument of this function, + * the encoder encodes the size header itself. Most boxes are written + * automatically by the encoder as needed ("JXL ", "ftyp", "jxll", "jxlc", + * "jxlp", "jxli", "jbrd"), and this function only needs to be called to add + * optional metadata when encoding from pixels (using JxlEncoderAddImageFrame). + * When recompressing JPEG files (using JxlEncoderAddJPEGFrame), if the input + * JPEG contains EXIF, XMP or JUMBF metadata, the corresponding boxes are + * already added automatically. + * + * Box types are given by 4 characters. The following boxes can be added with + * this function: + * - "Exif": a box with EXIF metadata, can be added by libjxl users, or is + * automatically added when needed for JPEG reconstruction. The contents of + * this box must be prepended by a 4-byte tiff header offset, which may + * be 4 zero bytes in case the tiff header follows immediately. 
+ * The EXIF metadata must be in sync with what is encoded in the JPEG XL + * codestream, specifically the image orientation. While this is not + * recommended in practice, in case of conflicting metadata, the JPEG XL + * codestream takes precedence. + * - "xml ": a box with XML data, in particular XMP metadata, can be added by + * libjxl users, or is automatically added when needed for JPEG reconstruction + * - "jumb": a JUMBF superbox, which can contain boxes with different types of + * metadata inside. This box type can be added by the encoder transparently, + * and other libraries to create and handle JUMBF content exist. + * - Application-specific boxes. Their typename should not begin with "jxl" or + * "JXL" or conflict with other existing typenames, and they should be + * registered with MP4RA (mp4ra.org). + * + * These boxes can be stored uncompressed or Brotli-compressed (using a "brob" + * box), depending on the compress_box parameter. + * + * @param enc encoder object. + * @param type the box type, e.g. "Exif" for EXIF metadata, "xml " for XMP or + * IPTC metadata, "jumb" for JUMBF metadata. + * @param contents the full contents of the box, for example EXIF + * data. ISO BMFF box header must not be included, only the contents. Owned by + * the caller and its contents are copied internally. + * @param size size of the box contents. + * @param compress_box Whether to compress this box as a "brob" box. Requires + * Brotli support. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error, such as when + * using this function without JxlEncoderUseContainer, or adding a box type + * that would result in an invalid file format. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderAddBox(JxlEncoder* enc, + const JxlBoxType type, + const uint8_t* contents, + size_t size, + JXL_BOOL compress_box); + +/** + * Indicates the intention to add metadata boxes. This allows @ref + * JxlEncoderAddBox to be used. 
When using this function, then it is required + * to use @ref JxlEncoderCloseBoxes at the end. + * + * By default the encoder assumes no metadata boxes will be added. + * + * This setting can only be set at the beginning, before encoding starts. + * + * @param enc encoder object. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderUseBoxes(JxlEncoder* enc); + +/** + * Declares that no further boxes will be added with @ref JxlEncoderAddBox. + * This function must be called after the last box is added so the encoder knows + * the stream will be finished. It is not necessary to use this function if + * @ref JxlEncoderUseBoxes is not used. Further frames may still be added. + * + * Must be called between JxlEncoderAddBox of the last box + * and the next call to JxlEncoderProcessOutput, or @ref JxlEncoderProcessOutput + * won't output the last box correctly. + * + * NOTE: if you don't need to close frames and boxes at separate times, you can + * use @ref JxlEncoderCloseInput instead to close both at once. + * + * @param enc encoder object. + */ +JXL_EXPORT void JxlEncoderCloseBoxes(JxlEncoder* enc); + +/** + * Declares that no frames will be added and @ref JxlEncoderAddImageFrame and + * @ref JxlEncoderAddJPEGFrame won't be called anymore. Further metadata boxes + * may still be added. This function or @ref JxlEncoderCloseInput must be called + * after adding the last frame and the next call to + * @ref JxlEncoderProcessOutput, or the frame won't be properly marked as last. + * + * NOTE: if you don't need to close frames and boxes at separate times, you can + * use @ref JxlEncoderCloseInput instead to close both at once. + * + * @param enc encoder object. + */ +JXL_EXPORT void JxlEncoderCloseFrames(JxlEncoder* enc); + +/** + * Closes any input to the encoder, equivalent to calling JxlEncoderCloseFrames + * as well as calling JxlEncoderCloseBoxes if needed. 
No further input of any + * kind may be given to the encoder, but further @ref JxlEncoderProcessOutput + * calls should be done to create the final output. + * + * The requirements of both @ref JxlEncoderCloseFrames and @ref + * JxlEncoderCloseBoxes apply to this function. Either this function or the + * other two must be called after the final frame and/or box, and the next + * @ref JxlEncoderProcessOutput call, or the codestream won't be encoded + * correctly. + * + * @param enc encoder object. + */ +JXL_EXPORT void JxlEncoderCloseInput(JxlEncoder* enc); + +/** + * Sets the original color encoding of the image encoded by this encoder. This + * is an alternative to JxlEncoderSetICCProfile and only one of these two must + * be used. This one sets the color encoding as a @ref JxlColorEncoding, while + * the other sets it as ICC binary data. + * Must be called after JxlEncoderSetBasicInfo. + * + * @param enc encoder object. + * @param color color encoding. Object owned by the caller and its contents are + * copied internally. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or + * JXL_ENC_NOT_SUPPORTED otherwise + */ +JXL_EXPORT JxlEncoderStatus +JxlEncoderSetColorEncoding(JxlEncoder* enc, const JxlColorEncoding* color); + +/** + * Sets the original color encoding of the image encoded by this encoder as an + * ICC color profile. This is an alternative to JxlEncoderSetColorEncoding and + * only one of these two must be used. This one sets the color encoding as ICC + * binary data, while the other defines it as a @ref JxlColorEncoding. + * Must be called after JxlEncoderSetBasicInfo. + * + * @param enc encoder object. 
+ * @param icc_profile bytes of the original ICC profile + * @param size size of the icc_profile buffer in bytes + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR or + * JXL_ENC_NOT_SUPPORTED otherwise + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc, + const uint8_t* icc_profile, + size_t size); + +/** + * Initializes a JxlBasicInfo struct to default values. + * For forwards-compatibility, this function has to be called before values + * are assigned to the struct fields. + * The default values correspond to an 8-bit RGB image, no alpha or any + * other extra channels. + * + * @param info global image metadata. Object owned by the caller. + */ +JXL_EXPORT void JxlEncoderInitBasicInfo(JxlBasicInfo* info); + +/** + * Initializes a JxlFrameHeader struct to default values. + * For forwards-compatibility, this function has to be called before values + * are assigned to the struct fields. + * The default values correspond to a frame with no animation duration and the + * 'replace' blend mode. After using this function, the duration must be set + * for animations, and the blend settings must be set for composite stills. + * + * @param frame_header frame metadata. Object owned by the caller. + */ +JXL_EXPORT void JxlEncoderInitFrameHeader(JxlFrameHeader* frame_header); + +/** + * Initializes a JxlBlendInfo struct to default values. + * For forwards-compatibility, this function has to be called before values + * are assigned to the struct fields. + * + * @param blend_info blending info. Object owned by the caller. + */ +JXL_EXPORT void JxlEncoderInitBlendInfo(JxlBlendInfo* blend_info); + +/** + * Sets the global metadata of the image encoded by this encoder. + * + * If the JxlBasicInfo contains information of extra channels beyond an alpha + * channel, then @ref JxlEncoderSetExtraChannelInfo must be called between + * JxlEncoderSetBasicInfo and @ref JxlEncoderAddImageFrame.
In order to indicate + * extra channels, the value of `info.num_extra_channels` should be set to the + * number of extra channels, also counting the alpha channel if present. + * + * @param enc encoder object. + * @param info global image metadata. Object owned by the caller and its + * contents are copied internally. + * @return JXL_ENC_SUCCESS if the operation was successful, + * JXL_ENC_ERROR or JXL_ENC_NOT_SUPPORTED otherwise + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc, + const JxlBasicInfo* info); + +/** + * Sets the upsampling method the decoder will use in case there are frames + * with JXL_ENC_FRAME_SETTING_RESAMPLING set. This is useful in combination + * with the JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED option, to control the + * type of upsampling that will be used. + * + * @param enc encoder object. + * @param factor upsampling factor to configure (1, 2, 4 or 8; for 1 this + * function has no effect at all) + * @param mode upsampling mode to use for this upsampling: + * -1: default (good for photographic images, no signaling overhead) + * 0: nearest neighbor (good for pixel art) + * 1: 'pixel dots' (same as NN for 2x, diamond-shaped 'pixel dots' for 4x/8x) + * @return JXL_ENC_SUCCESS if the operation was successful, + * JXL_ENC_ERROR or JXL_ENC_NOT_SUPPORTED otherwise + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetUpsamplingMode(JxlEncoder* enc, + const int64_t factor, + const int64_t mode); + +/** + * Initializes a JxlExtraChannelInfo struct to default values. + * For forwards-compatibility, this function has to be called before values + * are assigned to the struct fields. + * The default values correspond to an 8-bit channel of the provided type. + * + * @param type type of the extra channel. + * @param info global extra channel metadata. Object owned by the caller and its + * contents are copied internally. 
+ */ +JXL_EXPORT void JxlEncoderInitExtraChannelInfo(JxlExtraChannelType type, + JxlExtraChannelInfo* info); + +/** + * Sets information for the extra channel at the given index. The index + * must be smaller than num_extra_channels in the associated JxlBasicInfo. + * + * @param enc encoder object + * @param index index of the extra channel to set. + * @param info global extra channel metadata. Object owned by the caller and its + * contents are copied internally. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelInfo( + JxlEncoder* enc, size_t index, const JxlExtraChannelInfo* info); + +/** + * Sets the name for the extra channel at the given index in UTF-8. The index + * must be smaller than the num_extra_channels in the associated JxlBasicInfo. + * + * TODO(lode): remove size parameter for consistency with + * JxlEncoderSetFrameName + * + * @param enc encoder object + * @param index index of the extra channel to set. + * @param name buffer with the name of the extra channel. + * @param size size of the name buffer in bytes, not counting the terminating + * character. + * @return JXL_ENC_SUCCESS on success, JXL_ENC_ERROR on error + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelName(JxlEncoder* enc, + size_t index, + const char* name, + size_t size); + +/** + * Sets a frame-specific option of integer type to the encoder options. + * The JxlEncoderFrameSettingId argument determines which option is set. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param option ID of the option to set. + * @param value Integer value to set for this option. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR in + * case of an error, such as invalid or unknown option id, or invalid integer + * value for the given option. 
If an error is returned, the state of the + * JxlEncoderFrameSettings object is still valid and is the same as before this + * function was called. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderFrameSettingsSetOption( + JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option, + int64_t value); + +/** + * Sets a frame-specific option of float type to the encoder options. + * The JxlEncoderFrameSettingId argument determines which option is set. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param option ID of the option to set. + * @param value Float value to set for this option. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR in + * case of an error, such as invalid or unknown option id, or invalid float + * value for the given option. If an error is returned, the state of the + * JxlEncoderFrameSettings object is still valid and is the same as before this + * function was called. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderFrameSettingsSetFloatOption( + JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option, + float value); + +/** Forces the encoder to use the box-based container format (BMFF) even + * when not necessary. + * + * When using @ref JxlEncoderUseBoxes, @ref JxlEncoderStoreJPEGMetadata or @ref + * JxlEncoderSetCodestreamLevel with level 10, the encoder will automatically + * also use the container format, it is not necessary to use + * JxlEncoderUseContainer for those use cases. + * + * By default this setting is disabled. + * + * This setting can only be set at the beginning, before encoding starts. + * + * @param enc encoder object. + * @param use_container true if the encoder should always output the JPEG XL + * container format, false to only output it when necessary. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR + * otherwise.
+ */ +JXL_EXPORT JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc, + JXL_BOOL use_container); + +/** + * Configure the encoder to store JPEG reconstruction metadata in the JPEG XL + * container. + * + * If this is set to true and a single JPEG frame is added, it will be + * possible to losslessly reconstruct the JPEG codestream. + * + * This setting can only be set at the beginning, before encoding starts. + * + * @param enc encoder object. + * @param store_jpeg_metadata true if the encoder should store JPEG metadata. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR + * otherwise. + */ +JXL_EXPORT JxlEncoderStatus +JxlEncoderStoreJPEGMetadata(JxlEncoder* enc, JXL_BOOL store_jpeg_metadata); + +/** Sets the feature level of the JPEG XL codestream. Valid values are 5 and + * 10, or -1 (to choose automatically). Using the minimum required level, or + * level 5 in most cases, is recommended for compatibility with all decoders. + * + * Level 5: for end-user image delivery, this level is the most widely + * supported level by image decoders and the recommended level to use unless a + * level 10 feature is absolutely necessary. Supports a maximum resolution + * 268435456 pixels total with a maximum width or height of 262144 pixels, + * maximum 16-bit color channel depth, maximum 120 frames per second for + * animation, maximum ICC color profile size of 4 MiB, it allows all color + * models and extra channel types except CMYK and the JXL_CHANNEL_BLACK extra + * channel, and a maximum of 4 extra channels in addition to the 3 color + * channels. It also sets boundaries to certain internally used coding tools. + * + * Level 10: this level removes or increases the bounds of most of the level + * 5 limitations, allows CMYK color and up to 32 bits per color channel, but + * may be less widely supported. + * + * The default value is -1. 
This means the encoder will automatically choose + * between level 5 and level 10 based on what information is inside the @ref + * JxlBasicInfo structure. Do note that some level 10 features, particularly + * those used by animated JPEG XL codestreams, might require level 10, even + * though the @ref JxlBasicInfo only suggests level 5. In this case, the level + * must be explicitly set to 10, otherwise the encoder will return an error. + * The encoder will restrict internal encoding choices to those compatible with + * the level setting. + * + * This setting can only be set at the beginning, before encoding starts. + * + * @param enc encoder object. + * @param level the level value to set, must be -1, 5, or 10. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR + * otherwise. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetCodestreamLevel(JxlEncoder* enc, + int level); + +/** Returns the codestream level required to support the currently configured + * settings and basic info. This function can only be used at the beginning, + * before encoding starts, but after setting basic info. + * + * This does not support per-frame settings, only global configuration, such as + * the image dimensions, that are known at the time of writing the header of + * the JPEG XL file. + * + * If this returns 5, nothing needs to be done and the codestream can be + * compatible with any decoder. If this returns 10, JxlEncoderSetCodestreamLevel + * has to be used to set the codestream level to 10, or the encoder can be + * configured differently to allow using the more compatible level 5. + * + * @param enc encoder object. + * @return -1 if no level can support the configuration (e.g. image dimensions + * larger than even level 10 supports), 5 if level 5 is supported, 10 if setting + * the codestream level to 10 is required. + * + */ +JXL_EXPORT int JxlEncoderGetRequiredCodestreamLevel(const JxlEncoder* enc); + +/** + * Enables lossless encoding. 
+ * + * This is not an option like the others by itself, but rather while enabled it + * overrides a set of existing options (such as distance, modular mode and + * color transform) that enables bit-for-bit lossless encoding. + * + * When disabled, those options are not overridden, but since those options + * could still have been manually set to a combination that operates losslessly, + * using this function with lossless set to JXL_FALSE does not guarantee + * lossy encoding, though the default set of options is lossy. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param lossless whether to override options for lossless mode + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR + * otherwise. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameLossless( + JxlEncoderFrameSettings* frame_settings, JXL_BOOL lossless); + +/** + * Sets the distance level for lossy compression: target max butteraugli + * distance, lower = higher quality. Range: 0 .. 15. + * 0.0 = mathematically lossless (however, use JxlEncoderSetFrameLossless + * instead to use true lossless, as setting distance to 0 alone is not the only + * requirement). 1.0 = visually lossless. Recommended range: 0.5 .. 3.0. Default + * value: 1.0. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param distance the distance value to set. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR + * otherwise. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetFrameDistance( + JxlEncoderFrameSettings* frame_settings, float distance); + +/** + * Sets the distance level for lossy compression of extra channels. + * The distance is as in JxlEncoderSetFrameDistance (lower = higher quality). + * If not set, or if set to the special value -1, the distance that was set with + * JxlEncoderSetFrameDistance will be used.
+ * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param index index of the extra channel to set a distance value for. + * @param distance the distance value to set. + * @return JXL_ENC_SUCCESS if the operation was successful, JXL_ENC_ERROR + * otherwise. + */ +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelDistance( + JxlEncoderFrameSettings* frame_settings, size_t index, float distance); + +/** + * Create a new set of encoder options, with all values initially copied from + * the @p source options, or set to default if @p source is NULL. + * + * The returned pointer is an opaque struct tied to the encoder and it will be + * deallocated by the encoder when JxlEncoderDestroy() is called. For functions + * taking both a @ref JxlEncoder and a @ref JxlEncoderFrameSettings, only + * JxlEncoderFrameSettings created with this function for the same encoder + * instance can be used. + * + * @param enc encoder object. + * @param source source options to copy initial values from, or NULL to get + * options initialized to defaults. + * @return the opaque struct pointer identifying a new set of encoder options. + */ +JXL_EXPORT JxlEncoderFrameSettings* JxlEncoderFrameSettingsCreate( + JxlEncoder* enc, const JxlEncoderFrameSettings* source); + +/** + * Sets a color encoding to be sRGB. + * + * @param color_encoding color encoding instance. + * @param is_gray whether the color encoding should be gray scale or color. + */ +JXL_EXPORT void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding, + JXL_BOOL is_gray); + +/** + * Sets a color encoding to be linear sRGB. + * + * @param color_encoding color encoding instance. + * @param is_gray whether the color encoding should be gray scale or color. + */ +JXL_EXPORT void JxlColorEncodingSetToLinearSRGB( + JxlColorEncoding* color_encoding, JXL_BOOL is_gray); + +/** + * Enables usage of expert options.
+ * + * At the moment, the only expert option is setting an effort value of 10, + * which gives the best compression for pixel-lossless modes but is very slow. + * + * @param enc encoder object. + */ +JXL_EXPORT void JxlEncoderAllowExpertOptions(JxlEncoder* enc); + +/** + * Function type for @ref JxlEncoderSetDebugImageCallback. + * + * The callback may be called simultaneously by different threads when using a + * threaded parallel runner, on different debug images. + * + * @param opaque optional user data, as given to @ref + * JxlEncoderSetDebugImageCallback. + * @param label label of debug image, can be used in filenames + * @param xsize width of debug image + * @param ysize height of debug image + * @param color color encoding of debug image + * @param pixels pixel data of debug image as big-endian 16-bit unsigned + * samples. The memory is not owned by the user, and is only valid during the + * time the callback is running. + */ +typedef void (*JxlDebugImageCallback)(void* opaque, const char* label, + size_t xsize, size_t ysize, + const JxlColorEncoding* color, + const uint16_t* pixels); + +/** + * Sets the given debug image callback that will be used by the encoder to + * output various debug images during encoding. + * + * This only has any effect if the encoder was compiled with the appropriate + * debug build flags. + * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param callback used to return the debug image + * @param opaque user supplied parameter to the image callback + */ +JXL_EXPORT void JxlEncoderSetDebugImageCallback( + JxlEncoderFrameSettings* frame_settings, JxlDebugImageCallback callback, + void* opaque); + +/** + * Sets the given stats object for gathering various statistics during encoding. + * + * This only has any effect if the encoder was compiled with the appropriate + * debug build flags. 
+ * + * @param frame_settings set of options and metadata for this frame. Also + * includes reference to the encoder object. + * @param stats object that can be used to query the gathered stats (created + * by @ref JxlEncoderStatsCreate) + */ +JXL_EXPORT void JxlEncoderCollectStats(JxlEncoderFrameSettings* frame_settings, + JxlEncoderStats* stats); + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_ENCODE_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/encode_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/encode_cxx.h new file mode 100644 index 0000000000..3889e12c14 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/encode_cxx.h @@ -0,0 +1,57 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/// @addtogroup libjxl_encoder +///@{ +/// +/// @file encode_cxx.h +/// @brief C++ header-only helper for @ref encode.h. +/// +/// There's no binary library associated with the header since this is a header +/// only library. + +#ifndef JXL_ENCODE_CXX_H_ +#define JXL_ENCODE_CXX_H_ + +#include + +#include + +#if !(defined(__cplusplus) || defined(c_plusplus)) +#error "This a C++ only header. Use jxl/encode.h from C sources." +#endif + +/// Struct to call JxlEncoderDestroy from the JxlEncoderPtr unique_ptr. +struct JxlEncoderDestroyStruct { + /// Calls @ref JxlEncoderDestroy() on the passed encoder. + void operator()(JxlEncoder* encoder) { JxlEncoderDestroy(encoder); } +}; + +/// std::unique_ptr<> type that calls JxlEncoderDestroy() when releasing the +/// encoder. +/// +/// Use this helper type from C++ sources to ensure the encoder is destroyed and +/// their internal resources released. +typedef std::unique_ptr JxlEncoderPtr; + +/// Creates an instance of JxlEncoder into a JxlEncoderPtr and initializes it. 
+/// +/// This function returns a unique_ptr that will call JxlEncoderDestroy() when +/// releasing the pointer. See @ref JxlEncoderCreate for details on the +/// instance creation. +/// +/// @param memory_manager custom allocator function. It may be NULL. The memory +/// manager will be copied internally. +/// @return a @c NULL JxlEncoderPtr if the instance can not be allocated or +/// initialized +/// @return initialized JxlEncoderPtr instance otherwise. +static inline JxlEncoderPtr JxlEncoderMake( + const JxlMemoryManager* memory_manager) { + return JxlEncoderPtr(JxlEncoderCreate(memory_manager)); +} + +#endif // JXL_ENCODE_CXX_H_ + +/// @} diff --git a/third-party/libjxl/libjxl/lib/include/jxl/memory_manager.h b/third-party/libjxl/libjxl/lib/include/jxl/memory_manager.h new file mode 100644 index 0000000000..52640a8beb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/memory_manager.h @@ -0,0 +1,72 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_common + * @{ + * @file memory_manager.h + * @brief Abstraction functions used by JPEG XL to allocate memory. + */ + +#ifndef JXL_MEMORY_MANAGER_H_ +#define JXL_MEMORY_MANAGER_H_ + +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** + * Allocating function for a memory region of a given size. + * + * Allocates a contiguous memory region of size @p size bytes. The returned + * memory may not be aligned to a specific size or initialized at all. + * + * @param opaque custom memory manager handle provided by the caller. + * @param size in bytes of the requested memory region. + * @return @c NULL if the memory can not be allocated, + * @return pointer to the memory otherwise. + */ +typedef void* (*jpegxl_alloc_func)(void* opaque, size_t size); + +/** + * Deallocating function pointer type. 
+ * + * This function @b MUST do nothing if @p address is @c NULL. + * + * @param opaque custom memory manager handle provided by the caller. + * @param address memory region pointer returned by ::jpegxl_alloc_func, or @c + * NULL. + */ +typedef void (*jpegxl_free_func)(void* opaque, void* address); + +/** + * Memory Manager struct. + * These functions, when provided by the caller, will be used to handle memory + * allocations. + */ +typedef struct JxlMemoryManagerStruct { + /** The opaque pointer that will be passed as the first parameter to all the + * functions in this struct. */ + void* opaque; + + /** Memory allocation function. This can be NULL if and only if also the + * free() member in this class is NULL. All dynamic memory will be allocated + * and freed with these functions if they are not NULL. */ + jpegxl_alloc_func alloc; + /** Free function matching the alloc() member. */ + jpegxl_free_func free; + + /* TODO(deymo): Add cache-aligned alloc/free functions here. */ +} JxlMemoryManager; + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_MEMORY_MANAGER_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/parallel_runner.h b/third-party/libjxl/libjxl/lib/include/jxl/parallel_runner.h new file mode 100644 index 0000000000..45394e972c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/parallel_runner.h @@ -0,0 +1,156 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_common + * @{ + */ +/** + * @file parallel_runner.h + */ + +/** API for running data operations in parallel in a multi-threaded environment. + * This module allows the JPEG XL caller to define their own way of creating and + * assigning threads. 
+ * + * The JxlParallelRunner function type defines a parallel data processing + * runner that may be implemented by the caller to allow the library to process + * in multiple threads. The multi-threaded processing in this library only + * requires to run the same function over each number of a range, possibly + * running each call in a different thread. The JPEG XL caller is responsible + * for implementing this logic using the thread APIs available in their system. + * For convenience, a C++ implementation based on std::thread is provided in + * jpegxl/parallel_runner_thread.h (part of the jpegxl_threads library). + * + * Thread pools usually store small numbers of heterogeneous tasks in a queue. + * When tasks are identical or differ only by an integer input parameter, it is + * much faster to store just one function of an integer parameter and call it + * for each value. Conventional vector-of-tasks can be run in parallel using a + * lambda function adapter that simply calls task_funcs[task]. + * + * If no multi-threading is desired, a @c NULL value of JxlParallelRunner + * will use an internal implementation without multi-threading. + */ + +#ifndef JXL_PARALLEL_RUNNER_H_ +#define JXL_PARALLEL_RUNNER_H_ + +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** Return code used in the JxlParallel* functions as return value. A value + * of 0 means success and any other value means error. The special value + * JXL_PARALLEL_RET_RUNNER_ERROR can be used by the runner to indicate any + * other error. + */ +typedef int JxlParallelRetCode; + +/** + * General error returned by the JxlParallelRunInit function to indicate + * an error. + */ +#define JXL_PARALLEL_RET_RUNNER_ERROR (-1) + +/** + * Parallel run initialization callback. See JxlParallelRunner for details. + * + * This function MUST be called by the JxlParallelRunner only once, on the + * same thread that called JxlParallelRunner, before any parallel execution. 
+ * The purpose of this call is to provide the maximum number of threads that the + * JxlParallelRunner will use, which can be used by JPEG XL to allocate + * per-thread storage if needed. + * + * @param jpegxl_opaque the @p jpegxl_opaque handle provided to + * JxlParallelRunner() must be passed here. + * @param num_threads the maximum number of threads. This value must be + * positive. + * @return 0 if the initialization process was successful. + * @return an error code if there was an error, which should be returned by + * JxlParallelRunner(). + */ +typedef JxlParallelRetCode (*JxlParallelRunInit)(void* jpegxl_opaque, + size_t num_threads); + +/** + * Parallel run data processing callback. See JxlParallelRunner for details. + * + * This function MUST be called once for every number in the range [start_range, + * end_range) (including start_range but not including end_range) passing this + * number as the @p value. Calls for different value may be executed from + * different threads in parallel. + * + * @param jpegxl_opaque the @p jpegxl_opaque handle provided to + * JxlParallelRunner() must be passed here. + * @param value the number in the range [start_range, end_range) of the call. + * @param thread_id the thread number where this function is being called from. + * This must be lower than the @p num_threads value passed to + * JxlParallelRunInit. + */ +typedef void (*JxlParallelRunFunction)(void* jpegxl_opaque, uint32_t value, + size_t thread_id); + +/** + * JxlParallelRunner function type. A parallel runner implementation can be + * provided by a JPEG XL caller to allow running computations in multiple + * threads. This function must call the initialization function @p init in the + * same thread that called it and then call the passed @p func once for every + * number in the range [start_range, end_range) (including start_range but not + * including end_range) possibly from different multiple threads in parallel. 
+ * + * The JxlParallelRunner function does not need to be re-entrant. This means + * that the same JxlParallelRunner function with the same runner_opaque + * provided parameter will not be called from the library from either @p init or + * @p func in the same decoder or encoder instance. However, a single decoding + * or encoding instance may call the provided JxlParallelRunner multiple + * times for different parts of the decoding or encoding process. + * + * @return 0 if the @p init call succeeded (returned 0) and no other error + * occurred in the runner code. + * @return JXL_PARALLEL_RET_RUNNER_ERROR if an error occurred in the runner + * code, for example, setting up the threads. + * @return the return value of @p init() if non-zero. + */ +typedef JxlParallelRetCode (*JxlParallelRunner)( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range); + +/* The following is an example of a JxlParallelRunner that doesn't use any + * multi-threading. Note that this implementation doesn't store any state + * between multiple calls of the ExampleSequentialRunner function, so the + * runner_opaque value is not used. + + JxlParallelRetCode ExampleSequentialRunner(void* runner_opaque, + void* jpegxl_opaque, + JxlParallelRunInit init, + JxlParallelRunFunction func, + uint32_t start_range, + uint32_t end_range) { + // We only use one thread (the currently running thread). + JxlParallelRetCode init_ret = (*init)(jpegxl_opaque, 1); + if (init_ret != 0) return init_ret; + + // In case of other initialization error (for example when initializing the + // threads) one can return JXL_PARALLEL_RET_RUNNER_ERROR. + + for (uint32_t i = start_range; i < end_range; i++) { + // Every call is in the thread number 0. These don't need to be in any + // order. 
+ (*func)(jpegxl_opaque, i, 0); + } + return 0; + } + */ + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_PARALLEL_RUNNER_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner.h b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner.h new file mode 100644 index 0000000000..196e66d30a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner.h @@ -0,0 +1,78 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_threads + * @{ + * @file resizable_parallel_runner.h + * @brief implementation using std::thread of a resizable ::JxlParallelRunner. + */ + +/** Implementation of JxlParallelRunner that can be used to enable + * multithreading when using the JPEG XL library. This uses std::thread + * internally and related synchronization functions. The number of threads + * created can be changed after creation of the thread pool; the threads + * (including the main thread) are re-used for every + * ResizableParallelRunner::Runner call. Only one concurrent + * JxlResizableParallelRunner call per instance is allowed at a time. + * + * This is a scalable, lower-overhead thread pool runner, especially suitable + * for data-parallel computations in the fork-join model, where clients need to + * know when all tasks have completed. + * + * Compared to the implementation in @ref thread_parallel_runner.h, this + * implementation is tuned for execution on lower-powered systems, including + * for example ARM CPUs with big.LITTLE computation models. 
+ */ + +#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_H_ +#define JXL_RESIZABLE_PARALLEL_RUNNER_H_ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** Parallel runner internally using std::thread. Use as JxlParallelRunner. + */ +JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range); + +/** Creates the runner for JxlResizableParallelRunner. Use as the opaque + * runner. The runner will execute tasks on the calling thread until + * @ref JxlResizableParallelRunnerSetThreads is called. + */ +JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate( + const JxlMemoryManager* memory_manager); + +/** Changes the number of threads for JxlResizableParallelRunner. + */ +JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads( + void* runner_opaque, size_t num_threads); + +/** Suggests a number of threads to use for an image of given size. + */ +JXL_THREADS_EXPORT uint32_t +JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize); + +/** Destroys the runner created by JxlResizableParallelRunnerCreate. + */ +JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque); + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_RESIZABLE_PARALLEL_RUNNER_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner_cxx.h new file mode 100644 index 0000000000..39bbbd283a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/resizable_parallel_runner_cxx.h @@ -0,0 +1,64 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +/// @addtogroup libjxl_threads +/// @{ +/// +/// @file resizable_parallel_runner_cxx.h +/// @ingroup libjxl_threads +/// @brief C++ header-only helper for @ref resizable_parallel_runner.h. +/// +/// There's no binary library associated with the header since this is a header +/// only library. + +#ifndef JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_ +#define JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_ + +#include + +#include + +#if !(defined(__cplusplus) || defined(c_plusplus)) +#error \ + "This a C++ only header. Use jxl/jxl_resizable_parallel_runner.h from C" \ + "sources." +#endif + +/// Struct to call JxlResizableParallelRunnerDestroy from the +/// JxlResizableParallelRunnerPtr unique_ptr. +struct JxlResizableParallelRunnerDestroyStruct { + /// Calls @ref JxlResizableParallelRunnerDestroy() on the passed runner. + void operator()(void* runner) { JxlResizableParallelRunnerDestroy(runner); } +}; + +/// std::unique_ptr<> type that calls JxlResizableParallelRunnerDestroy() when +/// releasing the runner. +/// +/// Use this helper type from C++ sources to ensure the runner is destroyed and +/// their internal resources released. +typedef std::unique_ptr + JxlResizableParallelRunnerPtr; + +/// Creates an instance of JxlResizableParallelRunner into a +/// JxlResizableParallelRunnerPtr and initializes it. +/// +/// This function returns a unique_ptr that will call +/// JxlResizableParallelRunnerDestroy() when releasing the pointer. See @ref +/// JxlResizableParallelRunnerCreate for details on the instance creation. +/// +/// @param memory_manager custom allocator function. It may be NULL. The memory +/// manager will be copied internally. +/// @return a @c NULL JxlResizableParallelRunnerPtr if the instance can not be +/// allocated or initialized +/// @return initialized JxlResizableParallelRunnerPtr instance otherwise. 
+static inline JxlResizableParallelRunnerPtr JxlResizableParallelRunnerMake( + const JxlMemoryManager* memory_manager) { + return JxlResizableParallelRunnerPtr( + JxlResizableParallelRunnerCreate(memory_manager)); +} + +#endif // JXL_RESIZABLE_PARALLEL_RUNNER_CXX_H_ + +/// @} diff --git a/third-party/libjxl/libjxl/lib/include/jxl/stats.h b/third-party/libjxl/libjxl/lib/include/jxl/stats.h new file mode 100644 index 0000000000..7aeca26325 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/stats.h @@ -0,0 +1,103 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_encoder + * @{ + * @file stats.h + * @brief API to collect various statistics from JXL encoder. + */ + +#ifndef JXL_STATS_H_ +#define JXL_STATS_H_ + +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** + * Opaque structure that holds the encoder statistics. + * + * Allocated and initialized with JxlEncoderStatsCreate(). + * Cleaned up and deallocated with JxlEncoderStatsDestroy(). + */ +typedef struct JxlEncoderStatsStruct JxlEncoderStats; + +/** + * Creates an instance of JxlEncoderStats and initializes it. + * + * @return pointer to initialized JxlEncoderStats instance + */ +JXL_EXPORT JxlEncoderStats* JxlEncoderStatsCreate(); + +/** + * Deinitializes and frees JxlEncoderStats instance. + * + * @param stats instance to be cleaned up and deallocated. No-op if stats is + * null pointer. 
+ */ +JXL_EXPORT void JxlEncoderStatsDestroy(JxlEncoderStats* stats); + +/** Data type for querying JxlEncoderStats object + */ +typedef enum { + JXL_ENC_STAT_HEADER_BITS, + JXL_ENC_STAT_TOC_BITS, + JXL_ENC_STAT_DICTIONARY_BITS, + JXL_ENC_STAT_SPLINES_BITS, + JXL_ENC_STAT_NOISE_BITS, + JXL_ENC_STAT_QUANT_BITS, + JXL_ENC_STAT_MODULAR_TREE_BITS, + JXL_ENC_STAT_MODULAR_GLOBAL_BITS, + JXL_ENC_STAT_DC_BITS, + JXL_ENC_STAT_MODULAR_DC_GROUP_BITS, + JXL_ENC_STAT_CONTROL_FIELDS_BITS, + JXL_ENC_STAT_COEF_ORDER_BITS, + JXL_ENC_STAT_AC_HISTOGRAM_BITS, + JXL_ENC_STAT_AC_BITS, + JXL_ENC_STAT_MODULAR_AC_GROUP_BITS, + JXL_ENC_STAT_NUM_SMALL_BLOCKS, + JXL_ENC_STAT_NUM_DCT4X8_BLOCKS, + JXL_ENC_STAT_NUM_AFV_BLOCKS, + JXL_ENC_STAT_NUM_DCT8_BLOCKS, + JXL_ENC_STAT_NUM_DCT8X32_BLOCKS, + JXL_ENC_STAT_NUM_DCT16_BLOCKS, + JXL_ENC_STAT_NUM_DCT16X32_BLOCKS, + JXL_ENC_STAT_NUM_DCT32_BLOCKS, + JXL_ENC_STAT_NUM_DCT32X64_BLOCKS, + JXL_ENC_STAT_NUM_DCT64_BLOCKS, + JXL_ENC_STAT_NUM_BUTTERAUGLI_ITERS, + JXL_ENC_NUM_STATS, +} JxlEncoderStatsKey; + +/** Returns the value of the statistics corresponding to the given key. + * + * @param stats object that was passed to the encoder with a + * @ref JxlEncoderCollectStats function + * @param key the particular statistics to query + * + * @return the value of the statistics + */ +JXL_EXPORT size_t JxlEncoderStatsGet(const JxlEncoderStats* stats, + JxlEncoderStatsKey key); + +/** Updates the values of the given stats object with those of another. 
+ * + * @param stats object whose values will be updated (usually added together) + * @param other stats object whose values will be merged with stats + */ +JXL_EXPORT void JxlEncoderStatsMerge(JxlEncoderStats* stats, + const JxlEncoderStats* other); + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_STATS_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner.h b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner.h new file mode 100644 index 0000000000..715648b256 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner.h @@ -0,0 +1,72 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_threads + * @{ + * @file thread_parallel_runner.h + * @brief implementation using std::thread of a ::JxlParallelRunner. + */ + +/** Implementation of JxlParallelRunner that can be used to enable + * multithreading when using the JPEG XL library. This uses std::thread + * internally and related synchronization functions. The number of threads + * created is fixed at construction time and the threads are re-used for every + * ThreadParallelRunner::Runner call. Only one concurrent + * JxlThreadParallelRunner call per instance is allowed at a time. + * + * This is a scalable, lower-overhead thread pool runner, especially suitable + * for data-parallel computations in the fork-join model, where clients need to + * know when all tasks have completed. + * + * This thread pool can efficiently load-balance millions of tasks using an + * atomic counter, thus avoiding per-task virtual or system calls. 
With 48 + * hyperthreads and 1M tasks that add to an atomic counter, overall runtime is + * 10-20x higher when using std::async, and ~200x for a queue-based thread pool. + */ + +#ifndef JXL_THREAD_PARALLEL_RUNNER_H_ +#define JXL_THREAD_PARALLEL_RUNNER_H_ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** Parallel runner internally using std::thread. Use as JxlParallelRunner. + */ +JXL_THREADS_EXPORT JxlParallelRetCode JxlThreadParallelRunner( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range); + +/** Creates the runner for JxlThreadParallelRunner. Use as the opaque + * runner. + */ +JXL_THREADS_EXPORT void* JxlThreadParallelRunnerCreate( + const JxlMemoryManager* memory_manager, size_t num_worker_threads); + +/** Destroys the runner created by JxlThreadParallelRunnerCreate. + */ +JXL_THREADS_EXPORT void JxlThreadParallelRunnerDestroy(void* runner_opaque); + +/** Returns a default num_worker_threads value for + * JxlThreadParallelRunnerCreate. + */ +JXL_THREADS_EXPORT size_t JxlThreadParallelRunnerDefaultNumWorkerThreads(); + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_THREAD_PARALLEL_RUNNER_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner_cxx.h b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner_cxx.h new file mode 100644 index 0000000000..4974ffee87 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/thread_parallel_runner_cxx.h @@ -0,0 +1,64 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/// @addtogroup libjxl_threads +/// @{ +/// +/// @file thread_parallel_runner_cxx.h +/// @brief C++ header-only helper for @ref thread_parallel_runner.h. 
+/// +/// There's no binary library associated with the header since this is a header +/// only library. + +#ifndef JXL_THREAD_PARALLEL_RUNNER_CXX_H_ +#define JXL_THREAD_PARALLEL_RUNNER_CXX_H_ + +#include + +#include + +#if !(defined(__cplusplus) || defined(c_plusplus)) +#error \ + "This a C++ only header. Use jxl/jxl_thread_parallel_runner.h from C" \ + "sources." +#endif + +/// Struct to call JxlThreadParallelRunnerDestroy from the +/// JxlThreadParallelRunnerPtr unique_ptr. +struct JxlThreadParallelRunnerDestroyStruct { + /// Calls @ref JxlThreadParallelRunnerDestroy() on the passed runner. + void operator()(void* runner) { JxlThreadParallelRunnerDestroy(runner); } +}; + +/// std::unique_ptr<> type that calls JxlThreadParallelRunnerDestroy() when +/// releasing the runner. +/// +/// Use this helper type from C++ sources to ensure the runner is destroyed and +/// their internal resources released. +typedef std::unique_ptr + JxlThreadParallelRunnerPtr; + +/// Creates an instance of JxlThreadParallelRunner into a +/// JxlThreadParallelRunnerPtr and initializes it. +/// +/// This function returns a unique_ptr that will call +/// JxlThreadParallelRunnerDestroy() when releasing the pointer. See @ref +/// JxlThreadParallelRunnerCreate for details on the instance creation. +/// +/// @param memory_manager custom allocator function. It may be NULL. The memory +/// manager will be copied internally. +/// @param num_worker_threads the number of worker threads to create. +/// @return a @c NULL JxlThreadParallelRunnerPtr if the instance can not be +/// allocated or initialized +/// @return initialized JxlThreadParallelRunnerPtr instance otherwise. 
+static inline JxlThreadParallelRunnerPtr JxlThreadParallelRunnerMake( + const JxlMemoryManager* memory_manager, size_t num_worker_threads) { + return JxlThreadParallelRunnerPtr( + JxlThreadParallelRunnerCreate(memory_manager, num_worker_threads)); +} + +#endif // JXL_THREAD_PARALLEL_RUNNER_CXX_H_ + +/// @} diff --git a/third-party/libjxl/libjxl/lib/include/jxl/types.h b/third-party/libjxl/libjxl/lib/include/jxl/types.h new file mode 100644 index 0000000000..f280fe99ca --- /dev/null +++ b/third-party/libjxl/libjxl/lib/include/jxl/types.h @@ -0,0 +1,179 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_common + * @{ + * @file types.h + * @brief Data types for the JPEG XL API, for both encoding and decoding. + */ + +#ifndef JXL_TYPES_H_ +#define JXL_TYPES_H_ + +#include +#include +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +/** + * A portable @c bool replacement. + * + * ::JXL_BOOL is a "documentation" type: actually it is @c int, but in API it + * denotes a type, whose only values are ::JXL_TRUE and ::JXL_FALSE. + */ +#define JXL_BOOL int +/** Portable @c true replacement. */ +#define JXL_TRUE 1 +/** Portable @c false replacement. */ +#define JXL_FALSE 0 + +/** Data type for the sample values per channel per pixel. + */ +typedef enum { + /** Use 32-bit single-precision floating point values, with range 0.0-1.0 + * (within gamut, may go outside this range for wide color gamut). Floating + * point output, either JXL_TYPE_FLOAT or JXL_TYPE_FLOAT16, is recommended + * for HDR and wide gamut images when color profile conversion is required. */ + JXL_TYPE_FLOAT = 0, + + /** Use type uint8_t. May clip wide color gamut data. + */ + JXL_TYPE_UINT8 = 2, + + /** Use type uint16_t. May clip wide color gamut data. 
+ */ + JXL_TYPE_UINT16 = 3, + + /** Use 16-bit IEEE 754 half-precision floating point values */ + JXL_TYPE_FLOAT16 = 5, +} JxlDataType; + +/** Ordering of multi-byte data. + */ +typedef enum { + /** Use the endianness of the system, either little endian or big endian, + * without forcing either specific endianness. Do not use if pixel data + * should be exported to a well defined format. + */ + JXL_NATIVE_ENDIAN = 0, + /** Force little endian */ + JXL_LITTLE_ENDIAN = 1, + /** Force big endian */ + JXL_BIG_ENDIAN = 2, +} JxlEndianness; + +/** Data type for the sample values per channel per pixel for the output buffer + * for pixels. This is not necessarily the same as the data type encoded in the + * codestream. The channels are interleaved per pixel. The pixels are + * organized row by row, left to right, top to bottom. + * TODO(lode): support different channel orders if needed (RGB, BGR, ...) + */ +typedef struct { + /** Amount of channels available in a pixel buffer. + * 1: single-channel data, e.g. grayscale or a single extra channel + * 2: single-channel + alpha + * 3: trichromatic, e.g. RGB + * 4: trichromatic + alpha + * TODO(lode): this needs finetuning. It is not yet defined how the user + * chooses output color space. CMYK+alpha needs 5 channels. + */ + uint32_t num_channels; + + /** Data type of each channel. + */ + JxlDataType data_type; + + /** Whether multi-byte data types are represented in big endian or little + * endian format. This applies to JXL_TYPE_UINT16 + * and JXL_TYPE_FLOAT. + */ + JxlEndianness endianness; + + /** Align scanlines to a multiple of align bytes, or 0 to require no + * alignment at all (which has the same effect as value 1) + */ + size_t align; +} JxlPixelFormat; + +/** Settings for the interpretation of UINT input and output buffers. 
+ * (buffers using a FLOAT data type are not affected by this) + */ +typedef enum { + /** This is the default setting, where the encoder expects the input pixels + * to use the full range of the pixel format data type (e.g. for UINT16, the + * input range is 0 .. 65535 and the value 65535 is mapped to 1.0 when + * converting to float), and the decoder uses the full range to output + * pixels. If the bit depth in the basic info is different from this, the + * encoder expects the values to be rescaled accordingly (e.g. multiplied by + * 65535/4095 for a 12-bit image using UINT16 input data type). */ + JXL_BIT_DEPTH_FROM_PIXEL_FORMAT = 0, + + /** If this setting is selected, the encoder expects the input pixels to be + * in the range defined by the bits_per_sample value of the basic info (e.g. + * for 12-bit images using UINT16 input data types, the allowed range is + * 0 .. 4095 and the value 4095 is mapped to 1.0 when converting to float), + * and the decoder outputs pixels in this range. */ + JXL_BIT_DEPTH_FROM_CODESTREAM = 1, + + /** This setting can only be used in the decoder to select a custom range for + * pixel output */ + JXL_BIT_DEPTH_CUSTOM = 2, +} JxlBitDepthType; + +/** Data type for describing the interpretation of the input and output buffers + * in terms of the range of allowed input and output pixel values. */ +typedef struct { + /** Bit depth setting, see comment on @ref JxlBitDepthType */ + JxlBitDepthType type; + + /** Custom bits per sample */ + uint32_t bits_per_sample; + + /** Custom exponent bits per sample */ + uint32_t exponent_bits_per_sample; +} JxlBitDepth; + +/** Data type holding the 4-character type name of an ISOBMFF box. + */ +typedef char JxlBoxType[4]; + +/** Types of progressive detail. + * Setting a progressive detail with value N implies all progressive details + * with smaller or equal value. 
Currently only the following levels of + * progressive detail are implemented: + * - kDC (which implies kFrames) + * - kLastPasses (which implies kDC and kFrames) + * - kPasses (which implies kLastPasses, kDC and kFrames) + */ +typedef enum { + // after completed kRegularFrames + kFrames = 0, + // after completed DC (1:8) + kDC = 1, + // after completed AC passes that are the last pass for their resolution + // target. + kLastPasses = 2, + // after completed AC passes that are not the last pass for their resolution + // target. + kPasses = 3, + // during DC frame when lower resolutions are completed (1:32, 1:16) + kDCProgressive = 4, + // after completed DC groups + kDCGroups = 5, + // after completed groups + kGroups = 6, +} JxlProgressiveDetail; + +#if defined(__cplusplus) || defined(c_plusplus) +} +#endif + +#endif /* JXL_TYPES_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/jpegli.cmake b/third-party/libjxl/libjxl/lib/jpegli.cmake new file mode 100644 index 0000000000..5d4f45e58a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli.cmake @@ -0,0 +1,159 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file.
+ +include(compatibility.cmake) +include(jxl_lists.cmake) + +set(JPEGLI_INTERNAL_LIBS + hwy + Threads::Threads + ${ATOMICS_LIBRARIES} +) + +# JPEGLIB setup +set(BITS_IN_JSAMPLE 8) +set(MEM_SRCDST_SUPPORTED 1) + +if(JPEGLI_LIBJPEG_LIBRARY_SOVERSION STREQUAL "62") + set(JPEG_LIB_VERSION 62) +elseif(JPEGLI_LIBJPEG_LIBRARY_SOVERSION STREQUAL "7") + set(JPEG_LIB_VERSION 70) +elseif(JPEGLI_LIBJPEG_LIBRARY_SOVERSION STREQUAL "8") + set(JPEG_LIB_VERSION 80) +endif() + +configure_file( + ../third_party/libjpeg-turbo/jconfig.h.in include/jpegli/jconfig.h) +configure_file( + ../third_party/libjpeg-turbo/jpeglib.h include/jpegli/jpeglib.h COPYONLY) +configure_file( + ../third_party/libjpeg-turbo/jmorecfg.h include/jpegli/jmorecfg.h COPYONLY) + +add_library(jpegli-static STATIC EXCLUDE_FROM_ALL "${JPEGXL_INTERNAL_JPEGLI_SOURCES}") +target_compile_options(jpegli-static PRIVATE "${JPEGXL_INTERNAL_FLAGS}") +target_compile_options(jpegli-static PUBLIC ${JPEGXL_COVERAGE_FLAGS}) +set_property(TARGET jpegli-static PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(jpegli-static PRIVATE + "$" + "$" + "$" + "${JXL_HWY_INCLUDE_DIRS}" +) +target_include_directories(jpegli-static PUBLIC + "$" +) +target_link_libraries(jpegli-static PUBLIC ${JPEGLI_INTERNAL_LIBS}) + +# +# Tests for jpegli-static +# + +find_package(JPEG) +if(JPEG_FOUND AND BUILD_TESTING) +# TODO(eustas): merge into jxl_tests.cmake? + +add_library(jpegli_libjpeg_util-obj OBJECT + ${JPEGXL_INTERNAL_JPEGLI_LIBJPEG_HELPER_FILES} +) +target_include_directories(jpegli_libjpeg_util-obj PRIVATE + "${PROJECT_SOURCE_DIR}" + "${JPEG_INCLUDE_DIRS}" +) +target_compile_options(jpegli_libjpeg_util-obj PRIVATE + "${JPEGXL_INTERNAL_FLAGS}" "${JPEGXL_COVERAGE_FLAGS}") + +# Individual test binaries: +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests) +foreach (TESTFILE IN LISTS JPEGXL_INTERNAL_JPEGLI_TESTS) + # The TESTNAME is the name without the extension or directory. 
+ get_filename_component(TESTNAME ${TESTFILE} NAME_WE) + add_executable(${TESTNAME} ${TESTFILE} + $ + ${JPEGXL_INTERNAL_JPEGLI_TESTLIB_FILES} + ) + target_compile_options(${TESTNAME} PRIVATE + ${JPEGXL_INTERNAL_FLAGS} + # Add coverage flags to the test binary so code in the private headers of + # the library is also instrumented when running tests that execute it. + ${JPEGXL_COVERAGE_FLAGS} + ) + target_compile_definitions(${TESTNAME} PRIVATE + -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}") + target_include_directories(${TESTNAME} PRIVATE + "${PROJECT_SOURCE_DIR}" + "${CMAKE_CURRENT_SOURCE_DIR}/include" + "${CMAKE_CURRENT_BINARY_DIR}/include" + ) + target_link_libraries(${TESTNAME} + hwy + jpegli-static + gmock + GTest::GTest + GTest::Main + ${JPEG_LIBRARIES} + ) + set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "${JPEGXL_COVERAGE_LINK_FLAGS}") + # Output test targets in the test directory. + set_target_properties(${TESTNAME} PROPERTIES PREFIX "tests/") + if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set_target_properties(${TESTNAME} PROPERTIES COMPILE_FLAGS "-Wno-error") + endif () + jxl_discover_tests(${TESTNAME}) +endforeach () +endif() + +# +# Build libjpeg.so that links to jpegli-static +# + +if (JPEGXL_ENABLE_JPEGLI_LIBJPEG AND NOT APPLE AND NOT WIN32 AND NOT JPEGXL_EMSCRIPTEN) +add_library(jpegli-libjpeg-obj OBJECT "${JPEGXL_INTERNAL_JPEGLI_WRAPPER_SOURCES}") +target_compile_options(jpegli-libjpeg-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS}) +target_compile_options(jpegli-libjpeg-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS}) +set_property(TARGET jpegli-libjpeg-obj PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(jpegli-libjpeg-obj PRIVATE + "$" + "$" +) +target_compile_definitions(jpegli-libjpeg-obj PUBLIC + ${JPEGLI_LIBJPEG_OBJ_COMPILE_DEFINITIONS} +) +set(JPEGLI_LIBJPEG_INTERNAL_OBJECTS $) + +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/jpegli) +add_library(jpeg SHARED ${JPEGLI_LIBJPEG_INTERNAL_OBJECTS}) +target_link_libraries(jpeg
PUBLIC ${JPEGXL_COVERAGE_FLAGS}) +target_link_libraries(jpeg PRIVATE jpegli-static) +set_target_properties(jpeg PROPERTIES + VERSION ${JPEGLI_LIBJPEG_LIBRARY_VERSION} + SOVERSION ${JPEGLI_LIBJPEG_LIBRARY_SOVERSION} + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jpegli" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/jpegli") + +# Add a jpeg.version file as a version script to tag symbols with the +# appropriate version number. +set_target_properties(jpeg PROPERTIES + LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jpegli/jpeg.version.${JPEGLI_LIBJPEG_LIBRARY_SOVERSION}) +set_property(TARGET jpeg APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jpegli/jpeg.version.${JPEGLI_LIBJPEG_LIBRARY_SOVERSION}") + +if (JPEGXL_INSTALL_JPEGLI_LIBJPEG) + install(TARGETS jpeg + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install( + DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/include/jpegli/" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") +endif() + +# This hides the default visibility symbols from static libraries bundled into +# the shared library. In particular this prevents exposing symbols from hwy +# in the shared library. +if(LINKER_SUPPORT_EXCLUDE_LIBS) + set_property(TARGET jpeg APPEND_STRING PROPERTY + LINK_FLAGS " ${LINKER_EXCLUDE_LIBS_FLAG}") +endif() +endif() diff --git a/third-party/libjxl/libjxl/lib/jpegli/README.md b/third-party/libjxl/libjxl/lib/jpegli/README.md new file mode 100644 index 0000000000..72f13afd22 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/README.md @@ -0,0 +1,49 @@ +# Improved JPEG encoder and decoder implementation + +This subdirectory contains a JPEG encoder and decoder implementation that is +API and ABI compatible with libjpeg62. 
+ +## Building + +When building the parent libjxl project, two binaries, `tools/cjpegli` and +`tools/djpegli`, will be built, as well as a +`lib/jpegli/libjpeg.so.62.3.0` shared library that can be used as a drop-in +replacement for the system library with the same name. + +## Encoder improvements + +Improvements and new features used by the encoder include: + +* Support for 16-bit unsigned and 32-bit floating point input buffers. + +* Color space conversions, chroma subsampling and DCT are all done in floating + point precision; the conversion to integers happens only when producing + the final quantized DCT coefficients. + +* The desired quality can be indicated by a distance parameter that is + analogous to the distance parameter of JPEG XL. The quantization tables + are chosen based on the distance and the chroma subsampling mode, with + different positions in the quantization matrix scaling differently, and the + red and blue chrominance channels have separate quantization tables. + +* Adaptive dead-zone quantization. On noisy parts of the image, quantization + thresholds for zero coefficients are higher than on smoother parts of the + image. + +* Support for more efficient compression of JPEGs with an ICC profile + representing the XYB colorspace. These JPEGs will not be converted to the + YCbCr colorspace, but specialized quantization tables will be chosen for + the original X, Y, B channels. + +## Decoder improvements + +* Support for 16-bit unsigned and 32-bit floating point output buffers. + +* Non-zero DCT coefficients are dequantized to the expectation value of their + respective quantization intervals assuming a Laplacian distribution of the + original unquantized DCT coefficients. + +* After dequantization, inverse DCT, chroma upsampling and color space + conversions are all done in floating point precision; the conversion to + integer samples happens only in the final output phase (unless output to + floating point was requested).
diff --git a/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.cc b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.cc new file mode 100644 index 0000000000..a1c0b89ad3 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.cc @@ -0,0 +1,563 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/adaptive_quantization.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/adaptive_quantization.cc" +#include +#include + +#include "lib/jpegli/encode_internal.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::AbsDiff; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Floor; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Min; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::NegMulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::ShiftLeft; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sqrt; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +static constexpr float kInputScaling = 1.0f / 255.0f; + +// Primary template: default to actual division. +template +struct FastDivision { + HWY_INLINE V operator()(const V n, const V d) const { return n / d; } +}; +// Partial specialization for float vectors. +template +struct FastDivision { + // One Newton-Raphson iteration. 
+ static HWY_INLINE V ReciprocalNR(const V x) { + const auto rcp = ApproximateReciprocal(x); + const auto sum = Add(rcp, rcp); + const auto x_rcp = Mul(x, rcp); + return NegMulAdd(x_rcp, rcp, sum); + } + + V operator()(const V n, const V d) const { +#if 1 // Faster on SKX + return Div(n, d); +#else + return n * ReciprocalNR(d); +#endif + } +}; + +// Approximates smooth functions via rational polynomials (i.e. dividing two +// polynomials). Evaluates polynomials via Horner's scheme, which is faster than +// Clenshaw recurrence for Chebyshev polynomials. LoadDup128 allows us to +// specify constants (replicated 4x) independently of the lane count. +template +HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x, + const T (&p)[NP], + const T (&q)[NQ]) { + constexpr size_t kDegP = NP / 4 - 1; + constexpr size_t kDegQ = NQ / 4 - 1; + auto yp = LoadDup128(d, &p[kDegP * 4]); + auto yq = LoadDup128(d, &q[kDegQ * 4]); + // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a + // compiler warning that the index is out of bounds since we are already + // checking that it is not out of bounds with (kDegP >= n) and the access + // will be optimized away. Similarly with q and kDegQ. 
+ HWY_FENCE; + if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); + if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); + HWY_FENCE; + if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); + if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); + HWY_FENCE; + if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); + if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); + HWY_FENCE; + if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); + if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); + HWY_FENCE; + if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); + if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); + HWY_FENCE; + if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); + if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); + HWY_FENCE; + if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); + if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); + + return FastDivision()(yp, yq); +} + +// Computes base-2 logarithm like std::log2. Undefined if negative / NaN. +// L1 error ~3.9E-6 +template +V FastLog2f(const DF df, V x) { + // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2). + HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f), + HWY_REP4(1.4287160470083755E+00f), + HWY_REP4(7.4245873327820566E-01f)}; + HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f), + HWY_REP4(1.0096718572241148E+00f), + HWY_REP4(1.7409343003366853E-01f)}; + + const Rebind di; + const auto x_bits = BitCast(di, x); + + // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops + const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab)); // = 2/3 + // Shifted exponent = log2; also used to clear mantissa. 
+ const auto exp_shifted = ShiftRight<23>(exp_bits); + const auto mantissa = BitCast(df, Sub(x_bits, ShiftLeft<23>(exp_shifted))); + const auto exp_val = ConvertTo(df, exp_shifted); + return Add(EvalRationalPolynomial(df, Sub(mantissa, Set(df, 1.0f)), p, q), + exp_val); +} + +// max relative error ~3e-7 +template +V FastPow2f(const DF df, V x) { + const Rebind di; + auto floorx = Floor(x); + auto exp = + BitCast(df, ShiftLeft<23>(Add(ConvertTo(di, floorx), Set(di, 127)))); + auto frac = Sub(x, floorx); + auto num = Add(frac, Set(df, 1.01749063e+01)); + num = MulAdd(num, frac, Set(df, 4.88687798e+01)); + num = MulAdd(num, frac, Set(df, 9.85506591e+01)); + num = Mul(num, exp); + auto den = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02)); + den = MulAdd(den, frac, Set(df, -1.94414990e+01)); + den = MulAdd(den, frac, Set(df, 9.85506633e+01)); + return Div(num, den); +} + +inline float FastPow2f(float f) { + HWY_CAPPED(float, 1) D; + return GetLane(FastPow2f(D, Set(D, f))); +} + +// The following functions modulate an exponent (out_val) and return the updated +// value. Their descriptor is limited to 8 lanes for 8x8 blocks. + +template +V ComputeMask(const D d, const V out_val) { + const auto kBase = Set(d, -0.74174993f); + const auto kMul4 = Set(d, 3.2353257320940401f); + const auto kMul2 = Set(d, 12.906028311180409f); + const auto kOffset2 = Set(d, 305.04035728311436f); + const auto kMul3 = Set(d, 5.0220313103171232f); + const auto kOffset3 = Set(d, 2.1925739705298404f); + const auto kOffset4 = Mul(Set(d, 0.25f), kOffset3); + const auto kMul0 = Set(d, 0.74760422233706747f); + const auto k1 = Set(d, 1.0f); + + // Avoid division by zero. + const auto v1 = Max(Mul(out_val, kMul0), Set(d, 1e-3f)); + const auto v2 = Div(k1, Add(v1, kOffset2)); + const auto v3 = Div(k1, MulAdd(v1, v1, kOffset3)); + const auto v4 = Div(k1, MulAdd(v1, v1, kOffset4)); + // TODO(jyrki): + // A log or two here could make sense. 
In butteraugli we have effectively + // log(log(x + C)) for this kind of use, as a single log is used in + // saturating visual masking and here the modulation values are exponential, + // another log would counter that. + return Add(kBase, MulAdd(kMul4, v4, MulAdd(kMul2, v2, Mul(kMul3, v3)))); +} + +// mul and mul2 represent a scaling difference between jxl and butteraugli. +static const float kSGmul = 226.0480446705883f; +static const float kSGmul2 = 1.0f / 73.377132366608819f; +static const float kLog2 = 0.693147181f; +// Includes correction factor for std::log -> log2. +static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2; +static const float kSGVOffset = 7.14672470003f; + +template +V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) { + // The opsin space in jxl is the cubic root of photons, i.e., v * v * v + // is related to the number of photons. + // + // SimpleGamma(v * v * v) is the psychovisual space in butteraugli. + // This ratio allows quantization to move from jxl's opsin space to + // butteraugli's log-gamma space. + static const float kEpsilon = 1e-2; + static const float kNumOffset = kEpsilon / kInputScaling / kInputScaling; + static const float kNumMul = kSGRetMul * 3 * kSGmul; + static const float kVOffset = (kSGVOffset * kLog2 + kEpsilon) / kInputScaling; + static const float kDenMul = kLog2 * kSGmul * kInputScaling * kInputScaling; + + v = ZeroIfNegative(v); + const auto num_mul = Set(d, kNumMul); + const auto num_offset = Set(d, kNumOffset); + const auto den_offset = Set(d, kVOffset); + const auto den_mul = Set(d, kDenMul); + + const auto v2 = Mul(v, v); + + const auto num = MulAdd(num_mul, v2, num_offset); + const auto den = MulAdd(Mul(den_mul, v), v2, den_offset); + return invert ? 
Div(num, den) : Div(den, num); +} + +template +static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) { + using DScalar = HWY_CAPPED(float, 1); + auto vscalar = Load(DScalar(), &v); + return GetLane( + RatioOfDerivativesOfCubicRootToSimpleGamma(DScalar(), vscalar)); +} + +// TODO(veluca): this function computes an approximation of the derivative of +// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or +// exact derivatives. For reference, SimpleGamma was: +/* +template +V SimpleGamma(const D d, V v) { + // A simple HDR compatible gamma function. + const auto mul = Set(d, kSGmul); + const auto kRetMul = Set(d, kSGRetMul); + const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f); + const auto kVOffset = Set(d, kSGVOffset); + + v *= mul; + + // This should happen rarely, but may lead to a NaN, which is rather + // undesirable. Since negative photons don't exist we solve the NaNs by + // clamping here. + // TODO(veluca): with FastLog2f, this no longer leads to NaNs. 
+ v = ZeroIfNegative(v); + return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd; +} +*/ + +template +V GammaModulation(const D d, const size_t x, const size_t y, + const RowBuffer& input, const V out_val) { + static const float kBias = 0.16f / kInputScaling; + static const float kScale = kInputScaling / 64.0f; + auto overall_ratio = Zero(d); + const auto bias = Set(d, kBias); + const auto scale = Set(d, kScale); + const float* const JXL_RESTRICT block_start = input.Row(y) + x; + for (size_t dy = 0; dy < 8; ++dy) { + const float* const JXL_RESTRICT row_in = block_start + dy * input.stride(); + for (size_t dx = 0; dx < 8; dx += Lanes(d)) { + const auto iny = Add(Load(d, row_in + dx), bias); + const auto ratio_g = + RatioOfDerivativesOfCubicRootToSimpleGamma(d, iny); + overall_ratio = Add(overall_ratio, ratio_g); + } + } + overall_ratio = Mul(SumOfLanes(d, overall_ratio), scale); + // ideally -1.0, but likely optimal correction adds some entropy, so slightly + // less than that. + // ln(2) constant folded in because we want std::log but have FastLog2f. + const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f); + return MulAdd(kGam, FastLog2f(d, overall_ratio), out_val); +} + +// Change precision in 8x8 blocks that have high frequency content. +template +V HfModulation(const D d, const size_t x, const size_t y, + const RowBuffer& input, const V out_val) { + // Zero out the invalid differences for the rightmost value per row. + const Rebind du; + HWY_ALIGN constexpr uint32_t kMaskRight[8] = {~0u, ~0u, ~0u, ~0u, + ~0u, ~0u, ~0u, 0}; + + auto sum = Zero(d); // sum of absolute differences with right and below + static const float kSumCoeff = -2.0052193233688884f * kInputScaling / 112.0; + auto sumcoeff = Set(d, kSumCoeff); + + const float* const JXL_RESTRICT block_start = input.Row(y) + x; + for (size_t dy = 0; dy < 8; ++dy) { + const float* JXL_RESTRICT row_in = block_start + dy * input.stride(); + const float* JXL_RESTRICT row_in_next = + dy == 7 ? 
row_in : row_in + input.stride(); + + for (size_t dx = 0; dx < 8; dx += Lanes(d)) { + const auto p = Load(d, row_in + dx); + const auto pr = LoadU(d, row_in + dx + 1); + const auto mask = BitCast(d, Load(du, kMaskRight + dx)); + sum = Add(sum, And(mask, AbsDiff(p, pr))); + const auto pd = Load(d, row_in_next + dx); + sum = Add(sum, AbsDiff(p, pd)); + } + } + + sum = SumOfLanes(d, sum); + return MulAdd(sum, sumcoeff, out_val); +} + +void PerBlockModulations(const float y_quant_01, const RowBuffer& input, + const size_t yb0, const size_t yblen, + RowBuffer* aq_map) { + static const float kAcQuant = 0.841f; + float base_level = 0.48f * kAcQuant; + float kDampenRampStart = 9.0f; + float kDampenRampEnd = 65.0f; + float dampen = 1.0f; + if (y_quant_01 >= kDampenRampStart) { + dampen = 1.0f - ((y_quant_01 - kDampenRampStart) / + (kDampenRampEnd - kDampenRampStart)); + if (dampen < 0) { + dampen = 0; + } + } + const float mul = kAcQuant * dampen; + const float add = (1.0f - dampen) * base_level; + for (size_t iy = 0; iy < yblen; iy++) { + const size_t yb = yb0 + iy; + const size_t y = yb * 8; + float* const JXL_RESTRICT row_out = aq_map->Row(yb); + const HWY_CAPPED(float, 8) df; + for (size_t ix = 0; ix < aq_map->xsize(); ix++) { + size_t x = ix * 8; + auto out_val = Set(df, row_out[ix]); + out_val = ComputeMask(df, out_val); + out_val = HfModulation(df, x, y, input, out_val); + out_val = GammaModulation(df, x, y, input, out_val); + // We want multiplicative quantization field, so everything + // until this point has been modulating the exponent. 
+ row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add; + } + } +} + +template +V MaskingSqrt(const D d, V v) { + static const float kLogOffset = 28; + static const float kMul = 211.50759899638012f; + const auto mul_v = Set(d, kMul * 1e8); + const auto offset_v = Set(d, kLogOffset); + return Mul(Set(d, 0.25f), Sqrt(MulAdd(v, Sqrt(mul_v), offset_v))); +} + +template +void Sort4(V& min0, V& min1, V& min2, V& min3) { + const auto tmp0 = Min(min0, min1); + const auto tmp1 = Max(min0, min1); + const auto tmp2 = Min(min2, min3); + const auto tmp3 = Max(min2, min3); + const auto tmp4 = Max(tmp0, tmp2); + const auto tmp5 = Min(tmp1, tmp3); + min0 = Min(tmp0, tmp2); + min1 = Min(tmp4, tmp5); + min2 = Max(tmp4, tmp5); + min3 = Max(tmp1, tmp3); +} + +template +void UpdateMin4(const V v, V& min0, V& min1, V& min2, V& min3) { + const auto tmp0 = Max(min0, v); + const auto tmp1 = Max(min1, tmp0); + const auto tmp2 = Max(min2, tmp1); + min0 = Min(min0, v); + min1 = Min(min1, tmp0); + min2 = Min(min2, tmp1); + min3 = Min(min3, tmp2); +} + +// Computes a linear combination of the 4 lowest values of the 3x3 neighborhood +// of each pixel. Output is downsampled 2x. 
+void FuzzyErosion(const RowBuffer& pre_erosion, const size_t yb0, + const size_t yblen, RowBuffer* tmp, + RowBuffer* aq_map) { + int xsize_blocks = aq_map->xsize(); + int xsize = pre_erosion.xsize(); + HWY_FULL(float) d; + const auto mul0 = Set(d, 0.125f); + const auto mul1 = Set(d, 0.075f); + const auto mul2 = Set(d, 0.06f); + const auto mul3 = Set(d, 0.05f); + for (size_t iy = 0; iy < 2 * yblen; ++iy) { + size_t y = 2 * yb0 + iy; + const float* JXL_RESTRICT rowt = pre_erosion.Row(y - 1); + const float* JXL_RESTRICT rowm = pre_erosion.Row(y); + const float* JXL_RESTRICT rowb = pre_erosion.Row(y + 1); + float* row_out = tmp->Row(y); + for (int x = 0; x < xsize; x += Lanes(d)) { + int xm1 = x - 1; + int xp1 = x + 1; + auto min0 = LoadU(d, rowm + x); + auto min1 = LoadU(d, rowm + xm1); + auto min2 = LoadU(d, rowm + xp1); + auto min3 = LoadU(d, rowt + xm1); + Sort4(min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowt + x), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowt + xp1), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowb + xm1), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowb + x), min0, min1, min2, min3); + UpdateMin4(LoadU(d, rowb + xp1), min0, min1, min2, min3); + const auto v = Add(Add(Mul(mul0, min0), Mul(mul1, min1)), + Add(Mul(mul2, min2), Mul(mul3, min3))); + Store(v, d, row_out + x); + } + if (iy % 2 == 1) { + const float* JXL_RESTRICT row_out0 = tmp->Row(y - 1); + float* JXL_RESTRICT aq_out = aq_map->Row(yb0 + iy / 2); + for (int bx = 0, x = 0; bx < xsize_blocks; ++bx, x += 2) { + aq_out[bx] = + (row_out[x] + row_out[x + 1] + row_out0[x] + row_out0[x + 1]); + } + } + } +} + +void ComputePreErosion(const RowBuffer& input, const size_t xsize, + const size_t y0, const size_t ylen, int border, + float* diff_buffer, RowBuffer* pre_erosion) { + const size_t xsize_out = xsize / 4; + const size_t y0_out = y0 / 4; + + // The XYB gamma is 3.0 to be able to decode faster with two muls. 
+ // Butteraugli's gamma is matching the gamma of human eye, around 2.6. + // We approximate the gamma difference by adding one cubic root into + // the adaptive quantization. This gives us a total gamma of 2.6666 + // for quantization uses. + static const float match_gamma_offset = 0.019 / kInputScaling; + + const HWY_CAPPED(float, 8) df; + + static const float limit = 0.2f; + // Computes image (padded to multiple of 8x8) of local pixel differences. + // Subsample both directions by 4. + for (size_t iy = 0; iy < ylen; ++iy) { + size_t y = y0 + iy; + const float* row_in = input.Row(y); + const float* row_in1 = input.Row(y + 1); + const float* row_in2 = input.Row(y - 1); + float* JXL_RESTRICT row_out = diff_buffer; + const auto match_gamma_offset_v = Set(df, match_gamma_offset); + const auto quarter = Set(df, 0.25f); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + const auto in = LoadU(df, row_in + x); + const auto in_r = LoadU(df, row_in + x + 1); + const auto in_l = LoadU(df, row_in + x - 1); + const auto in_t = LoadU(df, row_in2 + x); + const auto in_b = LoadU(df, row_in1 + x); + const auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b))); + const auto gammacv = + RatioOfDerivativesOfCubicRootToSimpleGamma( + df, Add(in, match_gamma_offset_v)); + auto diff = Mul(gammacv, Sub(in, base)); + diff = Mul(diff, diff); + diff = Min(diff, Set(df, limit)); + diff = MaskingSqrt(df, diff); + if ((iy & 3) != 0) { + diff = Add(diff, LoadU(df, row_out + x)); + } + StoreU(diff, df, row_out + x); + } + if (iy % 4 == 3) { + size_t y_out = y0_out + iy / 4; + float* row_dout = pre_erosion->Row(y_out); + for (size_t x = 0; x < xsize_out; x++) { + row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] + + row_out[x * 4 + 2] + row_out[x * 4 + 3]) * + 0.25f; + } + pre_erosion->PadRow(y_out, xsize_out, border); + } + } +} + +} // namespace + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + 
+#if HWY_ONCE +namespace jpegli { +HWY_EXPORT(ComputePreErosion); +HWY_EXPORT(FuzzyErosion); +HWY_EXPORT(PerBlockModulations); + +namespace { + +static constexpr int kPreErosionBorder = 1; + +} // namespace + +void ComputeAdaptiveQuantField(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + if (!m->use_adaptive_quantization) { + return; + } + int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 1 : 0; + jpeg_component_info* y_comp = &cinfo->comp_info[y_channel]; + int y_quant_01 = cinfo->quant_tbl_ptrs[y_comp->quant_tbl_no]->quantval[1]; + if (m->next_iMCU_row == 0) { + m->input_buffer[y_channel].CopyRow(-1, 0, 1); + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + size_t last_row = m->ysize_blocks * DCTSIZE - 1; + m->input_buffer[y_channel].CopyRow(last_row + 1, last_row, 1); + } + const RowBuffer& input = m->input_buffer[y_channel]; + const size_t xsize_blocks = y_comp->width_in_blocks; + const size_t xsize = xsize_blocks * DCTSIZE; + const size_t yb0 = m->next_iMCU_row * cinfo->max_v_samp_factor; + const size_t yblen = cinfo->max_v_samp_factor; + size_t y0 = yb0 * DCTSIZE; + size_t ylen = cinfo->max_v_samp_factor * DCTSIZE; + if (y0 == 0) { + ylen += 4; + } else { + y0 += 4; + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + ylen -= 4; + } + HWY_DYNAMIC_DISPATCH(ComputePreErosion) + (input, xsize, y0, ylen, kPreErosionBorder, m->diff_buffer, &m->pre_erosion); + if (y0 == 0) { + m->pre_erosion.CopyRow(-1, 0, kPreErosionBorder); + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + size_t last_row = m->ysize_blocks * 2 - 1; + m->pre_erosion.CopyRow(last_row + 1, last_row, kPreErosionBorder); + } + HWY_DYNAMIC_DISPATCH(FuzzyErosion) + (m->pre_erosion, yb0, yblen, &m->fuzzy_erosion_tmp, &m->quant_field); + HWY_DYNAMIC_DISPATCH(PerBlockModulations) + (y_quant_01, input, yb0, yblen, &m->quant_field); + for (int y = 0; y < cinfo->max_v_samp_factor; ++y) { + float* row = m->quant_field.Row(yb0 + y); + for (size_t x = 0; x < 
xsize_blocks; ++x) { + row[x] = std::max(0.0f, (0.6f / row[x]) - 1.0f); + } + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.h b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.h new file mode 100644 index 0000000000..d8537e85df --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/adaptive_quantization.h @@ -0,0 +1,17 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_ +#define LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ComputeAdaptiveQuantField(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_ADAPTIVE_QUANTIZATION_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/bit_writer.cc b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.cc new file mode 100644 index 0000000000..9788f35b8d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.cc @@ -0,0 +1,60 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/bit_writer.h" + +#include "lib/jpegli/encode_internal.h" + +namespace jpegli { + +void JpegBitWriterInit(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + JpegBitWriter* bw = &m->bw; + size_t buffer_size = m->blocks_per_iMCU_row * (DCTSIZE2 * 16 + 8) + (1 << 16); + bw->cinfo = cinfo; + bw->data = Allocate(cinfo, buffer_size, JPOOL_IMAGE); + bw->len = buffer_size; + bw->pos = 0; + bw->output_pos = 0; + bw->put_buffer = 0; + bw->free_bits = 64; + bw->healthy = true; +} + +bool EmptyBitWriterBuffer(JpegBitWriter* bw) { + while (bw->output_pos < bw->pos) { + j_compress_ptr cinfo = bw->cinfo; + if (cinfo->dest->free_in_buffer == 0 && + !(*cinfo->dest->empty_output_buffer)(cinfo)) { + return false; + } + size_t buflen = bw->pos - bw->output_pos; + size_t copylen = std::min(cinfo->dest->free_in_buffer, buflen); + memcpy(cinfo->dest->next_output_byte, bw->data + bw->output_pos, copylen); + bw->output_pos += copylen; + cinfo->dest->free_in_buffer -= copylen; + cinfo->dest->next_output_byte += copylen; + } + bw->output_pos = bw->pos = 0; + return true; +} + +void JumpToByteBoundary(JpegBitWriter* bw) { + size_t n_bits = bw->free_bits & 7u; + if (n_bits > 0) { + WriteBits(bw, n_bits, (1u << n_bits) - 1); + } + bw->put_buffer <<= bw->free_bits; + while (bw->free_bits <= 56) { + int c = (bw->put_buffer >> 56) & 0xFF; + EmitByte(bw, c); + bw->put_buffer <<= 8; + bw->free_bits += 8; + } + bw->put_buffer = 0; + bw->free_bits = 64; +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/bit_writer.h b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.h new file mode 100644 index 0000000000..3adf1eaca1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/bit_writer.h @@ -0,0 +1,98 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_BIT_WRITER_H_ +#define LIB_JPEGLI_BIT_WRITER_H_ + +#include +#include + +#include "lib/jpegli/common.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/compiler_specific.h" + +namespace jpegli { + +// Handles the packing of bits into output bytes. +struct JpegBitWriter { + j_compress_ptr cinfo; + uint8_t* data; + size_t len; + size_t pos; + size_t output_pos; + uint64_t put_buffer; + int free_bits; + bool healthy; +}; + +void JpegBitWriterInit(j_compress_ptr cinfo); + +bool EmptyBitWriterBuffer(JpegBitWriter* bw); + +void JumpToByteBoundary(JpegBitWriter* bw); + +// Returns non-zero if and only if x has a zero byte, i.e. one of +// x & 0xff, x & 0xff00, ..., x & 0xff00000000000000 is zero. +static JXL_INLINE uint64_t HasZeroByte(uint64_t x) { + return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL; +} + +/** + * Writes the given byte to the output, writes an extra zero if byte is 0xFF. + * + * This method is "careless" - caller must make sure that there is enough + * space in the output buffer. Emits up to 2 bytes to buffer. + */ +static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) { + bw->data[bw->pos++] = byte; + if (byte == 0xFF) bw->data[bw->pos++] = 0; +} + +static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw) { + // At this point we are ready to emit the bytes of put_buffer to the output. + // The JPEG format requires that after every 0xff byte in the entropy + // coded section, there is a zero byte, therefore we first check if any of + // the bytes of put_buffer is 0xFF. + if (HasZeroByte(~bw->put_buffer)) { + // We have a 0xFF byte somewhere, examine each byte and append a zero + // byte if necessary. 
+ EmitByte(bw, (bw->put_buffer >> 56) & 0xFF); + EmitByte(bw, (bw->put_buffer >> 48) & 0xFF); + EmitByte(bw, (bw->put_buffer >> 40) & 0xFF); + EmitByte(bw, (bw->put_buffer >> 32) & 0xFF); + EmitByte(bw, (bw->put_buffer >> 24) & 0xFF); + EmitByte(bw, (bw->put_buffer >> 16) & 0xFF); + EmitByte(bw, (bw->put_buffer >> 8) & 0xFF); + EmitByte(bw, (bw->put_buffer >> 0) & 0xFF); + } else { + // We don't have any 0xFF bytes, output all 8 bytes without checking. + StoreBE64(bw->put_buffer, bw->data + bw->pos); + bw->pos += 8; + } +} + +static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) { + // This is an optimization; if everything goes well, + // then |nbits| is positive; if non-existing Huffman symbol is going to be + // encoded, its length should be zero; later encoder could check the + // "health" of JpegBitWriter. + if (nbits == 0) { + bw->healthy = false; + return; + } + bw->free_bits -= nbits; + if (bw->free_bits < 0) { + bw->put_buffer <<= (bw->free_bits + nbits); + bw->put_buffer |= (bits >> -bw->free_bits); + DischargeBitBuffer(bw); + bw->free_bits += 64; + bw->put_buffer = nbits; + } + bw->put_buffer <<= nbits; + bw->put_buffer |= bits; +} + +} // namespace jpegli +#endif // LIB_JPEGLI_BIT_WRITER_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/bitstream.cc b/third-party/libjxl/libjxl/lib/jpegli/bitstream.cc new file mode 100644 index 0000000000..3448367dde --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/bitstream.cc @@ -0,0 +1,452 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/bitstream.h" + +#include + +#include "lib/jpegli/bit_writer.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" + +namespace jpegli { + +void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize) { + size_t pos = 0; + while (pos < bufsize) { + if (cinfo->dest->free_in_buffer == 0 && + !(*cinfo->dest->empty_output_buffer)(cinfo)) { + JPEGLI_ERROR("Destination suspension is not supported in markers."); + } + size_t len = std::min(cinfo->dest->free_in_buffer, bufsize - pos); + memcpy(cinfo->dest->next_output_byte, buf + pos, len); + pos += len; + cinfo->dest->free_in_buffer -= len; + cinfo->dest->next_output_byte += len; + } +} + +void WriteOutput(j_compress_ptr cinfo, const std::vector& bytes) { + WriteOutput(cinfo, bytes.data(), bytes.size()); +} + +void WriteOutput(j_compress_ptr cinfo, std::initializer_list bytes) { + WriteOutput(cinfo, bytes.begin(), bytes.size()); +} + +void EncodeAPP0(j_compress_ptr cinfo) { + WriteOutput(cinfo, + {0xff, 0xe0, 0, 16, 'J', 'F', 'I', 'F', '\0', + cinfo->JFIF_major_version, cinfo->JFIF_minor_version, + cinfo->density_unit, static_cast(cinfo->X_density >> 8), + static_cast(cinfo->X_density & 0xff), + static_cast(cinfo->Y_density >> 8), + static_cast(cinfo->Y_density & 0xff), 0, 0}); +} + +void EncodeAPP14(j_compress_ptr cinfo) { + uint8_t color_transform = cinfo->jpeg_color_space == JCS_YCbCr ? 1 + : cinfo->jpeg_color_space == JCS_YCCK ? 
2 + : 0; + WriteOutput(cinfo, {0xff, 0xee, 0, 14, 'A', 'd', 'o', 'b', 'e', 0, 100, 0, 0, + 0, 0, color_transform}); +} + +void WriteFileHeader(j_compress_ptr cinfo) { + WriteOutput(cinfo, {0xFF, 0xD8}); // SOI + if (cinfo->write_JFIF_header) { + EncodeAPP0(cinfo); + } + if (cinfo->write_Adobe_marker) { + EncodeAPP14(cinfo); + } +} + +bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables) { + uint8_t data[4 + NUM_QUANT_TBLS * (1 + 2 * DCTSIZE2)]; // 520 bytes + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = 0xDB; + pos += 2; // Length will be filled in later. + + int send_table[NUM_QUANT_TBLS] = {}; + if (write_all_tables) { + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + if (cinfo->quant_tbl_ptrs[i]) send_table[i] = 1; + } + } else { + for (int c = 0; c < cinfo->num_components; ++c) { + send_table[cinfo->comp_info[c].quant_tbl_no] = 1; + } + } + + bool is_baseline = true; + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + if (!send_table[i]) continue; + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[i]; + if (quant_table == nullptr) { + JPEGLI_ERROR("Missing quant table %d", i); + } + int precision = 0; + for (size_t k = 0; k < DCTSIZE2; ++k) { + if (quant_table->quantval[k] > 255) { + precision = 1; + is_baseline = false; + } + } + if (quant_table->sent_table) { + continue; + } + data[pos++] = (precision << 4) + i; + for (size_t j = 0; j < DCTSIZE2; ++j) { + int val_idx = kJPEGNaturalOrder[j]; + int val = quant_table->quantval[val_idx]; + if (val == 0) { + JPEGLI_ERROR("Invalid quantval 0."); + } + if (precision) { + data[pos++] = val >> 8; + } + data[pos++] = val & 0xFFu; + } + quant_table->sent_table = TRUE; + } + if (pos > 4) { + data[2] = (pos - 2) >> 8u; + data[3] = (pos - 2) & 0xFFu; + WriteOutput(cinfo, data, pos); + } + return is_baseline; +} + +void EncodeSOF(j_compress_ptr cinfo, bool is_baseline) { + if (cinfo->data_precision != kJpegPrecision) { + is_baseline = false; + JPEGLI_ERROR("Unsupported data precision %d", cinfo->data_precision); + } + 
const uint8_t marker = cinfo->progressive_mode ? 0xc2 + : is_baseline ? 0xc0 + : 0xc1; + const size_t n_comps = cinfo->num_components; + const size_t marker_len = 8 + 3 * n_comps; + std::vector data(marker_len + 2); + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = marker; + data[pos++] = marker_len >> 8u; + data[pos++] = marker_len & 0xFFu; + data[pos++] = kJpegPrecision; + data[pos++] = cinfo->image_height >> 8u; + data[pos++] = cinfo->image_height & 0xFFu; + data[pos++] = cinfo->image_width >> 8u; + data[pos++] = cinfo->image_width & 0xFFu; + data[pos++] = n_comps; + for (size_t i = 0; i < n_comps; ++i) { + jpeg_component_info* comp = &cinfo->comp_info[i]; + data[pos++] = comp->component_id; + data[pos++] = ((comp->h_samp_factor << 4u) | (comp->v_samp_factor)); + const uint32_t quant_idx = comp->quant_tbl_no; + if (cinfo->quant_tbl_ptrs[quant_idx] == nullptr) { + JPEGLI_ERROR("Invalid component quant table index %u.", quant_idx); + } + data[pos++] = quant_idx; + } + WriteOutput(cinfo, data); +} + +void WriteFrameHeader(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + bool is_baseline = EncodeDQT(cinfo, /*write_all_tables=*/false); + if (cinfo->progressive_mode || cinfo->arith_code || + cinfo->data_precision != 8) { + is_baseline = false; + } + for (size_t i = 0; i < m->num_huffman_tables; ++i) { + int slot_id = m->slot_id_map[i]; + if (slot_id > 0x11 || (slot_id > 0x01 && slot_id < 0x10)) { + is_baseline = false; + } + } + EncodeSOF(cinfo, is_baseline); +} + +void EncodeDRI(j_compress_ptr cinfo) { + WriteOutput(cinfo, {0xFF, 0xDD, 0, 4, + static_cast(cinfo->restart_interval >> 8), + static_cast(cinfo->restart_interval & 0xFF)}); +} + +void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num) { + jpeg_comp_master* m = cinfo->master; + size_t marker_len = 2; + for (size_t i = 0; i < num; ++i) { + const JHUFF_TBL& table = m->huffman_tables[offset + i]; + if (table.sent_table) continue; + marker_len += kJpegHuffmanMaxBitLength + 1; + for 
(size_t j = 0; j <= kJpegHuffmanMaxBitLength; ++j) { + marker_len += table.bits[j]; + } + } + std::vector data(marker_len + 2); + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = 0xC4; + data[pos++] = marker_len >> 8u; + data[pos++] = marker_len & 0xFFu; + for (size_t i = 0; i < num; ++i) { + const JHUFF_TBL& table = m->huffman_tables[offset + i]; + if (table.sent_table) continue; + size_t total_count = 0; + for (size_t i = 0; i <= kJpegHuffmanMaxBitLength; ++i) { + total_count += table.bits[i]; + } + data[pos++] = m->slot_id_map[offset + i]; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + data[pos++] = table.bits[i]; + } + for (size_t i = 0; i < total_count; ++i) { + data[pos++] = table.huffval[i]; + } + } + if (marker_len > 2) { + WriteOutput(cinfo, data); + } +} + +void EncodeSOS(j_compress_ptr cinfo, int scan_index) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + const size_t marker_len = 6 + 2 * scan_info->comps_in_scan; + std::vector data(marker_len + 2); + size_t pos = 0; + data[pos++] = 0xFF; + data[pos++] = 0xDA; + data[pos++] = marker_len >> 8u; + data[pos++] = marker_len & 0xFFu; + data[pos++] = scan_info->comps_in_scan; + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + data[pos++] = cinfo->comp_info[comp_idx].component_id; + int dc_slot_id = m->slot_id_map[m->context_map[comp_idx]]; + int ac_context = m->ac_ctx_offset[scan_index] + i; + int ac_slot_id = m->slot_id_map[m->context_map[ac_context]]; + data[pos++] = (dc_slot_id << 4u) + (ac_slot_id - 16); + } + data[pos++] = scan_info->Ss; + data[pos++] = scan_info->Se; + data[pos++] = ((scan_info->Ah << 4u) | (scan_info->Al)); + WriteOutput(cinfo, data); +} + +void WriteScanHeader(j_compress_ptr cinfo, int scan_index) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + cinfo->restart_interval = 
m->scan_token_info[scan_index].restart_interval; + if (cinfo->restart_interval != m->last_restart_interval) { + EncodeDRI(cinfo); + m->last_restart_interval = cinfo->restart_interval; + } + size_t num_dht = 0; + if (scan_index == 0) { + // For the first scan we emit all DC and at most 4 AC Huffman codes. + for (size_t i = 0, num_ac = 0; i < m->num_huffman_tables; ++i) { + if (m->slot_id_map[i] >= 16 && num_ac++ >= 4) break; + ++num_dht; + } + } else if (scan_info->Ss > 0) { + // For multi-scan sequential and progressive DC scans we have already + // emitted all Huffman codes that we need before the first scan. For + // progressive AC scans we only need at most one new Huffman code. + if (m->context_map[m->ac_ctx_offset[scan_index]] == m->next_dht_index) { + num_dht = 1; + } + } + if (num_dht > 0) { + EncodeDHT(cinfo, m->next_dht_index, num_dht); + m->next_dht_index += num_dht; + } + EncodeSOS(cinfo, scan_index); +} + +void WriteBlock(const int32_t* JXL_RESTRICT symbols, + const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros, + const bool emit_eob, + const HuffmanCodeTable* JXL_RESTRICT dc_code, + const HuffmanCodeTable* JXL_RESTRICT ac_code, + JpegBitWriter* JXL_RESTRICT bw) { + int symbol = symbols[0]; + WriteBits(bw, dc_code->depth[symbol], dc_code->code[symbol] | extra_bits[0]); + for (int i = 1; i < num_nonzeros; ++i) { + symbol = symbols[i]; + if (symbol > 255) { + WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]); + symbol -= 256; + if (symbol > 255) { + WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]); + symbol -= 256; + if (symbol > 255) { + WriteBits(bw, ac_code->depth[0xf0], ac_code->code[0xf0]); + symbol -= 256; + } + } + } + WriteBits(bw, ac_code->depth[symbol], + ac_code->code[symbol] | extra_bits[i]); + } + if (emit_eob) { + WriteBits(bw, ac_code->depth[0], ac_code->code[0]); + } +} + +namespace { + +static JXL_INLINE void EmitMarker(JpegBitWriter* bw, int marker) { + bw->data[bw->pos++] = 0xFF; + bw->data[bw->pos++] = marker; 
+} + +void WriteTokens(j_compress_ptr cinfo, int scan_index, JpegBitWriter* bw) { + jpeg_comp_master* m = cinfo->master; + HuffmanCodeTable* coding_tables = &m->coding_tables[0]; + int next_restart_marker = 0; + const ScanTokenInfo& sti = m->scan_token_info[scan_index]; + size_t num_token_arrays = m->cur_token_array + 1; + size_t total_tokens = 0; + size_t restart_idx = 0; + size_t next_restart = sti.restarts[restart_idx]; + uint8_t* context_map = m->context_map; + for (size_t i = 0; i < num_token_arrays; ++i) { + Token* tokens = m->token_arrays[i].tokens; + size_t num_tokens = m->token_arrays[i].num_tokens; + if (sti.token_offset < total_tokens + num_tokens && + total_tokens < sti.token_offset + sti.num_tokens) { + size_t start_ix = + total_tokens < sti.token_offset ? sti.token_offset - total_tokens : 0; + size_t end_ix = std::min(sti.token_offset + sti.num_tokens - total_tokens, + num_tokens); + size_t cycle_len = bw->len / 8; + size_t next_cycle = cycle_len; + for (size_t i = start_ix; i < end_ix; ++i) { + if (total_tokens + i == next_restart) { + JumpToByteBoundary(bw); + EmitMarker(bw, 0xD0 + next_restart_marker); + next_restart_marker += 1; + next_restart_marker &= 0x7; + next_restart = sti.restarts[++restart_idx]; + } + Token t = tokens[i]; + const HuffmanCodeTable* code = &coding_tables[context_map[t.context]]; + WriteBits(bw, code->depth[t.symbol], code->code[t.symbol] | t.bits); + if (--next_cycle == 0) { + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR( + "Output suspension is not supported in " + "finish_compress"); + } + next_cycle = cycle_len; + } + } + } + total_tokens += num_tokens; + } +} + +void WriteACRefinementTokens(j_compress_ptr cinfo, int scan_index, + JpegBitWriter* bw) { + jpeg_comp_master* m = cinfo->master; + const ScanTokenInfo& sti = m->scan_token_info[scan_index]; + const uint8_t context = m->ac_ctx_offset[scan_index]; + const HuffmanCodeTable* code = &m->coding_tables[m->context_map[context]]; + size_t cycle_len = bw->len / 64; + 
size_t next_cycle = cycle_len; + size_t refbit_idx = 0; + size_t eobrun_idx = 0; + size_t restart_idx = 0; + size_t next_restart = sti.restarts[restart_idx]; + int next_restart_marker = 0; + for (size_t i = 0; i < sti.num_tokens; ++i) { + if (i == next_restart) { + JumpToByteBoundary(bw); + EmitMarker(bw, 0xD0 + next_restart_marker); + next_restart_marker += 1; + next_restart_marker &= 0x7; + next_restart = sti.restarts[++restart_idx]; + } + RefToken t = sti.tokens[i]; + int symbol = t.symbol & 253; + uint16_t bits = 0; + if ((symbol & 1) == 0) { + int r = symbol >> 4; + if (r > 0 && r < 15) { + bits = sti.eobruns[eobrun_idx++]; + } + } else { + bits = (t.symbol >> 1) & 1; + } + WriteBits(bw, code->depth[symbol], code->code[symbol] | bits); + for (int j = 0; j < t.refbits; ++j) { + WriteBits(bw, 1, sti.refbits[refbit_idx++]); + } + if (--next_cycle == 0) { + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR("Output suspension is not supported in finish_compress"); + } + next_cycle = cycle_len; + } + } +} + +void WriteDCRefinementBits(j_compress_ptr cinfo, int scan_index, + JpegBitWriter* bw) { + jpeg_comp_master* m = cinfo->master; + const ScanTokenInfo& sti = m->scan_token_info[scan_index]; + size_t restart_idx = 0; + size_t next_restart = sti.restarts[restart_idx]; + int next_restart_marker = 0; + size_t cycle_len = bw->len * 4; + size_t next_cycle = cycle_len; + size_t refbit_idx = 0; + for (size_t i = 0; i < sti.num_tokens; ++i) { + if (i == next_restart) { + JumpToByteBoundary(bw); + EmitMarker(bw, 0xD0 + next_restart_marker); + next_restart_marker += 1; + next_restart_marker &= 0x7; + next_restart = sti.restarts[++restart_idx]; + } + WriteBits(bw, 1, sti.refbits[refbit_idx++]); + if (--next_cycle == 0) { + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR( + "Output suspension is not supported in " + "finish_compress"); + } + next_cycle = cycle_len; + } + } +} + +} // namespace + +void WriteScanData(j_compress_ptr cinfo, int scan_index) { + const jpeg_scan_info* 
scan_info = &cinfo->scan_info[scan_index]; + JpegBitWriter* bw = &cinfo->master->bw; + if (scan_info->Ah == 0) { + WriteTokens(cinfo, scan_index, bw); + } else if (scan_info->Ss > 0) { + WriteACRefinementTokens(cinfo, scan_index, bw); + } else { + WriteDCRefinementBits(cinfo, scan_index, bw); + } + if (!bw->healthy) { + JPEGLI_ERROR("Unknown Huffman coded symbol found in scan %d", scan_index); + } + JumpToByteBoundary(bw); + if (!EmptyBitWriterBuffer(bw)) { + JPEGLI_ERROR("Output suspension is not supported in finish_compress"); + } +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/bitstream.h b/third-party/libjxl/libjxl/lib/jpegli/bitstream.h new file mode 100644 index 0000000000..aa54c73d7e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/bitstream.h @@ -0,0 +1,44 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_BITSTREAM_H_ +#define LIB_JPEGLI_BITSTREAM_H_ + +#include +#include + +#include "lib/jpegli/encode_internal.h" + +namespace jpegli { + +void WriteOutput(j_compress_ptr cinfo, const uint8_t* buf, size_t bufsize); +void WriteOutput(j_compress_ptr cinfo, const std::vector& bytes); +void WriteOutput(j_compress_ptr cinfo, std::initializer_list bytes); + +void EncodeAPP0(j_compress_ptr cinfo); +void EncodeAPP14(j_compress_ptr cinfo); +void WriteFileHeader(j_compress_ptr cinfo); + +// Returns true of only baseline 8-bit tables are used. 
+bool EncodeDQT(j_compress_ptr cinfo, bool write_all_tables); +void EncodeSOF(j_compress_ptr cinfo, bool is_baseline); +void WriteFrameHeader(j_compress_ptr cinfo); + +void EncodeDRI(j_compress_ptr cinfo); +void EncodeDHT(j_compress_ptr cinfo, size_t offset, size_t num); +void EncodeSOS(j_compress_ptr cinfo, int scan_index); +void WriteScanHeader(j_compress_ptr cinfo, int scan_index); + +void WriteBlock(const int32_t* JXL_RESTRICT symbols, + const int32_t* JXL_RESTRICT extra_bits, const int num_nonzeros, + const bool emit_eob, + const HuffmanCodeTable* JXL_RESTRICT dc_code, + const HuffmanCodeTable* JXL_RESTRICT ac_code, + JpegBitWriter* JXL_RESTRICT bw); +void WriteScanData(j_compress_ptr cinfo, int scan_index); + +} // namespace jpegli + +#endif // LIB_JPEGLI_BITSTREAM_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/color_quantize.cc b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.cc new file mode 100644 index 0000000000..1079c45c9f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.cc @@ -0,0 +1,533 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/color_quantize.h" + +#include +#include +#include + +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/error.h" + +namespace jpegli { + +namespace { + +static constexpr int kNumColorCellBits[kMaxComponents] = {3, 4, 3, 3}; +static constexpr int kCompW[kMaxComponents] = {2, 3, 1, 1}; + +int Pow(int a, int b) { + int r = 1; + for (int i = 0; i < b; ++i) { + r *= a; + } + return r; +} + +int ComponentOrder(j_decompress_ptr cinfo, int i) { + if (cinfo->out_color_components == 3) { + return i < 2 ? 
1 - i : i; + } + return i; +} + +int GetColorComponent(int i, int N) { + return (i * 255 + (N - 1) / 2) / (N - 1); +} + +} // namespace + +void ChooseColorMap1Pass(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + int components = cinfo->out_color_components; + int desired = std::min(cinfo->desired_number_of_colors, 256); + int num = 1; + while (Pow(num + 1, components) <= desired) { + ++num; + } + if (num == 1) { + JPEGLI_ERROR("Too few colors (%d) in requested colormap", desired); + } + int actual = Pow(num, components); + for (int i = 0; i < components; ++i) { + m->num_colors_[i] = num; + } + while (actual < desired) { + int total = actual; + for (int i = 0; i < components; ++i) { + int c = ComponentOrder(cinfo, i); + int new_total = (actual / m->num_colors_[c]) * (m->num_colors_[c] + 1); + if (new_total <= desired) { + ++m->num_colors_[c]; + actual = new_total; + } + } + if (actual == total) { + break; + } + } + cinfo->actual_number_of_colors = actual; + cinfo->colormap = (*cinfo->mem->alloc_sarray)( + reinterpret_cast(cinfo), JPOOL_IMAGE, actual, components); + int next_color[kMaxComponents] = {0}; + for (int i = 0; i < actual; ++i) { + for (int c = 0; c < components; ++c) { + cinfo->colormap[c][i] = + GetColorComponent(next_color[c], m->num_colors_[c]); + } + int c = components - 1; + while (c > 0 && next_color[c] + 1 == m->num_colors_[c]) { + next_color[c--] = 0; + } + ++next_color[c]; + } + if (!m->colormap_lut_) { + m->colormap_lut_ = Allocate(cinfo, components * 256, JPOOL_IMAGE); + } + int stride = actual; + for (int c = 0; c < components; ++c) { + int N = m->num_colors_[c]; + stride /= N; + for (int i = 0; i < 256; ++i) { + int index = ((2 * i - 1) * (N - 1) + 254) / 510; + m->colormap_lut_[c * 256 + i] = index * stride; + } + } +} + +namespace { + +// 2^13 priority levels for the PQ seems to be a good compromise between +// accuracy, running time and stack space usage. 
+static const int kMaxPriority = 1 << 13; +static const int kMaxLevel = 3; + +// This function is used in the multi-resolution grid to be able to compute +// the keys for the different resolutions by just shifting the first key. +inline int InterlaceBitsRGB(uint8_t r, uint8_t g, uint8_t b) { + int z = 0; + for (int i = 0; i < 7; ++i) { + z += (r >> 5) & 4; + z += (g >> 6) & 2; + z += (b >> 7); + z <<= 3; + r <<= 1; + g <<= 1; + b <<= 1; + } + z += (r >> 5) & 4; + z += (g >> 6) & 2; + z += (b >> 7); + return z; +} + +// This function will compute the actual priorities of the colors based on +// the current distance from the palette, the population count and the signals +// from the multi-resolution grid. +inline int Priority(int d, int n, const int* density, const int* radius) { + int p = d * n; + for (int level = 0; level < kMaxLevel; ++level) { + if (d > radius[level]) { + p += density[level] * (d - radius[level]); + } + } + return std::min(kMaxPriority - 1, p >> 4); +} + +inline int ColorIntQuadDistanceRGB(uint8_t r1, uint8_t g1, uint8_t b1, + uint8_t r2, uint8_t g2, uint8_t b2) { + // weights for the intensity calculation + static constexpr int ired = 2; + static constexpr int igreen = 5; + static constexpr int iblue = 1; + // normalization factor for the intensity calculation (2^ishift) + static constexpr int ishift = 3; + const int rd = r1 - r2; + const int gd = g1 - g2; + const int bd = b1 - b2; + const int id = ired * rd + igreen * gd + iblue * bd; + return rd * rd + gd * gd + bd * bd + ((id * id) >> (2 * ishift)); +} + +inline int ScaleQuadDistanceRGB(int d) { + return static_cast(sqrt(d * 0.25) + 0.5); +} + +// The function updates the minimal distances, the clustering and the +// quantization error after the insertion of the new color into the palette. 
+void AddToRGBPalette(const uint8_t* red, const uint8_t* green, + const uint8_t* blue, + const int* count, // histogram of colors + const int index, // index of color to be added + const int k, // size of current palette + const int n, // number of colors + int* dist, // array of distances from palette + int* cluster, // mapping of color indices to palette + int* center, // the inverse mapping + int64_t* error) { // measure of the quantization error + center[k] = index; + cluster[index] = k; + *error -= + static_cast(dist[index]) * static_cast(count[index]); + dist[index] = 0; + for (int j = 0; j < n; ++j) { + if (dist[j] > 0) { + const int d = ColorIntQuadDistanceRGB( + red[index], green[index], blue[index], red[j], green[j], blue[j]); + if (d < dist[j]) { + *error += static_cast((d - dist[j])) * + static_cast(count[j]); + dist[j] = d; + cluster[j] = k; + } + } + } +} + +struct RGBPixelHasher { + // A quick but good-enough hash to get 24 bits of RGB into the lower 12 bits. + size_t operator()(uint32_t a) const { return (a ^ (a >> 12)) * 0x9e3779b9; } +}; + +struct WangHasher { + // Thomas Wang's Hash. Nearly perfect and still quite fast. Above (for + // pixels) we use a simpler hash because the number of hash calls is + // proportional to the number of pixels and that hash dominates; we want the + // cost to be minimal and we start with a large table. We can use a better + // hash for the histogram since the number of hash calls is proportional to + // the number of unique colors in the image, which is hopefully much smaller. + // Note that the difference is slight; e.g. replacing RGBPixelHasher with + // WangHasher only slows things down by 5% on an Opteron. + size_t operator()(uint32_t a) const { + a = (a ^ 61) ^ (a >> 16); + a = a + (a << 3); + a = a ^ (a >> 4); + a = a * 0x27d4eb2d; + a = a ^ (a >> 15); + return a; + } +}; + +// Build an index of all the different colors in the input +// image. 
To do this we map the 24 bit RGB representation of the colors +// to a unique integer index assigned to the different colors in order of +// appearence in the image. Return the number of unique colors found. +// The colors are pre-quantized to 3 * 6 bits precision. +static int BuildRGBColorIndex(const uint8_t* const image, int const num_pixels, + int* const count, uint8_t* const red, + uint8_t* const green, uint8_t* const blue) { + // Impossible because rgb are in the low 24 bits, and the upper 8 bits is 0. + const uint32_t impossible_pixel_value = 0x10000000; + std::unordered_map index_map(1 << 12); + std::unordered_map::iterator index_map_lookup; + const uint8_t* imagep = &image[0]; + uint32_t prev_pixel = impossible_pixel_value; + int index = 0; + int n = 0; + for (int i = 0; i < num_pixels; ++i) { + uint8_t r = ((*imagep++) & 0xfc) + 2; + uint8_t g = ((*imagep++) & 0xfc) + 2; + uint8_t b = ((*imagep++) & 0xfc) + 2; + uint32_t pixel = (b << 16) | (g << 8) | r; + if (pixel != prev_pixel) { + prev_pixel = pixel; + index_map_lookup = index_map.find(pixel); + if (index_map_lookup != index_map.end()) { + index = index_map_lookup->second; + } else { + index_map[pixel] = index = n++; + red[index] = r; + green[index] = g; + blue[index] = b; + } + } + ++count[index]; + } + return n; +} + +} // namespace + +void ChooseColorMap2Pass(j_decompress_ptr cinfo) { + if (cinfo->out_color_space != JCS_RGB) { + JPEGLI_ERROR("Two-pass quantizer must use RGB output color space."); + } + jpeg_decomp_master* m = cinfo->master; + const size_t num_pixels = cinfo->output_width * cinfo->output_height; + const int max_color_count = std::max(num_pixels, 1u << 18); + const int max_palette_size = cinfo->desired_number_of_colors; + std::unique_ptr red(new uint8_t[max_color_count]); + std::unique_ptr green(new uint8_t[max_color_count]); + std::unique_ptr blue(new uint8_t[max_color_count]); + std::vector count(max_color_count, 0); + // number of colors + int n = BuildRGBColorIndex(m->pixels_, 
num_pixels, &count[0], &red[0], + &green[0], &blue[0]); + + std::vector dist(n, std::numeric_limits::max()); + std::vector cluster(n); + std::vector in_palette(n, false); + int center[256]; + int k = 0; // palette size + const int count_threshold = (num_pixels * 4) / max_palette_size; + static constexpr int kAveragePixelErrorThreshold = 1; + const int64_t error_threshold = num_pixels * kAveragePixelErrorThreshold; + int64_t error = 0; // quantization error + + int max_count = 0; + int winner = 0; + for (int i = 0; i < n; ++i) { + if (count[i] > max_count) { + max_count = count[i]; + winner = i; + } + if (!in_palette[i] && count[i] > count_threshold) { + AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n, + &dist[0], &cluster[0], ¢er[0], &error); + in_palette[i] = true; + } + } + if (k == 0) { + AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], winner, k++, n, + &dist[0], &cluster[0], ¢er[0], &error); + in_palette[winner] = true; + } + + // Calculation of the multi-resolution density grid. + std::vector density(n * kMaxLevel); + std::vector radius(n * kMaxLevel); + std::unordered_map histogram[kMaxLevel]; + for (int level = 0; level < kMaxLevel; ++level) { + // This value is never used because key = InterlaceBitsRGB(...) 
>> 6 + } + + for (int i = 0; i < n; ++i) { + if (!in_palette[i]) { + const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6; + for (int level = 0; level < kMaxLevel; ++level) { + histogram[level][key >> (3 * level)] += count[i]; + } + } + } + for (int i = 0; i < n; ++i) { + if (!in_palette[i]) { + for (int level = 0; level < kMaxLevel; ++level) { + const int mask = (4 << level) - 1; + const int rd = std::max(red[i] & mask, mask - (red[i] & mask)); + const int gd = std::max(green[i] & mask, mask - (green[i] & mask)); + const int bd = std::max(blue[i] & mask, mask - (blue[i] & mask)); + radius[i * kMaxLevel + level] = + ScaleQuadDistanceRGB(ColorIntQuadDistanceRGB(0, 0, 0, rd, gd, bd)); + } + const int key = InterlaceBitsRGB(red[i], green[i], blue[i]) >> 6; + if (kMaxLevel > 0) { + density[i * kMaxLevel] = histogram[0][key] - count[i]; + } + for (int level = 1; level < kMaxLevel; ++level) { + density[i * kMaxLevel + level] = + (histogram[level][key >> (3 * level)] - + histogram[level - 1][key >> (3 * level - 3)]); + } + } + } + + // Calculate the initial error now that the palette has been initialized. 
+ error = 0; + for (int i = 0; i < n; ++i) { + error += static_cast(dist[i]) * static_cast(count[i]); + } + + std::unique_ptr[]> bucket_array( + new std::vector[kMaxPriority]); + int top_priority = -1; + for (int i = 0; i < n; ++i) { + if (!in_palette[i]) { + int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i], + &density[i * kMaxLevel], &radius[i * kMaxLevel]); + bucket_array[priority].push_back(i); + top_priority = std::max(priority, top_priority); + } + } + double error_accum = 0; + while (top_priority >= 0 && k < max_palette_size) { + if (error < error_threshold) { + error_accum += std::min(error_threshold, error_threshold - error); + if (error_accum >= 10 * error_threshold) { + break; + } + } + int i = bucket_array[top_priority].back(); + int priority = Priority(ScaleQuadDistanceRGB(dist[i]), count[i], + &density[i * kMaxLevel], &radius[i * kMaxLevel]); + if (priority < top_priority) { + bucket_array[priority].push_back(i); + } else { + AddToRGBPalette(&red[0], &green[0], &blue[0], &count[0], i, k++, n, + &dist[0], &cluster[0], ¢er[0], &error); + } + bucket_array[top_priority].pop_back(); + while (top_priority >= 0 && bucket_array[top_priority].empty()) { + --top_priority; + } + } + + cinfo->actual_number_of_colors = k; + cinfo->colormap = (*cinfo->mem->alloc_sarray)( + reinterpret_cast(cinfo), JPOOL_IMAGE, k, 3); + for (int i = 0; i < k; ++i) { + int index = center[i]; + cinfo->colormap[0][i] = red[index]; + cinfo->colormap[1][i] = green[index]; + cinfo->colormap[2][i] = blue[index]; + } +} + +namespace { + +void FindCandidatesForCell(j_decompress_ptr cinfo, int ncomp, int cell[], + std::vector* candidates) { + int cell_min[kMaxComponents]; + int cell_max[kMaxComponents]; + int cell_center[kMaxComponents]; + for (int c = 0; c < ncomp; ++c) { + cell_min[c] = cell[c] << (8 - kNumColorCellBits[c]); + cell_max[c] = cell_min[c] + (1 << (8 - kNumColorCellBits[c])) - 1; + cell_center[c] = (cell_min[c] + cell_max[c]) >> 1; + } + int min_maxdist = 
std::numeric_limits::max(); + int mindist[256]; + for (int i = 0; i < cinfo->actual_number_of_colors; ++i) { + int dmin = 0; + int dmax = 0; + for (int c = 0; c < ncomp; ++c) { + int palette_c = cinfo->colormap[c][i]; + int dminc = 0, dmaxc; + if (palette_c < cell_min[c]) { + dminc = cell_min[c] - palette_c; + dmaxc = cell_max[c] - palette_c; + } else if (palette_c > cell_max[c]) { + dminc = palette_c - cell_max[c]; + dmaxc = palette_c - cell_min[c]; + } else if (palette_c > cell_center[c]) { + dmaxc = palette_c - cell_min[c]; + } else { + dmaxc = cell_max[c] - palette_c; + } + dminc *= kCompW[c]; + dmaxc *= kCompW[c]; + dmin += dminc * dminc; + dmax += dmaxc * dmaxc; + } + mindist[i] = dmin; + min_maxdist = std::min(dmax, min_maxdist); + } + for (int i = 0; i < cinfo->actual_number_of_colors; ++i) { + if (mindist[i] < min_maxdist) { + candidates->push_back(i); + } + } +} + +} // namespace + +void CreateInverseColorMap(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + int ncomp = cinfo->out_color_components; + int num_cells = 1; + for (int c = 0; c < ncomp; ++c) { + num_cells *= (1 << kNumColorCellBits[c]); + } + m->candidate_lists_.resize(num_cells); + + int next_cell[kMaxComponents] = {0}; + for (int i = 0; i < num_cells; ++i) { + m->candidate_lists_[i].clear(); + FindCandidatesForCell(cinfo, ncomp, next_cell, &m->candidate_lists_[i]); + int c = ncomp - 1; + while (c > 0 && next_cell[c] + 1 == (1 << kNumColorCellBits[c])) { + next_cell[c--] = 0; + } + ++next_cell[c]; + } + m->regenerate_inverse_colormap_ = false; +} + +int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel) { + jpeg_decomp_master* m = cinfo->master; + int num_channels = cinfo->out_color_components; + int index = 0; + if (m->quant_mode_ == 1) { + for (int c = 0; c < num_channels; ++c) { + index += m->colormap_lut_[c * 256 + pixel[c]]; + } + } else { + size_t cell_idx = 0; + size_t stride = 1; + for (int c = num_channels - 1; c >= 0; --c) { + cell_idx += (pixel[c] >> (8 - 
kNumColorCellBits[c])) * stride; + stride <<= kNumColorCellBits[c]; + } + JXL_ASSERT(cell_idx < m->candidate_lists_.size()); + int mindist = std::numeric_limits::max(); + const auto& candidates = m->candidate_lists_[cell_idx]; + for (uint8_t i : candidates) { + int dist = 0; + for (int c = 0; c < num_channels; ++c) { + int d = (cinfo->colormap[c][i] - pixel[c]) * kCompW[c]; + dist += d * d; + } + if (dist < mindist) { + mindist = dist; + index = i; + } + } + } + JXL_ASSERT(index < cinfo->actual_number_of_colors); + return index; +} + +void CreateOrderedDitherTables(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + static constexpr size_t kDitherSize = 4; + static constexpr size_t kDitherMask = kDitherSize - 1; + static constexpr float kBaseDitherMatrix[] = { + 0, 8, 2, 10, // + 12, 4, 14, 6, // + 3, 11, 1, 9, // + 15, 7, 13, 5, // + }; + m->dither_size_ = kDitherSize; + m->dither_mask_ = kDitherMask; + size_t ncells = m->dither_size_ * m->dither_size_; + for (int c = 0; c < cinfo->out_color_components; ++c) { + float spread = 1.0f / (m->num_colors_[c] - 1); + float mul = spread / ncells; + float offset = 0.5f * spread; + if (m->dither_[c] == nullptr) { + m->dither_[c] = Allocate(cinfo, ncells, JPOOL_IMAGE_ALIGNED); + } + for (size_t idx = 0; idx < ncells; ++idx) { + m->dither_[c][idx] = kBaseDitherMatrix[idx] * mul - offset; + } + } +} + +void InitFSDitherState(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + for (int c = 0; c < cinfo->out_color_components; ++c) { + if (m->error_row_[c] == nullptr) { + m->error_row_[c] = + Allocate(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED); + m->error_row_[c + kMaxComponents] = + Allocate(cinfo, cinfo->output_width, JPOOL_IMAGE_ALIGNED); + } + memset(m->error_row_[c], 0.0, cinfo->output_width * sizeof(float)); + memset(m->error_row_[c + kMaxComponents], 0.0, + cinfo->output_width * sizeof(float)); + } +} + +} // namespace jpegli diff --git 
a/third-party/libjxl/libjxl/lib/jpegli/color_quantize.h b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.h new file mode 100644 index 0000000000..3dda1d8713 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/color_quantize.h @@ -0,0 +1,27 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_COLOR_QUANTIZE_H_ +#define LIB_JPEGLI_COLOR_QUANTIZE_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ChooseColorMap1Pass(j_decompress_ptr cinfo); + +void ChooseColorMap2Pass(j_decompress_ptr cinfo); + +void CreateInverseColorMap(j_decompress_ptr cinfo); + +void CreateOrderedDitherTables(j_decompress_ptr cinfo); + +void InitFSDitherState(j_decompress_ptr cinfo); + +int LookupColorIndex(j_decompress_ptr cinfo, JSAMPLE* pixel); + +} // namespace jpegli + +#endif // LIB_JPEGLI_COLOR_QUANTIZE_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/color_transform.cc b/third-party/libjxl/libjxl/lib/jpegli/color_transform.cc new file mode 100644 index 0000000000..020a6fd80c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/color_transform.cc @@ -0,0 +1,281 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/color_transform.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/color_transform.cc" +#include +#include + +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Sub; + +void YCbCrToRGB(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + + // Full-range BT.601 as defined by JFIF Clause 7: + // https://www.itu.int/rec/T-REC-T.871-201105-I/en + const auto crcr = Set(df, 1.402f); + const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f); + const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f); + const auto cbcb = Set(df, 1.772f); + + for (size_t x = 0; x < xsize; x += Lanes(df)) { + const auto y_vec = Load(df, row0 + x); + const auto cb_vec = Load(df, row1 + x); + const auto cr_vec = Load(df, row2 + x); + const auto r_vec = MulAdd(crcr, cr_vec, y_vec); + const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec)); + const auto b_vec = MulAdd(cbcb, cb_vec, y_vec); + Store(r_vec, df, row0 + x); + Store(g_vec, df, row1 + x); + Store(b_vec, df, row2 + x); + } +} + +void YCCKToCMYK(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + YCbCrToRGB(row, xsize); + const auto offset = Set(df, -1.0f / 255.0f); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + Store(Sub(offset, Load(df, row0 + x)), df, row0 + x); + Store(Sub(offset, Load(df, row1 + x)), df, row1 + x); + Store(Sub(offset, Load(df, row2 + x)), df, row2 + x); + } +} + +void RGBToYCbCr(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + // Full-range BT.601 as defined by JFIF Clause 7: + // https://www.itu.int/rec/T-REC-T.871-201105-I/en + const auto c128 = Set(df, 128.0f); + const auto kR = Set(df, 0.299f); 
// NTSC luma + const auto kG = Set(df, 0.587f); + const auto kB = Set(df, 0.114f); + const auto kAmpR = Set(df, 0.701f); + const auto kAmpB = Set(df, 0.886f); + const auto kDiffR = Add(kAmpR, kR); + const auto kDiffB = Add(kAmpB, kB); + const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB)))); + const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB)))); + + for (size_t x = 0; x < xsize; x += Lanes(df)) { + const auto r = Load(df, row0 + x); + const auto g = Load(df, row1 + x); + const auto b = Load(df, row2 + x); + const auto r_base = Mul(r, kR); + const auto r_diff = Mul(r, kDiffR); + const auto g_base = Mul(g, kG); + const auto b_base = Mul(b, kB); + const auto b_diff = Mul(b, kDiffB); + const auto y_base = Add(r_base, Add(g_base, b_base)); + const auto cb_vec = MulAdd(Sub(b_diff, y_base), kNormB, c128); + const auto cr_vec = MulAdd(Sub(r_diff, y_base), kNormR, c128); + Store(y_base, df, row0 + x); + Store(cb_vec, df, row1 + x); + Store(cr_vec, df, row2 + x); + } +} + +void CMYKToYCCK(float* row[kMaxComponents], size_t xsize) { + const HWY_CAPPED(float, 8) df; + float* JXL_RESTRICT row0 = row[0]; + float* JXL_RESTRICT row1 = row[1]; + float* JXL_RESTRICT row2 = row[2]; + const auto unity = Set(df, 255.0f); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + Store(Sub(unity, Load(df, row0 + x)), df, row0 + x); + Store(Sub(unity, Load(df, row1 + x)), df, row1 + x); + Store(Sub(unity, Load(df, row2 + x)), df, row2 + x); + } + RGBToYCbCr(row, xsize); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(CMYKToYCCK); +HWY_EXPORT(YCCKToCMYK); +HWY_EXPORT(YCbCrToRGB); +HWY_EXPORT(RGBToYCbCr); + +bool CheckColorSpaceComponents(int num_components, J_COLOR_SPACE colorspace) { + switch (colorspace) { + case JCS_GRAYSCALE: + return num_components == 1; + case JCS_RGB: + case JCS_YCbCr: + case JCS_EXT_RGB: + case JCS_EXT_BGR: 
+ return num_components == 3; + case JCS_CMYK: + case JCS_YCCK: + case JCS_EXT_RGBX: + case JCS_EXT_BGRX: + case JCS_EXT_XBGR: + case JCS_EXT_XRGB: + case JCS_EXT_RGBA: + case JCS_EXT_BGRA: + case JCS_EXT_ABGR: + case JCS_EXT_ARGB: + return num_components == 4; + default: + // Unrecognized colorspaces can have any number of channels, since no + // color transform will be performed on them. + return true; + } +} + +void NullTransform(float* row[kMaxComponents], size_t len) {} + +void GrayscaleToRGB(float* row[kMaxComponents], size_t len) { + memcpy(row[1], row[0], len * sizeof(row[1][0])); + memcpy(row[2], row[0], len * sizeof(row[2][0])); +} + +void GrayscaleToYCbCr(float* row[kMaxComponents], size_t len) { + memset(row[1], 0, len * sizeof(row[1][0])); + memset(row[2], 0, len * sizeof(row[2][0])); +} + +void ChooseColorTransform(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + if (!CheckColorSpaceComponents(cinfo->input_components, + cinfo->in_color_space)) { + JPEGLI_ERROR("Invalid number of input components %d for colorspace %d", + cinfo->input_components, cinfo->in_color_space); + } + if (!CheckColorSpaceComponents(cinfo->num_components, + cinfo->jpeg_color_space)) { + JPEGLI_ERROR("Invalid number of components %d for colorspace %d", + cinfo->num_components, cinfo->jpeg_color_space); + } + if (cinfo->jpeg_color_space == cinfo->in_color_space) { + if (cinfo->num_components != cinfo->input_components) { + JPEGLI_ERROR("Input/output components mismatch: %d vs %d", + cinfo->input_components, cinfo->num_components); + } + // No color transform requested. 
+ m->color_transform = NullTransform; + return; + } + + if (cinfo->in_color_space == JCS_RGB && m->xyb_mode) { + JPEGLI_ERROR("Color transform on XYB colorspace is not supported."); + } + + m->color_transform = nullptr; + if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + if (cinfo->in_color_space == JCS_RGB) { + m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr); + } else if (cinfo->in_color_space == JCS_YCbCr || + cinfo->in_color_space == JCS_YCCK) { + // Since the first luminance channel is the grayscale version of the + // image, nothing to do here + m->color_transform = NullTransform; + } + } else if (cinfo->jpeg_color_space == JCS_RGB) { + if (cinfo->in_color_space == JCS_GRAYSCALE) { + m->color_transform = GrayscaleToRGB; + } + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + if (cinfo->in_color_space == JCS_RGB) { + m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr); + } else if (cinfo->in_color_space == JCS_GRAYSCALE) { + m->color_transform = GrayscaleToYCbCr; + } + } else if (cinfo->jpeg_color_space == JCS_YCCK) { + if (cinfo->in_color_space == JCS_CMYK) { + m->color_transform = HWY_DYNAMIC_DISPATCH(CMYKToYCCK); + } + } + + if (m->color_transform == nullptr) { + // TODO(szabadka) Support more color transforms. 
+ JPEGLI_ERROR("Unsupported color transform %d -> %d", cinfo->in_color_space, + cinfo->jpeg_color_space); + } +} + +void ChooseColorTransform(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (!CheckColorSpaceComponents(cinfo->out_color_components, + cinfo->out_color_space)) { + JPEGLI_ERROR("Invalid number of output components %d for colorspace %d", + cinfo->out_color_components, cinfo->out_color_space); + } + if (!CheckColorSpaceComponents(cinfo->num_components, + cinfo->jpeg_color_space)) { + JPEGLI_ERROR("Invalid number of components %d for colorspace %d", + cinfo->num_components, cinfo->jpeg_color_space); + } + if (cinfo->jpeg_color_space == cinfo->out_color_space) { + if (cinfo->num_components != cinfo->out_color_components) { + JPEGLI_ERROR("Input/output components mismatch: %d vs %d", + cinfo->num_components, cinfo->out_color_components); + } + // No color transform requested. + m->color_transform = NullTransform; + return; + } + + m->color_transform = nullptr; + if (cinfo->jpeg_color_space == JCS_GRAYSCALE) { + if (cinfo->out_color_space == JCS_RGB) { + m->color_transform = GrayscaleToRGB; + } + } else if (cinfo->jpeg_color_space == JCS_RGB) { + if (cinfo->out_color_space == JCS_GRAYSCALE) { + m->color_transform = HWY_DYNAMIC_DISPATCH(RGBToYCbCr); + } + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + if (cinfo->out_color_space == JCS_RGB) { + m->color_transform = HWY_DYNAMIC_DISPATCH(YCbCrToRGB); + } else if (cinfo->out_color_space == JCS_GRAYSCALE) { + m->color_transform = NullTransform; + } + } else if (cinfo->jpeg_color_space == JCS_YCCK) { + if (cinfo->out_color_space == JCS_CMYK) { + m->color_transform = HWY_DYNAMIC_DISPATCH(YCCKToCMYK); + } + } + + if (m->color_transform == nullptr) { + // TODO(szabadka) Support more color transforms. 
+ JPEGLI_ERROR("Unsupported color transform %d -> %d", + cinfo->jpeg_color_space, cinfo->out_color_space); + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/color_transform.h b/third-party/libjxl/libjxl/lib/jpegli/color_transform.h new file mode 100644 index 0000000000..8d58f8849a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/color_transform.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_COLOR_TRANSFORM_H_ +#define LIB_JPEGLI_COLOR_TRANSFORM_H_ + +#include "lib/jpegli/common.h" +#include "lib/jxl/base/compiler_specific.h" + +namespace jpegli { + +void ChooseColorTransform(j_compress_ptr cinfo); + +void ChooseColorTransform(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_COLOR_TRANSFORM_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/common.cc b/third-party/libjxl/libjxl/lib/jpegli/common.cc new file mode 100644 index 0000000000..5f34372f3e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/common.cc @@ -0,0 +1,59 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/common.h" + +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/memory_manager.h" + +void jpegli_abort(j_common_ptr cinfo) { + if (cinfo->mem == nullptr) return; + for (int pool_id = 0; pool_id < JPOOL_NUMPOOLS; ++pool_id) { + if (pool_id == JPOOL_PERMANENT) continue; + (*cinfo->mem->free_pool)(cinfo, pool_id); + } + if (cinfo->is_decompressor) { + cinfo->global_state = jpegli::kDecStart; + } else { + cinfo->global_state = jpegli::kEncStart; + } +} + +void jpegli_destroy(j_common_ptr cinfo) { + if (cinfo->mem == nullptr) return; + (*cinfo->mem->self_destruct)(cinfo); + if (cinfo->is_decompressor) { + cinfo->global_state = jpegli::kDecNull; + delete reinterpret_cast(cinfo)->master; + } else { + cinfo->global_state = jpegli::kEncNull; + } +} + +JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo) { + JQUANT_TBL* table = jpegli::Allocate(cinfo, 1); + table->sent_table = FALSE; + return table; +} + +JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo) { + JHUFF_TBL* table = jpegli::Allocate(cinfo, 1); + table->sent_table = FALSE; + return table; +} + +int jpegli_bytes_per_sample(JpegliDataType data_type) { + switch (data_type) { + case JPEGLI_TYPE_UINT8: + return 1; + case JPEGLI_TYPE_UINT16: + return 2; + case JPEGLI_TYPE_FLOAT: + return 4; + default: + return 0; + } +} diff --git a/third-party/libjxl/libjxl/lib/jpegli/common.h b/third-party/libjxl/libjxl/lib/jpegli/common.h new file mode 100644 index 0000000000..3691b2c6a5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/common.h @@ -0,0 +1,48 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+// +// This file contains the C API of the common encoder/decoder part of libjpegli +// library, which is based on the C API of libjpeg, with the function names +// changed from jpeg_* to jpegli_*, while compressor and decompressor object +// definitions are included directly from jpeglib.h +// +// Applications can use the libjpegli library in one of the following ways: +// +// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function +// names of the API and link against libjpegli. +// +// (2) Leave the application code unchanged, but replace the libjpeg.so library +// with the one built by this project that is API- and ABI-compatible with +// libjpeg-turbo's version of libjpeg.so. + +#ifndef LIB_JPEGLI_COMMON_H_ +#define LIB_JPEGLI_COMMON_H_ + +/* clang-format off */ +#include +#include +/* clang-format on */ + +#include "lib/jpegli/types.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err); + +void jpegli_abort(j_common_ptr cinfo); + +void jpegli_destroy(j_common_ptr cinfo); + +JQUANT_TBL* jpegli_alloc_quant_table(j_common_ptr cinfo); + +JHUFF_TBL* jpegli_alloc_huff_table(j_common_ptr cinfo); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // LIB_JPEGLI_COMMON_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/common_internal.h b/third-party/libjxl/libjxl/lib/jpegli/common_internal.h new file mode 100644 index 0000000000..248d3154e1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/common_internal.h @@ -0,0 +1,150 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_COMMON_INTERNAL_H_ +#define LIB_JPEGLI_COMMON_INTERNAL_H_ + +#include +#include +#include + +#include +#include + +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/simd.h" +#include "lib/jxl/base/compiler_specific.h" // for ssize_t +#include "lib/jxl/base/status.h" // for JXL_CHECK + +namespace jpegli { + +enum State { + kDecNull, + kDecStart, + kDecInHeader, + kDecHeaderDone, + kDecProcessMarkers, + kDecProcessScan, + kEncNull, + kEncStart, + kEncHeader, + kEncReadImage, + kEncWriteCoeffs, +}; + +template +constexpr inline T1 DivCeil(T1 a, T2 b) { + return (a + b - 1) / b; +} + +template +constexpr inline T1 RoundUpTo(T1 a, T2 b) { + return DivCeil(a, b) * b; +} + +constexpr size_t kDCTBlockSize = 64; +// This is set to the same value as MAX_COMPS_IN_SCAN, because that is the +// maximum number of channels the libjpeg-turbo decoder can decode. +constexpr int kMaxComponents = 4; +constexpr int kMaxQuantTables = 4; +constexpr int kJpegPrecision = 8; +constexpr int kMaxHuffmanTables = 4; +constexpr size_t kJpegHuffmanMaxBitLength = 16; +constexpr int kJpegHuffmanAlphabetSize = 256; +constexpr int kJpegDCAlphabetSize = 12; +constexpr int kMaxDHTMarkers = 512; +constexpr int kMaxDimPixels = 65535; +constexpr uint8_t kApp1 = 0xE1; +constexpr uint8_t kApp2 = 0xE2; +const uint8_t kIccProfileTag[12] = "ICC_PROFILE"; +const uint8_t kExifTag[6] = "Exif\0"; +const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/"; + +/* clang-format off */ +constexpr uint32_t kJPEGNaturalOrder[80] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // extra entries for safety in decoder + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +constexpr uint32_t kJPEGZigZagOrder[64] = { + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 
13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63 +}; +/* clang-format on */ + +template +class RowBuffer { + public: + template + void Allocate(CInfoType cinfo, size_t num_rows, size_t rowsize) { + size_t vec_size = std::max(VectorSize(), sizeof(T)); + JXL_CHECK(vec_size % sizeof(T) == 0); + size_t alignment = std::max(HWY_ALIGNMENT, vec_size); + size_t min_memstride = alignment + rowsize * sizeof(T) + vec_size; + size_t memstride = RoundUpTo(min_memstride, alignment); + xsize_ = rowsize; + ysize_ = num_rows; + stride_ = memstride / sizeof(T); + offset_ = alignment / sizeof(T); + data_ = ::jpegli::Allocate(cinfo, ysize_ * stride_, JPOOL_IMAGE_ALIGNED); + } + + T* Row(ssize_t y) const { + return &data_[((ysize_ + y) % ysize_) * stride_ + offset_]; + } + + size_t xsize() const { return xsize_; }; + size_t ysize() const { return ysize_; }; + size_t stride() const { return stride_; } + + void PadRow(size_t y, size_t from, int border) { + float* row = Row(y); + for (int offset = -border; offset < 0; ++offset) { + row[offset] = row[0]; + } + float last_val = row[from - 1]; + for (size_t x = from; x < xsize_ + border; ++x) { + row[x] = last_val; + } + } + + void CopyRow(ssize_t dst_row, ssize_t src_row, int border) { + memcpy(Row(dst_row) - border, Row(src_row) - border, + (xsize_ + 2 * border) * sizeof(T)); + } + + void FillRow(ssize_t y, T val, size_t len) { + T* row = Row(y); + for (size_t x = 0; x < len; ++x) { + row[x] = val; + } + } + + private: + size_t xsize_; + size_t ysize_; + size_t stride_; + size_t offset_; + T* data_; +}; + +} // namespace jpegli + +#endif // LIB_JPEGLI_COMMON_INTERNAL_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/dct-inl.h b/third-party/libjxl/libjxl/lib/jpegli/dct-inl.h new file mode 100644 index 0000000000..eb88654631 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jpegli/dct-inl.h @@ -0,0 +1,256 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JPEGLI_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JPEGLI_DCT_INL_H_ +#undef LIB_JPEGLI_DCT_INL_H_ +#else +#define LIB_JPEGLI_DCT_INL_H_ +#endif + +#include "lib/jpegli/transpose-inl.h" +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::DemoteTo; +using hwy::HWY_NAMESPACE::Ge; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Round; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Vec; + +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); + +template +void AddReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2, + float* JXL_RESTRICT aout) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N; i++) { + auto in1 = Load(d8, ain1 + i * 8); + auto in2 = Load(d8, ain2 + (N - i - 1) * 8); + Store(Add(in1, in2), d8, aout + i * 8); + } +} + +template +void SubReverse(const float* JXL_RESTRICT ain1, const float* JXL_RESTRICT ain2, + float* JXL_RESTRICT aout) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N; i++) { + auto in1 = Load(d8, ain1 + i * 8); + auto in2 = Load(d8, ain2 + (N - i - 1) * 8); + Store(Sub(in1, in2), d8, aout + i * 8); + } +} + +template +void B(float* JXL_RESTRICT coeff) { + HWY_CAPPED(float, 8) d8; + constexpr float kSqrt2 = 1.41421356237f; + auto sqrt2 = Set(d8, kSqrt2); + auto in1 = Load(d8, coeff); + auto in2 = Load(d8, coeff + 8); + Store(MulAdd(in1, sqrt2, in2), d8, coeff); + for (size_t i = 1; i + 1 < 
N; i++) { + auto in1 = Load(d8, coeff + i * 8); + auto in2 = Load(d8, coeff + (i + 1) * 8); + Store(Add(in1, in2), d8, coeff + i * 8); + } +} + +// Ideally optimized away by compiler (except the multiply). +template +void InverseEvenOdd(const float* JXL_RESTRICT ain, float* JXL_RESTRICT aout) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N / 2; i++) { + auto in1 = Load(d8, ain + i * 8); + Store(in1, d8, aout + 2 * i * 8); + } + for (size_t i = N / 2; i < N; i++) { + auto in1 = Load(d8, ain + i * 8); + Store(in1, d8, aout + (2 * (i - N / 2) + 1) * 8); + } +} + +// Constants for DCT implementation. Generated by the following snippet: +// for i in range(N // 2): +// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ") +template +struct WcMultipliers; + +template <> +struct WcMultipliers<4> { + static constexpr float kMultipliers[] = { + 0.541196100146197, + 1.3065629648763764, + }; +}; + +template <> +struct WcMultipliers<8> { + static constexpr float kMultipliers[] = { + 0.5097955791041592, + 0.6013448869350453, + 0.8999762231364156, + 2.5629154477415055, + }; +}; + +constexpr float WcMultipliers<4>::kMultipliers[]; +constexpr float WcMultipliers<8>::kMultipliers[]; + +// Invoked on full vector. 
+template +void Multiply(float* JXL_RESTRICT coeff) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < N / 2; i++) { + auto in1 = Load(d8, coeff + (N / 2 + i) * 8); + auto mul = Set(d8, WcMultipliers::kMultipliers[i]); + Store(Mul(in1, mul), d8, coeff + (N / 2 + i) * 8); + } +} + +void LoadFromBlock(const float* JXL_RESTRICT pixels, size_t pixels_stride, + size_t off, float* JXL_RESTRICT coeff) { + HWY_CAPPED(float, 8) d8; + for (size_t i = 0; i < 8; i++) { + Store(LoadU(d8, pixels + i * pixels_stride + off), d8, coeff + i * 8); + } +} + +void StoreToBlockAndScale(const float* JXL_RESTRICT coeff, float* output, + size_t off) { + HWY_CAPPED(float, 8) d8; + auto mul = Set(d8, 1.0f / 8); + for (size_t i = 0; i < 8; i++) { + StoreU(Mul(mul, Load(d8, coeff + i * 8)), d8, output + i * 8 + off); + } +} + +template +struct DCT1DImpl; + +template <> +struct DCT1DImpl<1> { + JXL_INLINE void operator()(float* JXL_RESTRICT mem) {} +}; + +template <> +struct DCT1DImpl<2> { + JXL_INLINE void operator()(float* JXL_RESTRICT mem) { + HWY_CAPPED(float, 8) d8; + auto in1 = Load(d8, mem); + auto in2 = Load(d8, mem + 8); + Store(Add(in1, in2), d8, mem); + Store(Sub(in1, in2), d8, mem + 8); + } +}; + +template +struct DCT1DImpl { + void operator()(float* JXL_RESTRICT mem) { + HWY_ALIGN float tmp[N * 8]; + AddReverse(mem, mem + N * 4, tmp); + DCT1DImpl()(tmp); + SubReverse(mem, mem + N * 4, tmp + N * 4); + Multiply(tmp); + DCT1DImpl()(tmp + N * 4); + B(tmp + N * 4); + InverseEvenOdd(tmp, mem); + } +}; + +void DCT1D(const float* JXL_RESTRICT pixels, size_t pixels_stride, + float* JXL_RESTRICT output) { + HWY_CAPPED(float, 8) d8; + HWY_ALIGN float tmp[64]; + for (size_t i = 0; i < 8; i += Lanes(d8)) { + // TODO(veluca): consider removing the temporary memory here (as is done in + // IDCT), if it turns out that some compilers don't optimize away the loads + // and this is performance-critical. 
+ LoadFromBlock(pixels, pixels_stride, i, tmp); + DCT1DImpl<8>()(tmp); + StoreToBlockAndScale(tmp, output, i); + } +} + +void TransformFromPixels(const float* JXL_RESTRICT pixels, size_t pixels_stride, + float* JXL_RESTRICT coefficients, + float* JXL_RESTRICT scratch_space) { + DCT1D(pixels, pixels_stride, scratch_space); + Transpose8x8Block(scratch_space, coefficients); + DCT1D(coefficients, 8, scratch_space); + Transpose8x8Block(scratch_space, coefficients); +} + +void StoreQuantizedValue(const Vec& ival, int16_t* out) { + Rebind di16; + Store(DemoteTo(di16, ival), di16, out); +} + +void StoreQuantizedValue(const Vec& ival, int32_t* out) { + DI di; + Store(ival, di, out); +} + +template +void QuantizeBlock(const float* dct, const float* qmc, float aq_strength, + const float* zero_bias_offset, const float* zero_bias_mul, + T* block) { + D d; + DI di; + const auto aq_mul = Set(d, aq_strength); + for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) { + const auto val = Load(d, dct + k); + const auto q = Load(d, qmc + k); + const auto qval = Mul(val, q); + const auto zb_offset = Load(d, zero_bias_offset + k); + const auto zb_mul = Load(d, zero_bias_mul + k); + const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul)); + const auto nzero_mask = Ge(Abs(qval), threshold); + const auto ival = ConvertTo(di, IfThenElseZero(nzero_mask, Round(qval))); + StoreQuantizedValue(ival, block + k); + } +} + +template +void ComputeCoefficientBlock(const float* JXL_RESTRICT pixels, size_t stride, + const float* JXL_RESTRICT qmc, + int16_t last_dc_coeff, float aq_strength, + const float* zero_bias_offset, + const float* zero_bias_mul, + float* JXL_RESTRICT tmp, T* block) { + float* JXL_RESTRICT dct = tmp; + float* JXL_RESTRICT scratch_space = tmp + DCTSIZE2; + TransformFromPixels(pixels, stride, dct, scratch_space); + QuantizeBlock(dct, qmc, aq_strength, zero_bias_offset, zero_bias_mul, block); + // Center DC values around zero. 
+ static constexpr float kDCBias = 128.0f; + const float dc = (dct[0] - kDCBias) * qmc[0]; + float dc_threshold = zero_bias_offset[0] + aq_strength * zero_bias_mul[0]; + if (std::abs(dc - last_dc_coeff) < dc_threshold) { + block[0] = last_dc_coeff; + } else { + block[0] = std::round(dc); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); +#endif // LIB_JPEGLI_DCT_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode.cc b/third-party/libjxl/libjxl/lib/jpegli/decode.cc new file mode 100644 index 0000000000..758babeb5e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/decode.cc @@ -0,0 +1,1028 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/decode.h" + +#include + +#include + +#include "lib/jpegli/color_quantize.h" +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/decode_marker.h" +#include "lib/jpegli/decode_scan.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/render.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { + +void InitializeImage(j_decompress_ptr cinfo) { + cinfo->restart_interval = 0; + cinfo->saw_JFIF_marker = FALSE; + cinfo->JFIF_major_version = 1; + cinfo->JFIF_minor_version = 1; + cinfo->density_unit = 0; + cinfo->X_density = 1; + cinfo->Y_density = 1; + cinfo->saw_Adobe_marker = FALSE; + cinfo->Adobe_transform = 0; + cinfo->CCIR601_sampling = FALSE; // not used + cinfo->marker_list = nullptr; + cinfo->comp_info = nullptr; + cinfo->input_scan_number = 0; + cinfo->input_iMCU_row = 0; + cinfo->output_scan_number = 0; + cinfo->output_iMCU_row = 0; + cinfo->output_scanline = 0; + cinfo->unread_marker = 0; + cinfo->coef_bits = nullptr; + // We set all these to zero since we 
don't yet support arithmetic coding. + memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L)); + memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U)); + memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K)); + // Initialize the private fields. + jpeg_decomp_master* m = cinfo->master; + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + m->codestream_bits_ahead_ = 0; + m->is_multiscan_ = false; + m->found_soi_ = false; + m->found_dri_ = false; + m->found_sof_ = false; + m->found_eoi_ = false; + m->icc_index_ = 0; + m->icc_total_ = 0; + m->icc_profile_.clear(); + memset(m->dc_huff_lut_, 0, sizeof(m->dc_huff_lut_)); + memset(m->ac_huff_lut_, 0, sizeof(m->ac_huff_lut_)); + // Initialize the values to an invalid symbol so that we can recognize it + // when reading the bit stream using a Huffman code with space > 0. + for (size_t i = 0; i < kAllHuffLutSize; ++i) { + m->dc_huff_lut_[i].bits = 0; + m->dc_huff_lut_[i].value = 0xffff; + m->ac_huff_lut_[i].bits = 0; + m->ac_huff_lut_[i].value = 0xffff; + } + m->colormap_lut_ = nullptr; + m->pixels_ = nullptr; + m->scanlines_ = nullptr; + m->regenerate_inverse_colormap_ = true; + for (int i = 0; i < kMaxComponents; ++i) { + m->dither_[i] = nullptr; + m->error_row_[i] = nullptr; + } + m->output_passes_done_ = 0; + m->xoffset_ = 0; + m->dequant_ = nullptr; +} + +void InitializeDecompressParams(j_decompress_ptr cinfo) { + cinfo->jpeg_color_space = JCS_UNKNOWN; + cinfo->out_color_space = JCS_UNKNOWN; + cinfo->scale_num = 1; + cinfo->scale_denom = 1; + cinfo->output_gamma = 0.0f; + cinfo->buffered_image = FALSE; + cinfo->raw_data_out = FALSE; + cinfo->dct_method = JDCT_DEFAULT; + cinfo->do_fancy_upsampling = TRUE; + cinfo->do_block_smoothing = TRUE; + cinfo->quantize_colors = FALSE; + cinfo->dither_mode = JDITHER_FS; + cinfo->two_pass_quantize = TRUE; + cinfo->desired_number_of_colors = 256; + cinfo->enable_1pass_quant = FALSE; + cinfo->enable_external_quant = FALSE; + cinfo->enable_2pass_quant = FALSE; + 
cinfo->actual_number_of_colors = 0; + cinfo->colormap = nullptr; +} + +void InitProgressMonitor(j_decompress_ptr cinfo, bool coef_only) { + if (!cinfo->progress) return; + jpeg_decomp_master* m = cinfo->master; + int nc = cinfo->num_components; + int estimated_num_scans = + cinfo->progressive_mode ? 2 + 3 * nc : (m->is_multiscan_ ? nc : 1); + cinfo->progress->pass_limit = cinfo->total_iMCU_rows * estimated_num_scans; + cinfo->progress->pass_counter = 0; + if (coef_only) { + cinfo->progress->total_passes = 1; + } else { + int input_passes = !cinfo->buffered_image && m->is_multiscan_ ? 1 : 0; + bool two_pass_quant = cinfo->quantize_colors && !cinfo->colormap && + cinfo->two_pass_quantize && cinfo->enable_2pass_quant; + cinfo->progress->total_passes = input_passes + (two_pass_quant ? 2 : 1); + } + cinfo->progress->completed_passes = 0; +} + +void InitProgressMonitorForOutput(j_decompress_ptr cinfo) { + if (!cinfo->progress) return; + jpeg_decomp_master* m = cinfo->master; + int passes_per_output = cinfo->enable_2pass_quant ? 2 : 1; + int output_passes_left = cinfo->buffered_image && !m->found_eoi_ ? 2 : 1; + cinfo->progress->total_passes = + m->output_passes_done_ + passes_per_output * output_passes_left; + cinfo->progress->completed_passes = m->output_passes_done_; +} + +void ProgressMonitorInputPass(j_decompress_ptr cinfo) { + if (!cinfo->progress) return; + cinfo->progress->pass_counter = + ((cinfo->input_scan_number - 1) * cinfo->total_iMCU_rows + + cinfo->input_iMCU_row); + if (cinfo->progress->pass_counter > cinfo->progress->pass_limit) { + cinfo->progress->pass_limit = + cinfo->input_scan_number * cinfo->total_iMCU_rows; + } + (*cinfo->progress->progress_monitor)(reinterpret_cast(cinfo)); +} + +void ProgressMonitorOutputPass(j_decompress_ptr cinfo) { + if (!cinfo->progress) return; + jpeg_decomp_master* m = cinfo->master; + int input_passes = !cinfo->buffered_image && m->is_multiscan_ ? 
1 : 0; + cinfo->progress->pass_counter = cinfo->output_scanline; + cinfo->progress->pass_limit = cinfo->output_height; + cinfo->progress->completed_passes = input_passes + m->output_passes_done_; + (*cinfo->progress->progress_monitor)(reinterpret_cast(cinfo)); +} + +void BuildHuffmanLookupTable(j_decompress_ptr cinfo, JHUFF_TBL* table, + HuffmanTableEntry* huff_lut) { + uint32_t counts[kJpegHuffmanMaxBitLength + 1] = {}; + counts[0] = 0; + int total_count = 0; + int space = 1 << kJpegHuffmanMaxBitLength; + int max_depth = 1; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + int count = table->bits[i]; + if (count != 0) { + max_depth = i; + } + counts[i] = count; + total_count += count; + space -= count * (1 << (kJpegHuffmanMaxBitLength - i)); + } + uint32_t values[kJpegHuffmanAlphabetSize + 1] = {}; + uint8_t values_seen[256] = {0}; + for (int i = 0; i < total_count; ++i) { + int value = table->huffval[i]; + if (values_seen[value]) { + return JPEGLI_ERROR("Duplicate Huffman code value %d", value); + } + values_seen[value] = 1; + values[i] = value; + } + // Add an invalid symbol that will have the all 1 code. + ++counts[max_depth]; + values[total_count] = kJpegHuffmanAlphabetSize; + space -= (1 << (kJpegHuffmanMaxBitLength - max_depth)); + if (space < 0) { + JPEGLI_ERROR("Invalid Huffman code lengths."); + } else if (space > 0 && huff_lut[0].value != 0xffff) { + // Re-initialize the values to an invalid symbol so that we can recognize + // it when reading the bit stream using a Huffman code with space > 0. 
+ for (int i = 0; i < kJpegHuffmanLutSize; ++i) { + huff_lut[i].bits = 0; + huff_lut[i].value = 0xffff; + } + } + BuildJpegHuffmanTable(&counts[0], &values[0], huff_lut); +} + +void PrepareForScan(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + int comp_idx = cinfo->cur_comp_info[i]->component_index; + int* prev_coef_bits = cinfo->coef_bits[comp_idx + cinfo->num_components]; + for (int k = std::min(cinfo->Ss, 1); k <= std::max(cinfo->Se, 9); k++) { + prev_coef_bits[k] = + (cinfo->input_scan_number > 0) ? cinfo->coef_bits[comp_idx][k] : 0; + } + for (int k = cinfo->Ss; k <= cinfo->Se; ++k) { + cinfo->coef_bits[comp_idx][k] = cinfo->Al; + } + } + AddStandardHuffmanTables(reinterpret_cast(cinfo), + /*is_dc=*/false); + AddStandardHuffmanTables(reinterpret_cast(cinfo), + /*is_dc=*/true); + // Check that all the Huffman tables needed for this scan are defined and + // build derived lookup tables. + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + if (cinfo->Ss == 0) { + int dc_tbl_idx = cinfo->cur_comp_info[i]->dc_tbl_no; + JHUFF_TBL* table = cinfo->dc_huff_tbl_ptrs[dc_tbl_idx]; + HuffmanTableEntry* huff_lut = + &m->dc_huff_lut_[dc_tbl_idx * kJpegHuffmanLutSize]; + if (!table) { + return JPEGLI_ERROR("DC Huffman table %d not found", dc_tbl_idx); + } + BuildHuffmanLookupTable(cinfo, table, huff_lut); + } + if (cinfo->Se > 0) { + int ac_tbl_idx = cinfo->cur_comp_info[i]->ac_tbl_no; + JHUFF_TBL* table = cinfo->ac_huff_tbl_ptrs[ac_tbl_idx]; + HuffmanTableEntry* huff_lut = + &m->ac_huff_lut_[ac_tbl_idx * kJpegHuffmanLutSize]; + if (!table) { + return JPEGLI_ERROR("AC Huffman table %d not found", ac_tbl_idx); + } + BuildHuffmanLookupTable(cinfo, table, huff_lut); + } + } + // Copy quantization tables into comp_info. 
+ for (int i = 0; i < cinfo->comps_in_scan; ++i) { + jpeg_component_info* comp = cinfo->cur_comp_info[i]; + if (comp->quant_table == nullptr) { + comp->quant_table = Allocate(cinfo, 1, JPOOL_IMAGE); + memcpy(comp->quant_table, cinfo->quant_tbl_ptrs[comp->quant_tbl_no], + sizeof(JQUANT_TBL)); + } + } + if (cinfo->comps_in_scan == 1) { + const auto& comp = *cinfo->cur_comp_info[0]; + cinfo->MCUs_per_row = DivCeil(cinfo->image_width * comp.h_samp_factor, + cinfo->max_h_samp_factor * DCTSIZE); + cinfo->MCU_rows_in_scan = DivCeil(cinfo->image_height * comp.v_samp_factor, + cinfo->max_v_samp_factor * DCTSIZE); + m->mcu_rows_per_iMCU_row_ = cinfo->cur_comp_info[0]->v_samp_factor; + } else { + cinfo->MCU_rows_in_scan = cinfo->total_iMCU_rows; + cinfo->MCUs_per_row = m->iMCU_cols_; + m->mcu_rows_per_iMCU_row_ = 1; + size_t mcu_size = 0; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + jpeg_component_info* comp = cinfo->cur_comp_info[i]; + mcu_size += comp->h_samp_factor * comp->v_samp_factor; + } + if (mcu_size > D_MAX_BLOCKS_IN_MCU) { + JPEGLI_ERROR("MCU size too big"); + } + } + memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_)); + m->restarts_to_go_ = cinfo->restart_interval; + m->next_restart_marker_ = 0; + m->eobrun_ = -1; + m->scan_mcu_row_ = 0; + m->scan_mcu_col_ = 0; + m->codestream_bits_ahead_ = 0; + ++cinfo->input_scan_number; + cinfo->input_iMCU_row = 0; + PrepareForiMCURow(cinfo); + cinfo->global_state = kDecProcessScan; +} + +int ConsumeInput(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->global_state == kDecProcessScan && m->streaming_mode_ && + cinfo->input_iMCU_row > cinfo->output_iMCU_row) { + // Prevent input from getting ahead of output in streaming mode. 
+ return JPEG_SUSPENDED; + } + jpeg_source_mgr* src = cinfo->src; + int status; + for (;;) { + const uint8_t* data; + size_t len; + if (m->input_buffer_.empty()) { + data = cinfo->src->next_input_byte; + len = cinfo->src->bytes_in_buffer; + } else { + data = &m->input_buffer_[m->input_buffer_pos_]; + len = m->input_buffer_.size() - m->input_buffer_pos_; + } + size_t pos = 0; + if (cinfo->global_state == kDecProcessScan) { + status = ProcessScan(cinfo, data, len, &pos, &m->codestream_bits_ahead_); + } else { + status = ProcessMarkers(cinfo, data, len, &pos); + } + if (m->input_buffer_.empty()) { + cinfo->src->next_input_byte += pos; + cinfo->src->bytes_in_buffer -= pos; + } else { + m->input_buffer_pos_ += pos; + size_t bytes_left = m->input_buffer_.size() - m->input_buffer_pos_; + if (bytes_left <= src->bytes_in_buffer) { + src->next_input_byte += (src->bytes_in_buffer - bytes_left); + src->bytes_in_buffer = bytes_left; + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + } + } + if (status == kHandleRestart) { + JXL_DASSERT(m->input_buffer_.size() <= + m->input_buffer_pos_ + src->bytes_in_buffer); + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + if (cinfo->unread_marker == 0xd0 + m->next_restart_marker_) { + cinfo->unread_marker = 0; + } else { + if (!(*cinfo->src->resync_to_restart)(cinfo, m->next_restart_marker_)) { + return JPEG_SUSPENDED; + } + } + m->next_restart_marker_ += 1; + m->next_restart_marker_ &= 0x7; + m->restarts_to_go_ = cinfo->restart_interval; + if (cinfo->unread_marker != 0) { + JPEGLI_WARN("Failed to resync to next restart marker, skipping scan."); + return JPEG_SCAN_COMPLETED; + } + continue; + } + if (status == kHandleMarkerProcessor) { + JXL_DASSERT(m->input_buffer_.size() <= + m->input_buffer_pos_ + src->bytes_in_buffer); + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + if (!(*GetMarkerProcessor(cinfo))(cinfo)) { + return JPEG_SUSPENDED; + } + cinfo->unread_marker = 0; + continue; + } + if (status != kNeedMoreInput) 
{ + break; + } + if (m->input_buffer_.empty()) { + JXL_DASSERT(m->input_buffer_pos_ == 0); + m->input_buffer_.assign(src->next_input_byte, + src->next_input_byte + src->bytes_in_buffer); + } + if (!(*cinfo->src->fill_input_buffer)(cinfo)) { + m->input_buffer_.clear(); + m->input_buffer_pos_ = 0; + return JPEG_SUSPENDED; + } + if (src->bytes_in_buffer == 0) { + JPEGLI_ERROR("Empty input."); + } + m->input_buffer_.insert(m->input_buffer_.end(), src->next_input_byte, + src->next_input_byte + src->bytes_in_buffer); + } + if (status == JPEG_SCAN_COMPLETED) { + cinfo->global_state = kDecProcessMarkers; + } else if (status == JPEG_REACHED_SOS) { + if (cinfo->global_state == kDecInHeader) { + cinfo->global_state = kDecHeaderDone; + } else { + PrepareForScan(cinfo); + } + } + return status; +} + +bool IsInputReady(j_decompress_ptr cinfo) { + if (cinfo->master->found_eoi_) { + return true; + } + if (cinfo->input_scan_number > cinfo->output_scan_number) { + return true; + } + if (cinfo->input_scan_number < cinfo->output_scan_number) { + return false; + } + if (cinfo->input_iMCU_row == cinfo->total_iMCU_rows) { + return true; + } + return cinfo->input_iMCU_row > + cinfo->output_iMCU_row + (cinfo->master->streaming_mode_ ? 
0 : 2); +} + +bool ReadOutputPass(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (!m->pixels_) { + size_t stride = cinfo->out_color_components * cinfo->output_width; + size_t num_samples = cinfo->output_height * stride; + m->pixels_ = Allocate(cinfo, num_samples, JPOOL_IMAGE); + m->scanlines_ = + Allocate(cinfo, cinfo->output_height, JPOOL_IMAGE); + for (size_t i = 0; i < cinfo->output_height; ++i) { + m->scanlines_[i] = &m->pixels_[i * stride]; + } + } + size_t num_output_rows = 0; + while (num_output_rows < cinfo->output_height) { + if (IsInputReady(cinfo)) { + ProgressMonitorOutputPass(cinfo); + ProcessOutput(cinfo, &num_output_rows, m->scanlines_, + cinfo->output_height); + } else if (ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return false; + } + } + cinfo->output_scanline = 0; + cinfo->output_iMCU_row = 0; + return true; +} + +boolean PrepareQuantizedOutput(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->raw_data_out) { + JPEGLI_ERROR("Color quantization is not supported in raw data mode."); + } + if (m->output_data_type_ != JPEGLI_TYPE_UINT8) { + JPEGLI_ERROR("Color quantization must use 8-bit mode."); + } + if (cinfo->colormap) { + m->quant_mode_ = 3; + } else if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) { + m->quant_mode_ = 2; + } else if (cinfo->enable_1pass_quant) { + m->quant_mode_ = 1; + } else { + JPEGLI_ERROR("Invalid quantization mode change"); + } + if (m->quant_mode_ > 1 && cinfo->dither_mode == JDITHER_ORDERED) { + cinfo->dither_mode = JDITHER_FS; + } + if (m->quant_mode_ == 1) { + ChooseColorMap1Pass(cinfo); + } else if (m->quant_mode_ == 2) { + m->quant_pass_ = 0; + if (!ReadOutputPass(cinfo)) { + return FALSE; + } + ChooseColorMap2Pass(cinfo); + } + if (m->quant_mode_ == 2 || + (m->quant_mode_ == 3 && m->regenerate_inverse_colormap_)) { + CreateInverseColorMap(cinfo); + } + if (cinfo->dither_mode == JDITHER_ORDERED) { + CreateOrderedDitherTables(cinfo); + } else if 
(cinfo->dither_mode == JDITHER_FS) { + InitFSDitherState(cinfo); + } + m->quant_pass_ = 1; + return TRUE; +} + +void AllocateCoefficientBuffer(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + j_common_ptr comptr = reinterpret_cast(cinfo); + jvirt_barray_ptr* coef_arrays = jpegli::Allocate( + cinfo, cinfo->num_components, JPOOL_IMAGE); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + size_t height_in_blocks = + m->streaming_mode_ ? comp->v_samp_factor : comp->height_in_blocks; + coef_arrays[c] = (*cinfo->mem->request_virt_barray)( + comptr, JPOOL_IMAGE, TRUE, comp->width_in_blocks, height_in_blocks, + comp->v_samp_factor); + } + cinfo->master->coef_arrays = coef_arrays; + (*cinfo->mem->realize_virt_arrays)(comptr); +} + +void AllocateOutputBuffers(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + size_t iMCU_width = cinfo->max_h_samp_factor * m->min_scaled_dct_size; + size_t output_stride = m->iMCU_cols_ * iMCU_width; + m->need_context_rows_ = false; + for (int c = 0; c < cinfo->num_components; ++c) { + if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) { + m->need_context_rows_ = true; + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + const auto& comp = cinfo->comp_info[c]; + size_t cheight = comp.v_samp_factor * m->scaled_dct_size[c]; + int downsampled_width = output_stride / m->h_factor[c]; + m->raw_height_[c] = cinfo->total_iMCU_rows * cheight; + if (m->need_context_rows_) { + cheight *= 3; + } + m->raw_output_[c].Allocate(cinfo, cheight, downsampled_width); + } + int num_all_components = + std::max(cinfo->out_color_components, cinfo->num_components); + for (int c = 0; c < num_all_components; ++c) { + m->render_output_[c].Allocate(cinfo, cinfo->max_v_samp_factor, + output_stride); + } + m->idct_scratch_ = Allocate(cinfo, 5 * DCTSIZE2, JPOOL_IMAGE_ALIGNED); + // Padding for horizontal chroma upsampling. 
+ constexpr size_t kPaddingLeft = 64; + constexpr size_t kPaddingRight = 64; + m->upsample_scratch_ = Allocate( + cinfo, output_stride + kPaddingLeft + kPaddingRight, JPOOL_IMAGE_ALIGNED); + size_t bytes_per_sample = jpegli_bytes_per_sample(m->output_data_type_); + size_t bytes_per_pixel = cinfo->out_color_components * bytes_per_sample; + size_t scratch_stride = RoundUpTo(output_stride, HWY_ALIGNMENT); + m->output_scratch_ = Allocate( + cinfo, bytes_per_pixel * scratch_stride, JPOOL_IMAGE_ALIGNED); + m->smoothing_scratch_ = + Allocate(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + size_t coeffs_per_block = cinfo->num_components * DCTSIZE2; + m->nonzeros_ = Allocate(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + m->sumabs_ = Allocate(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + m->biases_ = Allocate(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + m->dequant_ = Allocate(cinfo, coeffs_per_block, JPOOL_IMAGE_ALIGNED); + memset(m->dequant_, 0, coeffs_per_block * sizeof(float)); +} + +} // namespace jpegli + +void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version, + size_t structsize) { + cinfo->mem = nullptr; + if (structsize != sizeof(*cinfo)) { + JPEGLI_ERROR("jpeg_decompress_struct has wrong size."); + } + jpegli::InitMemoryManager(reinterpret_cast(cinfo)); + cinfo->is_decompressor = TRUE; + cinfo->progress = nullptr; + cinfo->src = nullptr; + for (int i = 0; i < NUM_QUANT_TBLS; i++) { + cinfo->quant_tbl_ptrs[i] = nullptr; + } + for (int i = 0; i < NUM_HUFF_TBLS; i++) { + cinfo->dc_huff_tbl_ptrs[i] = nullptr; + cinfo->ac_huff_tbl_ptrs[i] = nullptr; + } + cinfo->global_state = jpegli::kDecStart; + cinfo->sample_range_limit = nullptr; // not used + cinfo->rec_outbuf_height = 1; // output works with any buffer height + cinfo->master = new jpeg_decomp_master; + jpeg_decomp_master* m = cinfo->master; + for (int i = 0; i < 16; ++i) { + m->app_marker_parsers[i] = nullptr; + } + m->com_marker_parser = nullptr; + memset(m->markers_to_save_, 0, 
sizeof(m->markers_to_save_)); + jpegli::InitializeDecompressParams(cinfo); + jpegli::InitializeImage(cinfo); +} + +void jpegli_destroy_decompress(j_decompress_ptr cinfo) { + jpegli_destroy(reinterpret_cast(cinfo)); +} + +void jpegli_abort_decompress(j_decompress_ptr cinfo) { + jpegli_abort(reinterpret_cast(cinfo)); +} + +void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit) { + // TODO(szabadka) Limit our memory usage by taking into account length_limit. + jpeg_decomp_master* m = cinfo->master; + if (marker_code < 0xe0) { + JPEGLI_ERROR("jpegli_save_markers: invalid marker code %d", marker_code); + } + m->markers_to_save_[marker_code - 0xe0] = 1; +} + +void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code, + jpeg_marker_parser_method routine) { + jpeg_decomp_master* m = cinfo->master; + if (marker_code == 0xfe) { + m->com_marker_parser = routine; + } else if (marker_code >= 0xe0 && marker_code <= 0xef) { + m->app_marker_parsers[marker_code - 0xe0] = routine; + } else { + JPEGLI_ERROR("jpegli_set_marker_processor: invalid marker code %d", + marker_code); + } +} + +int jpegli_consume_input(j_decompress_ptr cinfo) { + if (cinfo->global_state == jpegli::kDecStart) { + (*cinfo->err->reset_error_mgr)(reinterpret_cast(cinfo)); + (*cinfo->src->init_source)(cinfo); + jpegli::InitializeDecompressParams(cinfo); + jpegli::InitializeImage(cinfo); + cinfo->global_state = jpegli::kDecInHeader; + } + if (cinfo->global_state == jpegli::kDecHeaderDone) { + return JPEG_REACHED_SOS; + } + if (cinfo->master->found_eoi_) { + return JPEG_REACHED_EOI; + } + if (cinfo->global_state == jpegli::kDecInHeader || + cinfo->global_state == jpegli::kDecProcessMarkers || + cinfo->global_state == jpegli::kDecProcessScan) { + return jpegli::ConsumeInput(cinfo); + } + JPEGLI_ERROR("Unexpected state %d", cinfo->global_state); + return JPEG_REACHED_EOI; // return value does not matter +} + +int jpegli_read_header(j_decompress_ptr cinfo, boolean 
require_image) { + if (cinfo->global_state != jpegli::kDecStart && + cinfo->global_state != jpegli::kDecInHeader) { + JPEGLI_ERROR("jpegli_read_header: unexpected state %d", + cinfo->global_state); + } + if (cinfo->src == nullptr) { + JPEGLI_ERROR("Missing source."); + } + for (;;) { + int retcode = jpegli_consume_input(cinfo); + if (retcode == JPEG_SUSPENDED) { + return retcode; + } else if (retcode == JPEG_REACHED_SOS) { + break; + } else if (retcode == JPEG_REACHED_EOI) { + if (require_image) { + JPEGLI_ERROR("jpegli_read_header: unexpected EOI marker."); + } + jpegli_abort_decompress(cinfo); + return JPEG_HEADER_TABLES_ONLY; + } + }; + return JPEG_HEADER_OK; +} + +boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET** icc_data_ptr, + unsigned int* icc_data_len) { + if (cinfo->global_state == jpegli::kDecStart || + cinfo->global_state == jpegli::kDecInHeader) { + JPEGLI_ERROR("jpegli_read_icc_profile: unexpected state %d", + cinfo->global_state); + } + if (icc_data_ptr == nullptr || icc_data_len == nullptr) { + JPEGLI_ERROR("jpegli_read_icc_profile: invalid output buffer"); + } + jpeg_decomp_master* m = cinfo->master; + if (m->icc_profile_.empty()) { + *icc_data_ptr = nullptr; + *icc_data_len = 0; + return FALSE; + } + *icc_data_len = m->icc_profile_.size(); + *icc_data_ptr = (JOCTET*)malloc(*icc_data_len); + if (*icc_data_ptr == nullptr) { + JPEGLI_ERROR("jpegli_read_icc_profile: Out of memory"); + } + memcpy(*icc_data_ptr, m->icc_profile_.data(), *icc_data_len); + return TRUE; +} + +void jpegli_core_output_dimensions(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (!m->found_sof_) { + JPEGLI_ERROR("No SOF marker found."); + } + if (cinfo->raw_data_out) { + if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) { + JPEGLI_ERROR("Output scaling is not supported in raw output mode"); + } + } + if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) { + int dctsize = 16; + while (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 
(dctsize - 1)) { + --dctsize; + } + m->min_scaled_dct_size = dctsize; + cinfo->output_width = + jpegli::DivCeil(cinfo->image_width * dctsize, DCTSIZE); + cinfo->output_height = + jpegli::DivCeil(cinfo->image_height * dctsize, DCTSIZE); + for (int c = 0; c < cinfo->num_components; ++c) { + m->scaled_dct_size[c] = m->min_scaled_dct_size; + } + } else { + cinfo->output_width = cinfo->image_width; + cinfo->output_height = cinfo->image_height; + m->min_scaled_dct_size = DCTSIZE; + for (int c = 0; c < cinfo->num_components; ++c) { + m->scaled_dct_size[c] = DCTSIZE; + } + } +} + +void jpegli_calc_output_dimensions(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + jpegli_core_output_dimensions(cinfo); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor; + m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor; + } + if (cinfo->scale_num != 1 || cinfo->scale_denom != 1) { + for (int c = 0; c < cinfo->num_components; ++c) { + // Prefer IDCT scaling over 2x upsampling. + while (m->scaled_dct_size[c] < DCTSIZE && (m->v_factor[c] % 2) == 0 && + (m->h_factor[c] % 2) == 0) { + m->scaled_dct_size[c] *= 2; + m->v_factor[c] /= 2; + m->h_factor[c] /= 2; + } + } + } + if (cinfo->out_color_space == JCS_GRAYSCALE) { + cinfo->out_color_components = 1; + } else if (cinfo->out_color_space == JCS_RGB || + cinfo->out_color_space == JCS_YCbCr) { + cinfo->out_color_components = 3; + } else if (cinfo->out_color_space == JCS_CMYK || + cinfo->out_color_space == JCS_YCCK) { + cinfo->out_color_components = 4; + } else { + cinfo->out_color_components = cinfo->num_components; + } + cinfo->output_components = + cinfo->quantize_colors ? 
1 : cinfo->out_color_components; + cinfo->rec_outbuf_height = 1; +} + +boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo) { + if (cinfo->input_scan_number == 0) { + JPEGLI_ERROR("No SOS marker found."); + } + return cinfo->master->is_multiscan_; +} + +boolean jpegli_input_complete(j_decompress_ptr cinfo) { + return cinfo->master->found_eoi_; +} + +boolean jpegli_start_decompress(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->global_state == jpegli::kDecHeaderDone) { + m->streaming_mode_ = !m->is_multiscan_ && !cinfo->buffered_image && + (!cinfo->quantize_colors || !cinfo->two_pass_quantize); + jpegli::AllocateCoefficientBuffer(cinfo); + jpegli_calc_output_dimensions(cinfo); + jpegli::PrepareForScan(cinfo); + if (cinfo->quantize_colors) { + if (cinfo->colormap != nullptr) { + cinfo->enable_external_quant = TRUE; + } else if (cinfo->two_pass_quantize && + cinfo->out_color_space == JCS_RGB) { + cinfo->enable_2pass_quant = TRUE; + } else { + cinfo->enable_1pass_quant = TRUE; + } + } + jpegli::InitProgressMonitor(cinfo, /*coef_only=*/false); + jpegli::AllocateOutputBuffers(cinfo); + if (cinfo->buffered_image == TRUE) { + cinfo->output_scan_number = 0; + return TRUE; + } + } else if (!m->is_multiscan_) { + JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d", + cinfo->global_state); + } + if (m->is_multiscan_) { + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_start_decompress: unexpected state %d", + cinfo->global_state); + } + while (!m->found_eoi_) { + jpegli::ProgressMonitorInputPass(cinfo); + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return FALSE; + } + } + } + cinfo->output_scan_number = cinfo->input_scan_number; + jpegli::PrepareForOutput(cinfo); + if (cinfo->quantize_colors) { + return jpegli::PrepareQuantizedOutput(cinfo); + } else { + return TRUE; + } +} + +boolean jpegli_start_output(j_decompress_ptr cinfo, int 
scan_number) { + jpeg_decomp_master* m = cinfo->master; + if (!cinfo->buffered_image) { + JPEGLI_ERROR("jpegli_start_output: buffered image mode was not set"); + } + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_start_output: unexpected state %d", + cinfo->global_state); + } + cinfo->output_scan_number = std::max(1, scan_number); + if (m->found_eoi_) { + cinfo->output_scan_number = + std::min(cinfo->output_scan_number, cinfo->input_scan_number); + } + jpegli::InitProgressMonitorForOutput(cinfo); + jpegli::PrepareForOutput(cinfo); + if (cinfo->quantize_colors) { + return jpegli::PrepareQuantizedOutput(cinfo); + } else { + return TRUE; + } +} + +boolean jpegli_finish_output(j_decompress_ptr cinfo) { + if (!cinfo->buffered_image) { + JPEGLI_ERROR("jpegli_finish_output: buffered image mode was not set"); + } + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_finish_output: unexpected state %d", + cinfo->global_state); + } + // Advance input to the start of the next scan, or to the end of input. 
+ while (cinfo->input_scan_number <= cinfo->output_scan_number && + !cinfo->master->found_eoi_) { + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return FALSE; + } + } + return TRUE; +} + +JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION max_lines) { + jpeg_decomp_master* m = cinfo->master; + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_read_scanlines: unexpected state %d", + cinfo->global_state); + } + if (cinfo->buffered_image) { + if (cinfo->output_scan_number == 0) { + JPEGLI_ERROR( + "jpegli_read_scanlines: " + "jpegli_start_output() was not called"); + } + } else if (m->is_multiscan_ && !m->found_eoi_) { + JPEGLI_ERROR( + "jpegli_read_scanlines: " + "jpegli_start_decompress() did not finish"); + } + if (cinfo->output_scanline + max_lines > cinfo->output_height) { + max_lines = cinfo->output_height - cinfo->output_scanline; + } + jpegli::ProgressMonitorOutputPass(cinfo); + size_t num_output_rows = 0; + while (num_output_rows < max_lines) { + if (jpegli::IsInputReady(cinfo)) { + jpegli::ProcessOutput(cinfo, &num_output_rows, scanlines, max_lines); + } else if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + break; + } + } + return num_output_rows; +} + +JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { + // TODO(szabadka) Skip the IDCT for skipped over blocks. 
+ return jpegli_read_scanlines(cinfo, nullptr, num_lines); +} + +void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION* xoffset, + JDIMENSION* width) { + jpeg_decomp_master* m = cinfo->master; + if ((cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) || + cinfo->output_scanline != 0) { + JPEGLI_ERROR("jpegli_crop_decompress: unexpected state %d", + cinfo->global_state); + } + if (cinfo->raw_data_out) { + JPEGLI_ERROR("Output cropping is not supported in raw data mode"); + } + if (xoffset == nullptr || width == nullptr || *width == 0 || + *xoffset + *width > cinfo->output_width) { + JPEGLI_ERROR("jpegli_crop_scanline: Invalid arguments"); + } + // TODO(szabadka) Skip the IDCT for skipped over blocks. + size_t xend = *xoffset + *width; + size_t iMCU_width = m->min_scaled_dct_size * cinfo->max_h_samp_factor; + *xoffset = (*xoffset / iMCU_width) * iMCU_width; + *width = xend - *xoffset; + cinfo->master->xoffset_ = *xoffset; + cinfo->output_width = *width; +} + +JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines) { + if ((cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) || + !cinfo->raw_data_out) { + JPEGLI_ERROR("jpegli_read_raw_data: unexpected state %d", + cinfo->global_state); + } + size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE; + if (max_lines < iMCU_height) { + JPEGLI_ERROR("jpegli_read_raw_data: output buffer too small"); + } + jpegli::ProgressMonitorOutputPass(cinfo); + while (!jpegli::IsInputReady(cinfo)) { + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return 0; + } + } + if (cinfo->output_iMCU_row < cinfo->total_iMCU_rows) { + jpegli::ProcessRawOutput(cinfo, data); + return iMCU_height; + } + return 0; +} + +jvirt_barray_ptr* jpegli_read_coefficients(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + m->streaming_mode_ = false; + if (!cinfo->buffered_image && 
cinfo->global_state == jpegli::kDecHeaderDone) { + jpegli::AllocateCoefficientBuffer(cinfo); + jpegli_calc_output_dimensions(cinfo); + jpegli::InitProgressMonitor(cinfo, /*coef_only=*/true); + jpegli::PrepareForScan(cinfo); + } + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_read_coefficients: unexpected state %d", + cinfo->global_state); + } + if (!cinfo->buffered_image) { + while (!m->found_eoi_) { + jpegli::ProgressMonitorInputPass(cinfo); + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return nullptr; + } + } + cinfo->output_scanline = cinfo->output_height; + } + return m->coef_arrays; +} + +boolean jpegli_finish_decompress(j_decompress_ptr cinfo) { + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_finish_decompress: unexpected state %d", + cinfo->global_state); + } + if (!cinfo->buffered_image && cinfo->output_scanline < cinfo->output_height) { + JPEGLI_ERROR("Incomplete output"); + } + while (!cinfo->master->found_eoi_) { + if (jpegli::ConsumeInput(cinfo) == JPEG_SUSPENDED) { + return FALSE; + } + } + (*cinfo->src->term_source)(cinfo); + jpegli_abort_decompress(cinfo); + return TRUE; +} + +boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired) { + JPEGLI_WARN("Invalid restart marker found: 0x%02x vs 0x%02x.", + cinfo->unread_marker, 0xd0 + desired); + // This is a trivial implementation, we just let the decoder skip the entire + // scan and attempt to render the partial input. 
+ return TRUE; +} + +void jpegli_new_colormap(j_decompress_ptr cinfo) { + if (cinfo->global_state != jpegli::kDecProcessScan && + cinfo->global_state != jpegli::kDecProcessMarkers) { + JPEGLI_ERROR("jpegli_new_colormap: unexpected state %d", + cinfo->global_state); + } + if (!cinfo->buffered_image) { + JPEGLI_ERROR("jpegli_new_colormap: not in buffered image mode"); + } + if (!cinfo->enable_external_quant) { + JPEGLI_ERROR("external colormap quantizer was not enabled"); + } + if (!cinfo->quantize_colors || cinfo->colormap == nullptr) { + JPEGLI_ERROR("jpegli_new_colormap: not in external colormap mode"); + } + cinfo->master->regenerate_inverse_colormap_ = true; +} + +void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness) { + switch (data_type) { + case JPEGLI_TYPE_UINT8: + case JPEGLI_TYPE_UINT16: + case JPEGLI_TYPE_FLOAT: + cinfo->master->output_data_type_ = data_type; + break; + default: + JPEGLI_ERROR("Unsupported data type %d", data_type); + } + switch (endianness) { + case JPEGLI_NATIVE_ENDIAN: + cinfo->master->swap_endianness_ = false; + break; + case JPEGLI_LITTLE_ENDIAN: + cinfo->master->swap_endianness_ = !IsLittleEndian(); + break; + case JPEGLI_BIG_ENDIAN: + cinfo->master->swap_endianness_ = IsLittleEndian(); + break; + default: + JPEGLI_ERROR("Unsupported endianness %d", endianness); + } +} diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode.h b/third-party/libjxl/libjxl/lib/jpegli/decode.h new file mode 100644 index 0000000000..c862630f6b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/decode.h @@ -0,0 +1,106 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+// +// This file contains the C API of the decoder part of the libjpegli library, +// which is based on the C API of libjpeg, with the function names changed from +// jpeg_* to jpegli_*, while decompressor object definitions are included directly +// from jpeglib.h +// +// Applications can use the libjpegli library in one of the following ways: +// +// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function +// names of the API and link against libjpegli. +// +// (2) Leave the application code unchanged, but replace the libjpeg.so library +// with the one built by this project that is API- and ABI-compatible with +// libjpeg-turbo's version of libjpeg.so. + +#ifndef LIB_JPEGLI_DECODE_H_ +#define LIB_JPEGLI_DECODE_H_ + +#include "lib/jpegli/common.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#define jpegli_create_decompress(cinfo) \ + jpegli_CreateDecompress((cinfo), JPEG_LIB_VERSION, \ + (size_t)sizeof(struct jpeg_decompress_struct)) + +void jpegli_CreateDecompress(j_decompress_ptr cinfo, int version, + size_t structsize); + +void jpegli_stdio_src(j_decompress_ptr cinfo, FILE *infile); + +void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer, + unsigned long insize); + +int jpegli_read_header(j_decompress_ptr cinfo, boolean require_image); + +boolean jpegli_start_decompress(j_decompress_ptr cinfo); + +JDIMENSION jpegli_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION max_lines); + +JDIMENSION jpegli_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines); + +void jpegli_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width); + +boolean jpegli_finish_decompress(j_decompress_ptr cinfo); + +JDIMENSION jpegli_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines); + +jvirt_barray_ptr *jpegli_read_coefficients(j_decompress_ptr cinfo); + +boolean jpegli_has_multiple_scans(j_decompress_ptr cinfo); + +boolean 
jpegli_start_output(j_decompress_ptr cinfo, int scan_number); + +boolean jpegli_finish_output(j_decompress_ptr cinfo); + +boolean jpegli_input_complete(j_decompress_ptr cinfo); + +int jpegli_consume_input(j_decompress_ptr cinfo); + +#if JPEG_LIB_VERSION >= 80 +void jpegli_core_output_dimensions(j_decompress_ptr cinfo); +#endif +void jpegli_calc_output_dimensions(j_decompress_ptr cinfo); + +void jpegli_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit); + +void jpegli_set_marker_processor(j_decompress_ptr cinfo, int marker_code, + jpeg_marker_parser_method routine); + +boolean jpegli_resync_to_restart(j_decompress_ptr cinfo, int desired); + +boolean jpegli_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr, + unsigned int *icc_data_len); + +void jpegli_abort_decompress(j_decompress_ptr cinfo); + +void jpegli_destroy_decompress(j_decompress_ptr cinfo); + +void jpegli_new_colormap(j_decompress_ptr cinfo); + +// +// New API functions that are not available in libjpeg +// +// NOTE: This part of the API is still experimental and will probably change in +// the future. +// + +void jpegli_set_output_format(j_decompress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // LIB_JPEGLI_DECODE_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_api_test.cc b/third-party/libjxl/libjxl/lib/jpegli/decode_api_test.cc new file mode 100644 index 0000000000..39dd693ce0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/decode_api_test.cc @@ -0,0 +1,1305 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include +#include + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" + +namespace jpegli { +namespace { + +static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9}; +static constexpr size_t kNumSourceBuffers = 4; + +// Custom source manager that refills the input buffer in chunks, simulating +// a file reader with a fixed buffer size. +class SourceManager { + public: + SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size) + : data_(data), len_(len), max_chunk_size_(max_chunk_size) { + pub_.skip_input_data = skip_input_data; + pub_.resync_to_restart = jpegli_resync_to_restart; + pub_.term_source = term_source; + pub_.init_source = init_source; + pub_.fill_input_buffer = fill_input_buffer; + if (max_chunk_size_ == 0) max_chunk_size_ = len; + buffers_.resize(kNumSourceBuffers, std::vector(max_chunk_size_)); + Reset(); + } + + void Reset() { + pub_.next_input_byte = nullptr; + pub_.bytes_in_buffer = 0; + pos_ = 0; + chunk_idx_ = 0; + } + + ~SourceManager() { + EXPECT_EQ(0, pub_.bytes_in_buffer); + EXPECT_EQ(len_, pos_); + } + + private: + jpeg_source_mgr pub_; + const uint8_t* data_; + size_t len_; + size_t chunk_idx_; + size_t pos_; + size_t max_chunk_size_; + std::vector> buffers_; + + static void init_source(j_decompress_ptr cinfo) {} + + static boolean fill_input_buffer(j_decompress_ptr cinfo) { + auto src = reinterpret_cast(cinfo->src); + if (src->pos_ < src->len_) { + size_t chunk_size = std::min(src->len_ - src->pos_, src->max_chunk_size_); + size_t next_idx = ++src->chunk_idx_ % kNumSourceBuffers; + uint8_t* next_buffer = src->buffers_[next_idx].data(); + memcpy(next_buffer, src->data_ + src->pos_, chunk_size); + src->pub_.next_input_byte = next_buffer; + src->pub_.bytes_in_buffer = chunk_size; + } else { + src->pub_.next_input_byte = 
kFakeEoiMarker; + src->pub_.bytes_in_buffer = 2; + src->len_ += 2; + } + src->pos_ += src->pub_.bytes_in_buffer; + return TRUE; + } + + static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { + auto src = reinterpret_cast(cinfo->src); + if (num_bytes <= 0) { + return; + } + if (src->pub_.bytes_in_buffer >= static_cast(num_bytes)) { + src->pub_.bytes_in_buffer -= num_bytes; + src->pub_.next_input_byte += num_bytes; + } else { + src->pos_ += num_bytes - src->pub_.bytes_in_buffer; + src->pub_.bytes_in_buffer = 0; + } + } + + static void term_source(j_decompress_ptr cinfo) {} +}; + +uint8_t markers_seen[kMarkerSequenceLen]; +size_t num_markers_seen = 0; + +uint8_t get_next_byte(j_decompress_ptr cinfo) { + if (cinfo->src->bytes_in_buffer == 0) { + (*cinfo->src->fill_input_buffer)(cinfo); + } + cinfo->src->bytes_in_buffer--; + return *cinfo->src->next_input_byte++; +} + +boolean test_marker_processor(j_decompress_ptr cinfo) { + markers_seen[num_markers_seen] = cinfo->unread_marker; + size_t marker_len = (get_next_byte(cinfo) << 8) + get_next_byte(cinfo); + EXPECT_EQ(2 + ((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len); + if (marker_len > 2) { + (*cinfo->src->skip_input_data)(cinfo, marker_len - 2); + } + ++num_markers_seen; + return TRUE; +} + +void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo, + TestImage* output) { + JDIMENSION xoffset = 0; + JDIMENSION yoffset = 0; + JDIMENSION xsize_cropped = cinfo->output_width; + JDIMENSION ysize_cropped = cinfo->output_height; + if (dparams.crop_output) { + xoffset = xsize_cropped = cinfo->output_width / 3; + yoffset = ysize_cropped = cinfo->output_height / 3; + jpegli_crop_scanline(cinfo, &xoffset, &xsize_cropped); + } + output->ysize = ysize_cropped; + output->xsize = cinfo->output_width; + output->components = cinfo->out_color_components; + output->data_type = dparams.data_type; + output->endianness = dparams.endianness; + size_t bytes_per_sample = 
jpegli_bytes_per_sample(dparams.data_type); + if (cinfo->raw_data_out) { + output->color_space = cinfo->jpeg_color_space; + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + std::vector plane(ysize * xsize * bytes_per_sample); + output->raw_data.emplace_back(std::move(plane)); + } + } else { + output->color_space = cinfo->out_color_space; + output->AllocatePixels(); + } + size_t total_output_lines = 0; + while (cinfo->output_scanline < cinfo->output_height) { + size_t max_lines; + size_t num_output_lines; + if (cinfo->raw_data_out) { + size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE; + EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height); + max_lines = iMCU_height; + std::vector> rowdata(cinfo->num_components); + std::vector data(cinfo->num_components); + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE; + rowdata[c].resize(num_lines); + size_t y0 = cinfo->output_iMCU_row * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + y0 + i < ysize ? 
&output->raw_data[c][(y0 + i) * xsize] : nullptr; + } + data[c] = &rowdata[c][0]; + } + num_output_lines = jpegli_read_raw_data(cinfo, &data[0], max_lines); + } else { + size_t max_output_lines = dparams.max_output_lines; + if (max_output_lines == 0) max_output_lines = cinfo->output_height; + if (cinfo->output_scanline < yoffset) { + max_lines = yoffset - cinfo->output_scanline; + num_output_lines = jpegli_skip_scanlines(cinfo, max_lines); + } else if (cinfo->output_scanline >= yoffset + ysize_cropped) { + max_lines = cinfo->output_height - cinfo->output_scanline; + num_output_lines = jpegli_skip_scanlines(cinfo, max_lines); + } else { + size_t lines_left = yoffset + ysize_cropped - cinfo->output_scanline; + max_lines = std::min(max_output_lines, lines_left); + size_t stride = cinfo->output_width * cinfo->out_color_components * + bytes_per_sample; + std::vector scanlines(max_lines); + for (size_t i = 0; i < max_lines; ++i) { + size_t yidx = cinfo->output_scanline - yoffset + i; + scanlines[i] = &output->pixels[yidx * stride]; + } + num_output_lines = + jpegli_read_scanlines(cinfo, &scanlines[0], max_lines); + if (cinfo->quantize_colors) { + for (size_t i = 0; i < num_output_lines; ++i) { + UnmapColors(scanlines[i], cinfo->output_width, + cinfo->out_color_components, cinfo->colormap, + cinfo->actual_number_of_colors); + } + } + } + } + total_output_lines += num_output_lines; + EXPECT_EQ(total_output_lines, cinfo->output_scanline); + EXPECT_EQ(num_output_lines, max_lines); + } + EXPECT_EQ(cinfo->total_iMCU_rows, + DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE)); +} + +struct TestConfig { + std::string fn; + std::string fn_desc; + TestImage input; + CompressParams jparams; + DecompressParams dparams; + bool compare_to_orig = false; + float max_tolerance_factor = 1.01f; + float max_rms_dist = 1.0f; + float max_diff = 35.0f; +}; + +std::vector GetTestJpegData(TestConfig& config) { + std::vector compressed; + if (!config.fn.empty()) { + compressed = 
ReadTestData(config.fn.c_str()); + } else { + GeneratePixels(&config.input); + JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &compressed)); + } + if (config.dparams.size_factor < 1.0f) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + return compressed; +} + +void TestAPINonBuffered(const CompressParams& jparams, + const DecompressParams& dparams, + const TestImage& expected_output, + j_decompress_ptr cinfo, TestImage* output) { + if (jparams.add_marker) { + jpegli_save_markers(cinfo, kSpecialMarker0, 0xffff); + jpegli_save_markers(cinfo, kSpecialMarker1, 0xffff); + num_markers_seen = 0; + jpegli_set_marker_processor(cinfo, 0xe6, test_marker_processor); + jpegli_set_marker_processor(cinfo, 0xe7, test_marker_processor); + jpegli_set_marker_processor(cinfo, 0xe8, test_marker_processor); + } + if (!jparams.icc.empty()) { + jpegli_save_markers(cinfo, JPEG_APP0 + 2, 0xffff); + } + jpegli_read_header(cinfo, /*require_image=*/TRUE); + if (jparams.add_marker) { + EXPECT_EQ(num_markers_seen, kMarkerSequenceLen); + EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen)); + } + if (!jparams.icc.empty()) { + uint8_t* icc_data = nullptr; + unsigned int icc_len; + JXL_CHECK(jpegli_read_icc_profile(cinfo, &icc_data, &icc_len)); + JXL_CHECK(icc_data); + EXPECT_EQ(0, memcmp(jparams.icc.data(), icc_data, icc_len)); + free(icc_data); + } + // Check that jpegli_calc_output_dimensions can be called multiple times + // even with different parameters. 
+ if (!cinfo->raw_data_out) { + cinfo->scale_num = 1; + cinfo->scale_denom = 2; + } + jpegli_calc_output_dimensions(cinfo); + SetDecompressParams(dparams, cinfo); + jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness); + VerifyHeader(jparams, cinfo); + jpegli_calc_output_dimensions(cinfo); + EXPECT_LE(expected_output.xsize, cinfo->output_width); + if (!dparams.crop_output) { + EXPECT_EQ(expected_output.xsize, cinfo->output_width); + } + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(cinfo, coef_arrays, output); + } else { + jpegli_start_decompress(cinfo); + VerifyScanHeader(jparams, cinfo); + ReadOutputImage(dparams, cinfo, output); + } + jpegli_finish_decompress(cinfo); +} + +void TestAPIBuffered(const CompressParams& jparams, + const DecompressParams& dparams, j_decompress_ptr cinfo, + std::vector* output_progression) { + EXPECT_EQ(JPEG_REACHED_SOS, + jpegli_read_header(cinfo, /*require_image=*/TRUE)); + cinfo->buffered_image = TRUE; + SetDecompressParams(dparams, cinfo); + jpegli_set_output_format(cinfo, dparams.data_type, dparams.endianness); + VerifyHeader(jparams, cinfo); + EXPECT_TRUE(jpegli_start_decompress(cinfo)); + // start decompress should not read the whole input in buffered image mode + EXPECT_FALSE(jpegli_input_complete(cinfo)); + bool has_multiple_scans = jpegli_has_multiple_scans(cinfo); + EXPECT_EQ(0, cinfo->output_scan_number); + int sos_marker_cnt = 1; // read_header reads the first SOS marker + while (!jpegli_input_complete(cinfo)) { + EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt); + if (dparams.skip_scans && (cinfo->input_scan_number % 2) != 1) { + int result = JPEG_SUSPENDED; + while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) { + result = jpegli_consume_input(cinfo); + } + if (result == JPEG_REACHED_SOS) ++sos_marker_cnt; + continue; + } + SetScanDecompressParams(dparams, cinfo, 
cinfo->input_scan_number); + EXPECT_TRUE(jpegli_start_output(cinfo, cinfo->input_scan_number)); + // start output sets output_scan_number, but does not change + // input_scan_number + EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number); + EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt); + VerifyScanHeader(jparams, cinfo); + TestImage output; + ReadOutputImage(dparams, cinfo, &output); + output_progression->emplace_back(std::move(output)); + // read scanlines/read raw data does not change input/output scan number + EXPECT_EQ(cinfo->input_scan_number, sos_marker_cnt); + EXPECT_EQ(cinfo->output_scan_number, cinfo->input_scan_number); + EXPECT_TRUE(jpegli_finish_output(cinfo)); + ++sos_marker_cnt; // finish output reads the next SOS marker or EOI + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(cinfo, coef_arrays, &output_progression->back()); + } + } + jpegli_finish_decompress(cinfo); + if (dparams.size_factor == 1.0f) { + EXPECT_EQ(has_multiple_scans, cinfo->input_scan_number > 1); + } +} + +TEST(DecodeAPITest, ReuseCinfo) { + TestImage input, output, expected; + std::vector output_progression, expected_output_progression; + CompressParams jparams; + DecompressParams dparams; + std::vector compressed; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + input.xsize = 129; + input.ysize = 73; + GeneratePixels(&input); + for (int h_samp : {2, 1}) { + for (int v_samp : {2, 1}) { + for (int progr : {0, 2}) { + jparams.h_sampling = {h_samp, 1, 1}; + jparams.v_sampling = {v_samp, 1, 1}; + jparams.progressive_mode = progr; + printf( + "Generating input with %dx%d chroma subsampling " + "progressive level %d\n", + h_samp, v_samp, progr); + JXL_CHECK(EncodeWithJpegli(input, jparams, &compressed)); + for (JpegIOMode output_mode : {PIXELS, RAW_DATA, 
COEFFICIENTS}) { + for (bool crop : {true, false}) { + if (crop && output_mode != PIXELS) continue; + for (int scale_num : {1, 2, 3, 4, 7, 8, 13, 16}) { + if (scale_num != 8 && output_mode != PIXELS) continue; + int scale_denom = 8; + while (scale_num % 2 == 0 && scale_denom % 2 == 0) { + scale_num /= 2; + scale_denom /= 2; + } + printf("Decoding with output mode %d output scaling %d/%d %s\n", + output_mode, scale_num, scale_denom, + crop ? "with cropped output" : ""); + dparams.output_mode = output_mode; + dparams.scale_num = scale_num; + dparams.scale_denom = scale_denom; + expected.Clear(); + DecodeWithLibjpeg(jparams, dparams, compressed, &expected); + output.Clear(); + cinfo.buffered_image = false; + cinfo.raw_data_out = false; + cinfo.scale_num = cinfo.scale_denom = 1; + SourceManager src(compressed.data(), compressed.size(), + 1u << 12); + cinfo.src = reinterpret_cast(&src); + jpegli_read_header(&cinfo, /*require_image=*/TRUE); + jpegli_abort_decompress(&cinfo); + src.Reset(); + TestAPINonBuffered(jparams, dparams, expected, &cinfo, &output); + float max_rms = output_mode == COEFFICIENTS ? 
0.0f : 1.0f; + if (scale_num == 1 && scale_denom == 8 && h_samp != v_samp) { + max_rms = 5.0f; // libjpeg does not do fancy upsampling + } + VerifyOutputImage(expected, output, max_rms); + printf("Decoding in buffered image mode\n"); + expected_output_progression.clear(); + DecodeAllScansWithLibjpeg(jparams, dparams, compressed, + &expected_output_progression); + output_progression.clear(); + src.Reset(); + TestAPIBuffered(jparams, dparams, &cinfo, &output_progression); + JXL_CHECK(output_progression.size() == + expected_output_progression.size()); + for (size_t i = 0; i < output_progression.size(); ++i) { + const TestImage& output = output_progression[i]; + const TestImage& expected = expected_output_progression[i]; + VerifyOutputImage(expected, output, max_rms); + } + } + } + } + } + } + } + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +std::vector GenerateBasicConfigs() { + std::vector all_configs; + for (int samp : {1, 2}) { + for (int progr : {0, 2}) { + TestConfig config; + config.input.xsize = 257 + samp * 37; + config.input.ysize = 265 + (progr / 2) * 17; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + GeneratePixels(&config.input); + all_configs.push_back(config); + } + } + return all_configs; +} + +TEST(DecodeAPITest, ReuseCinfoSameMemSource) { + std::vector all_configs = GenerateBasicConfigs(); + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + for (const TestConfig& config : all_configs) { + EncodeWithJpegli(config.input, config.jparams, &cinfo); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + } + std::vector all_outputs(all_configs.size()); + { + jpeg_decompress_struct 
cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, buffer, buffer_size); + for (size_t i = 0; i < all_configs.size(); ++i) { + TestAPINonBuffered(all_configs[i].jparams, DecompressParams(), + all_configs[i].input, &cinfo, &all_outputs[i]); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + } + for (size_t i = 0; i < all_configs.size(); ++i) { + VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f); + } + if (buffer) free(buffer); +} + +TEST(DecodeAPITest, ReuseCinfoSameStdSource) { + std::vector all_configs = GenerateBasicConfigs(); + FILE* tmpf = tmpfile(); + JXL_CHECK(tmpf); + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_stdio_dest(&cinfo, tmpf); + for (const TestConfig& config : all_configs) { + EncodeWithJpegli(config.input, config.jparams, &cinfo); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + } + rewind(tmpf); + std::vector all_outputs(all_configs.size()); + { + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_stdio_src(&cinfo, tmpf); + for (size_t i = 0; i < all_configs.size(); ++i) { + TestAPINonBuffered(all_configs[i].jparams, DecompressParams(), + all_configs[i].input, &cinfo, &all_outputs[i]); + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + } + for (size_t i = 0; i < all_configs.size(); ++i) { + VerifyOutputImage(all_configs[i].input, all_outputs[i], 2.35f); + } + fclose(tmpf); +} + +TEST(DecodeAPITest, AbbreviatedStreams) { + uint8_t* table_stream = nullptr; + unsigned long table_stream_size = 0; + uint8_t* data_stream = nullptr; + unsigned long data_stream_size = 0; + { + jpeg_compress_struct cinfo; + 
const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size); + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + jpegli_write_tables(&cinfo); + jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.optimize_coding = FALSE; + jpegli_set_progressive_level(&cinfo, 0); + jpegli_start_compress(&cinfo, FALSE); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + EXPECT_LT(data_stream_size, 50); + jpegli_destroy_compress(&cinfo); + } + { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, table_stream, table_stream_size); + jpegli_read_header(&cinfo, FALSE); + jpegli_mem_src(&cinfo, data_stream, data_stream_size); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + EXPECT_EQ(3, cinfo.num_components); + jpegli_start_decompress(&cinfo); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_read_scanlines(&cinfo, row, 1); + EXPECT_EQ(0, image[0]); + EXPECT_EQ(0, image[1]); + EXPECT_EQ(0, image[2]); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + } + if (table_stream) free(table_stream); + if (data_stream) free(data_stream); +} + +class DecodeAPITestParam : public ::testing::TestWithParam {}; + +TEST_P(DecodeAPITestParam, TestAPI) { + TestConfig config = GetParam(); + const DecompressParams& dparams = config.dparams; + if (dparams.skip_scans) return; + const std::vector compressed = GetTestJpegData(config); + SourceManager src(compressed.data(), compressed.size(), 
dparams.chunk_size); + + TestImage output1; + DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1); + + TestImage output0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + cinfo.src = reinterpret_cast(&src); + TestAPINonBuffered(config.jparams, dparams, output1, &cinfo, &output0); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + if (config.compare_to_orig) { + double rms0 = DistanceRms(config.input, output0); + double rms1 = DistanceRms(config.input, output1); + printf("rms: %f vs %f\n", rms0, rms1); + EXPECT_LE(rms0, rms1 * config.max_tolerance_factor); + } else { + VerifyOutputImage(output0, output1, config.max_rms_dist, config.max_diff); + } +} + +class DecodeAPITestParamBuffered : public ::testing::TestWithParam { +}; + +TEST_P(DecodeAPITestParamBuffered, TestAPI) { + TestConfig config = GetParam(); + const DecompressParams& dparams = config.dparams; + const std::vector compressed = GetTestJpegData(config); + SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size); + + std::vector output_progression1; + DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed, + &output_progression1); + + std::vector output_progression0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + cinfo.src = reinterpret_cast(&src); + TestAPIBuffered(config.jparams, dparams, &cinfo, &output_progression0); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + ASSERT_EQ(output_progression0.size(), output_progression1.size()); + for (size_t i = 0; i < output_progression0.size(); ++i) { + const TestImage& output = output_progression0[i]; + const TestImage& expected = output_progression1[i]; + if (config.compare_to_orig) { + double rms0 = DistanceRms(config.input, output); + 
double rms1 = DistanceRms(config.input, expected); + printf("rms: %f vs %f\n", rms0, rms1); + EXPECT_LE(rms0, rms1 * config.max_tolerance_factor); + } else { + VerifyOutputImage(expected, output, config.max_rms_dist, config.max_diff); + } + } +} + +std::vector GenerateTests(bool buffered) { + std::vector all_tests; + { + std::vector> testfiles({ + {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"}, + {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"}, + {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"}, + }); + for (size_t i = 0; i < (buffered ? 1u : testfiles.size()); ++i) { + TestConfig config; + config.fn = testfiles[i].first; + config.fn_desc = testfiles[i].second; + for (size_t chunk_size : {0, 1, 64, 65536}) { + config.dparams.chunk_size = chunk_size; + for (size_t max_output_lines : {0, 1, 8, 16}) { + config.dparams.max_output_lines = max_output_lines; + config.dparams.output_mode = PIXELS; + all_tests.push_back(config); + } + { + config.dparams.max_output_lines = 16; + config.dparams.output_mode = RAW_DATA; + all_tests.push_back(config); + } + } + } + } + + { + std::vector> testfiles({ + {"jxl/flower/flower_small.q85_444_non_interleaved.jpg", + "Q85YUV444NonInterleaved"}, + {"jxl/flower/flower_small.q85_420_non_interleaved.jpg", + "Q85YUV420NonInterleaved"}, + {"jxl/flower/flower_small.q85_444_partially_interleaved.jpg", + "Q85YUV444PartiallyInterleaved"}, + {"jxl/flower/flower_small.q85_420_partially_interleaved.jpg", + "Q85YUV420PartiallyInterleaved"}, + {"jxl/flower/flower.png.im_q85_422.jpg", "Q85YUV422"}, + {"jxl/flower/flower.png.im_q85_440.jpg", "Q85YUV440"}, + {"jxl/flower/flower.png.im_q85_444_1x2.jpg", "Q85YUV444_1x2"}, + {"jxl/flower/flower.png.im_q85_asymmetric.jpg", "Q85Asymmetric"}, + {"jxl/flower/flower.png.im_q85_gray.jpg", "Q85Gray"}, + {"jxl/flower/flower.png.im_q85_luma_subsample.jpg", "Q85LumaSubsample"}, + {"jxl/flower/flower.png.im_q85_rgb.jpg", "Q85RGB"}, + 
{"jxl/flower/flower.png.im_q85_rgb_subsample_blue.jpg", + "Q85RGBSubsampleBlue"}, + {"jxl/flower/flower_small.cmyk.jpg", "CMYK"}, + }); + for (size_t i = 0; i < (buffered ? 4u : testfiles.size()); ++i) { + for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) { + TestConfig config; + config.fn = testfiles[i].first; + config.fn_desc = testfiles[i].second; + config.dparams.output_mode = output_mode; + all_tests.push_back(config); + } + } + } + + // Tests for common chroma subsampling and output modes. + for (JpegIOMode output_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) { + for (int h_samp : {1, 2}) { + for (int v_samp : {1, 2}) { + for (bool fancy : {true, false}) { + if (!fancy && (output_mode != PIXELS || h_samp * v_samp == 1)) { + continue; + } + TestConfig config; + config.dparams.output_mode = output_mode; + config.dparams.do_fancy_upsampling = fancy; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h_samp, 1, 1}; + config.jparams.v_sampling = {v_samp, 1, 1}; + if (output_mode == COEFFICIENTS) { + config.max_rms_dist = 0.0f; + } + all_tests.push_back(config); + } + } + } + } + + // Tests for partial input. + for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) { + for (int progr : {0, 1, 3}) { + for (int samp : {1, 2}) { + for (bool skip_scans : {false, true}) { + if (skip_scans && (progr != 1 || size_factor < 0.5f)) continue; + for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) { + TestConfig config; + config.input.xsize = 517; + config.input.ysize = 523; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + config.dparams.size_factor = size_factor; + config.dparams.output_mode = output_mode; + config.dparams.skip_scans = skip_scans; + // The last partially available block can behave differently. + // TODO(szabadka) Figure out if we can make the behaviour more + // similar. + config.max_rms_dist = samp == 1 ? 
1.75f : 3.0f; + config.max_diff = 255.0f; + all_tests.push_back(config); + } + } + } + } + } + + // Tests for block smoothing. + for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) { + for (int samp : {1, 2}) { + for (bool skip_scans : {false, true}) { + if (skip_scans && size_factor < 0.3f) continue; + TestConfig config; + config.input.xsize = 517; + config.input.ysize = 523; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = 2; + config.dparams.size_factor = size_factor; + config.dparams.do_block_smoothing = true; + config.dparams.skip_scans = skip_scans; + // libjpeg does smoothing for incomplete scans differently at + // the border between current and previous scans. + config.max_rms_dist = 8.0f; + config.max_diff = 255.0f; + all_tests.push_back(config); + } + } + } + + // Test for switching output color quantization modes between scans. + if (buffered) { + TestConfig config; + config.jparams.progressive_mode = 2; + config.dparams.quantize_colors = true; + config.dparams.scan_params = { + {3, JDITHER_NONE, CQUANT_1PASS}, {4, JDITHER_ORDERED, CQUANT_1PASS}, + {5, JDITHER_FS, CQUANT_1PASS}, {6, JDITHER_NONE, CQUANT_EXTERNAL}, + {8, JDITHER_NONE, CQUANT_REUSE}, {9, JDITHER_NONE, CQUANT_EXTERNAL}, + {10, JDITHER_NONE, CQUANT_2PASS}, {11, JDITHER_NONE, CQUANT_REUSE}, + {12, JDITHER_NONE, CQUANT_2PASS}, {13, JDITHER_FS, CQUANT_2PASS}, + }; + config.compare_to_orig = true; + config.max_tolerance_factor = 1.04f; + all_tests.push_back(config); + } + + if (buffered) { + return all_tests; + } + + // Tests for output color quantization. 
+ for (int num_colors : {8, 64, 256}) { + for (ColorQuantMode mode : {CQUANT_1PASS, CQUANT_EXTERNAL, CQUANT_2PASS}) { + if (mode == CQUANT_EXTERNAL && num_colors != 256) continue; + for (J_DITHER_MODE dither : {JDITHER_NONE, JDITHER_ORDERED, JDITHER_FS}) { + if (mode == CQUANT_EXTERNAL && dither != JDITHER_NONE) continue; + if (mode != CQUANT_1PASS && dither == JDITHER_ORDERED) continue; + for (bool crop : {false, true}) { + for (bool scale : {false, true}) { + for (bool samp : {false, true}) { + if ((num_colors != 256) && (crop || scale || samp)) { + continue; + } + if (mode == CQUANT_2PASS && crop) continue; + TestConfig config; + config.input.xsize = 1024; + config.input.ysize = 768; + config.dparams.quantize_colors = true; + config.dparams.desired_number_of_colors = num_colors; + config.dparams.scan_params = {{kLastScan, dither, mode}}; + config.dparams.crop_output = crop; + if (scale) { + config.dparams.scale_num = 7; + config.dparams.scale_denom = 8; + } + if (samp) { + config.jparams.h_sampling = {2, 1, 1}; + config.jparams.v_sampling = {2, 1, 1}; + } + if (!scale && !crop) { + config.compare_to_orig = true; + if (dither != JDITHER_NONE) { + config.max_tolerance_factor = 1.05f; + } + if (mode == CQUANT_2PASS && + (num_colors == 8 || dither == JDITHER_FS)) { + // TODO(szabadka) Lower this bound. + config.max_tolerance_factor = 1.5f; + } + } else { + // We only test for buffer overflows, etc. + config.max_rms_dist = 100.0f; + config.max_diff = 255.0f; + } + all_tests.push_back(config); + } + } + } + } + } + } + + // Tests for output formats. 
+ for (JpegliDataType type : + {JPEGLI_TYPE_UINT8, JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) { + for (JpegliEndianness endianness : + {JPEGLI_NATIVE_ENDIAN, JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN}) { + if (type == JPEGLI_TYPE_UINT8 && endianness != JPEGLI_NATIVE_ENDIAN) { + continue; + } + for (int channels = 1; channels <= 4; ++channels) { + TestConfig config; + config.dparams.data_type = type; + config.dparams.endianness = endianness; + config.input.color_space = JCS_UNKNOWN; + config.input.components = channels; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = JCS_UNKNOWN; + all_tests.push_back(config); + } + } + } + // Test for output cropping. + { + TestConfig config; + config.dparams.crop_output = true; + all_tests.push_back(config); + } + // Tests for color transforms. + for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_GRAYSCALE}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.input.color_space = JCS_GRAYSCALE; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = out_color_space; + all_tests.push_back(config); + } + for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) { + for (J_COLOR_SPACE out_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) { + if (jpeg_color_space == JCS_RGB && out_color_space == JCS_YCbCr) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = out_color_space; + all_tests.push_back(config); + } + } + for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) { + for (J_COLOR_SPACE out_color_space : {JCS_CMYK, JCS_YCCK}) { + if (jpeg_color_space == JCS_CMYK && out_color_space == JCS_YCCK) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.input.color_space = JCS_CMYK; + 
config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.dparams.set_out_color_space = true; + config.dparams.out_color_space = out_color_space; + all_tests.push_back(config); + } + } + // Tests for progressive levels. + for (int p = 0; p < 3 + NumTestScanScripts(); ++p) { + TestConfig config; + config.jparams.progressive_mode = p; + all_tests.push_back(config); + } + // Tests for RST markers. + for (size_t r : {1, 17, 1024}) { + for (size_t chunk_size : {1, 65536}) { + for (int progr : {0, 2}) { + TestConfig config; + config.dparams.chunk_size = chunk_size; + config.jparams.progressive_mode = progr; + config.jparams.restart_interval = r; + all_tests.push_back(config); + } + } + } + for (size_t rr : {1, 3, 8, 100}) { + TestConfig config; + config.jparams.restart_in_rows = rr; + all_tests.push_back(config); + } + // Tests for custom quantization tables. + for (int type : {0, 1, 10, 100, 10000}) { + for (int scale : {1, 50, 100, 200, 500}) { + for (bool add_raw : {false, true}) { + for (bool baseline : {true, false}) { + if (!baseline && (add_raw || type * scale < 25500)) continue; + TestConfig config; + config.input.xsize = 64; + config.input.ysize = 64; + CustomQuantTable table; + table.table_type = type; + table.scale_factor = scale; + table.force_baseline = baseline; + table.add_raw = add_raw; + table.Generate(); + config.jparams.quant_tables.push_back(table); + config.jparams.quant_indexes = {0, 0, 0}; + config.compare_to_orig = true; + config.max_tolerance_factor = 1.02; + all_tests.push_back(config); + } + } + } + } + for (int qidx = 0; qidx < 8; ++qidx) { + if (qidx == 3) continue; + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + all_tests.push_back(config); + } + for (int qidx = 0; qidx < 8; ++qidx) { + for (int slot_idx = 0; slot_idx < 2; ++slot_idx) { + if (qidx == 0 && slot_idx == 0) 
continue; + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + CustomQuantTable table; + table.slot_idx = slot_idx; + table.Generate(); + config.jparams.quant_tables.push_back(table); + all_tests.push_back(config); + } + } + for (int qidx = 0; qidx < 8; ++qidx) { + for (bool xyb : {false, true}) { + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.xyb_mode = xyb; + config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1, + (qidx >> 0) & 1}; + { + CustomQuantTable table; + table.slot_idx = 0; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 1; + table.table_type = 20; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + config.compare_to_orig = true; + all_tests.push_back(config); + } + } + for (bool xyb : {false, true}) { + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.jparams.xyb_mode = xyb; + config.jparams.quant_indexes = {0, 1, 2}; + { + CustomQuantTable table; + table.slot_idx = 0; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 1; + table.table_type = 20; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + { + CustomQuantTable table; + table.slot_idx = 2; + table.table_type = 30; + table.Generate(); + config.jparams.quant_tables.push_back(table); + } + config.compare_to_orig = true; + all_tests.push_back(config); + } + // Tests for fixed (and custom) prefix codes. 
+ for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr}) { + for (bool flat_dc_luma : {false, true}) { + TestConfig config; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.jparams.use_flat_dc_luma_code = flat_dc_luma; + all_tests.push_back(config); + } + } + for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) { + for (bool flat_dc_luma : {false, true}) { + TestConfig config; + config.input.color_space = JCS_CMYK; + config.jparams.set_jpeg_colorspace = true; + config.jparams.jpeg_color_space = jpeg_color_space; + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.jparams.use_flat_dc_luma_code = flat_dc_luma; + all_tests.push_back(config); + } + } + // Test for jpeg without DHT marker. + { + TestConfig config; + config.jparams.progressive_mode = 0; + config.jparams.optimize_coding = 0; + config.jparams.omit_standard_tables = true; + all_tests.push_back(config); + } + // Test for custom component ids. + { + TestConfig config; + config.input.xsize = config.input.ysize = 128; + config.jparams.comp_ids = {7, 17, 177}; + all_tests.push_back(config); + } + // Tests for JFIF/Adobe markers. + for (int override_JFIF : {-1, 0, 1}) { + for (int override_Adobe : {-1, 0, 1}) { + if (override_JFIF == -1 && override_Adobe == -1) continue; + TestConfig config; + config.input.xsize = config.input.ysize = 128; + config.jparams.override_JFIF = override_JFIF; + config.jparams.override_Adobe = override_Adobe; + all_tests.push_back(config); + } + } + // Tests for small images. + for (int xsize : {1, 7, 8, 9, 15, 16, 17}) { + for (int ysize : {1, 7, 8, 9, 15, 16, 17}) { + TestConfig config; + config.input.xsize = xsize; + config.input.ysize = ysize; + all_tests.push_back(config); + } + } + // Tests for custom marker processor. 
+ for (size_t chunk_size : {0, 1, 64, 65536}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.dparams.chunk_size = chunk_size; + config.jparams.add_marker = true; + all_tests.push_back(config); + } + // Tests for icc profile decoding. + for (size_t icc_size : {728, 70000, 1000000}) { + TestConfig config; + config.input.xsize = config.input.ysize = 256; + config.jparams.icc.resize(icc_size); + for (size_t i = 0; i < icc_size; ++i) { + config.jparams.icc[i] = (i * 17) & 0xff; + } + all_tests.push_back(config); + } + // Tests for unusual sampling factors. + for (int h0_samp : {1, 2, 3, 4}) { + for (int v0_samp : {1, 2, 3, 4}) { + for (int dxb = 0; dxb < h0_samp; ++dxb) { + for (int dyb = 0; dyb < v0_samp; ++dyb) { + for (int dx = 0; dx < 2; ++dx) { + for (int dy = 0; dy < 2; ++dy) { + TestConfig config; + config.input.xsize = 128 + dyb * 8 + dy; + config.input.ysize = 256 + dxb * 8 + dx; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, 1}; + config.jparams.v_sampling = {v0_samp, 1, 1}; + config.compare_to_orig = true; + all_tests.push_back(config); + } + } + } + } + } + } + for (int h0_samp : {1, 2, 4}) { + for (int v0_samp : {1, 2, 4}) { + for (int h2_samp : {1, 2, 4}) { + for (int v2_samp : {1, 2, 4}) { + TestConfig config; + config.input.xsize = 137; + config.input.ysize = 75; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, h2_samp}; + config.jparams.v_sampling = {v0_samp, 1, v2_samp}; + config.compare_to_orig = true; + all_tests.push_back(config); + } + } + } + } + for (int h0_samp : {1, 3}) { + for (int v0_samp : {1, 3}) { + for (int h2_samp : {1, 3}) { + for (int v2_samp : {1, 3}) { + TestConfig config; + config.input.xsize = 205; + config.input.ysize = 99; + config.jparams.progressive_mode = 2; + config.jparams.h_sampling = {h0_samp, 1, h2_samp}; + config.jparams.v_sampling = {v0_samp, 1, v2_samp}; + all_tests.push_back(config); + } + } + } + } + // Tests for 
output scaling. + for (int scale_num = 1; scale_num <= 16; ++scale_num) { + if (scale_num == 8) continue; + for (bool crop : {false, true}) { + for (int samp : {1, 2}) { + for (int progr : {0, 2}) { + TestConfig config; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + config.dparams.scale_num = scale_num; + config.dparams.scale_denom = 8; + config.dparams.crop_output = crop; + all_tests.push_back(config); + } + } + } + } + return all_tests; +} + +std::string QuantMode(ColorQuantMode mode) { + switch (mode) { + case CQUANT_1PASS: + return "1pass"; + case CQUANT_EXTERNAL: + return "External"; + case CQUANT_2PASS: + return "2pass"; + case CQUANT_REUSE: + return "Reuse"; + } + return ""; +} + +std::string DitherMode(J_DITHER_MODE mode) { + switch (mode) { + case JDITHER_NONE: + return "No"; + case JDITHER_ORDERED: + return "Ordered"; + case JDITHER_FS: + return "FS"; + } + return ""; +} + +std::ostream& operator<<(std::ostream& os, const DecompressParams& dparams) { + if (dparams.chunk_size == 0) { + os << "CompleteInput"; + } else { + os << "InputChunks" << dparams.chunk_size; + } + if (dparams.size_factor < 1.0f) { + os << "Partial" << static_cast(dparams.size_factor * 100) << "p"; + } + if (dparams.max_output_lines == 0) { + os << "CompleteOutput"; + } else { + os << "OutputLines" << dparams.max_output_lines; + } + if (dparams.output_mode == RAW_DATA) { + os << "RawDataOut"; + } else if (dparams.output_mode == COEFFICIENTS) { + os << "CoeffsOut"; + } + os << IOMethodName(dparams.data_type, dparams.endianness); + if (dparams.set_out_color_space) { + os << "OutColor" << ColorSpaceName((J_COLOR_SPACE)dparams.out_color_space); + } + if (dparams.crop_output) { + os << "Crop"; + } + if (dparams.do_block_smoothing) { + os << "BlockSmoothing"; + } + if (!dparams.do_fancy_upsampling) { + os << "NoFancyUpsampling"; + } + if (dparams.scale_num != 1 || dparams.scale_denom != 1) { + os << "Scale" 
<< dparams.scale_num << "_" << dparams.scale_denom; + } + if (dparams.quantize_colors) { + os << "Quant" << dparams.desired_number_of_colors << "colors"; + for (size_t i = 0; i < dparams.scan_params.size(); ++i) { + if (i > 0) os << "_"; + const auto& sparam = dparams.scan_params[i]; + os << QuantMode(sparam.color_quant_mode); + os << DitherMode((J_DITHER_MODE)sparam.dither_mode) << "Dither"; + } + } + if (dparams.skip_scans) { + os << "SkipScans"; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + if (!c.fn.empty()) { + os << c.fn_desc; + } else { + os << c.input; + } + os << c.jparams; + os << c.dparams; + return os; +} + +std::string TestDescription(const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITest, DecodeAPITestParam, + testing::ValuesIn(GenerateTests(false)), + TestDescription); + +JPEGLI_INSTANTIATE_TEST_SUITE_P(DecodeAPITestBuffered, + DecodeAPITestParamBuffered, + testing::ValuesIn(GenerateTests(true)), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_internal.h b/third-party/libjxl/libjxl/lib/jpegli/decode_internal.h new file mode 100644 index 0000000000..ed7baa39e9 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/decode_internal.h @@ -0,0 +1,151 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+#ifndef LIB_JPEGLI_DECODE_INTERNAL_H_
+#define LIB_JPEGLI_DECODE_INTERNAL_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <vector>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/huffman.h"
+
+namespace jpegli {
+
+static constexpr int kNeedMoreInput = 100;
+static constexpr int kHandleRestart = 101;
+static constexpr int kHandleMarkerProcessor = 102;
+static constexpr int kProcessNextMarker = 103;
+static constexpr size_t kAllHuffLutSize = NUM_HUFF_TBLS * kJpegHuffmanLutSize;
+
+typedef int16_t coeff_t;
+
+// State of the decoder that has to be saved before decoding one MCU in case
+// we run out of the bitstream.
+struct MCUCodingState {
+  coeff_t last_dc_coeff[kMaxComponents];
+  int eobrun;
+  coeff_t coeffs[D_MAX_BLOCKS_IN_MCU * DCTSIZE2];
+};
+
+}  // namespace jpegli
+
+// Use this forward-declared libjpeg struct to hold all our private variables.
+// TODO(szabadka) Remove variables that have a corresponding version in cinfo.
+struct jpeg_decomp_master {
+  //
+  // Input handling state.
+  //
+  std::vector<uint8_t> input_buffer_;
+  size_t input_buffer_pos_;
+  // Number of bits after codestream_pos_ that were already processed.
+  size_t codestream_bits_ahead_;
+  bool streaming_mode_;
+
+  // Coefficient buffers
+  jvirt_barray_ptr* coef_arrays;
+  JBLOCKARRAY coeff_rows[jpegli::kMaxComponents];
+
+  //
+  // Marker data processing state.
+  //
+  bool found_soi_;
+  bool found_dri_;
+  bool found_sof_;
+  bool found_eoi_;
+  size_t icc_index_;
+  size_t icc_total_;
+  std::vector<uint8_t> icc_profile_;
+  jpegli::HuffmanTableEntry dc_huff_lut_[jpegli::kAllHuffLutSize];
+  jpegli::HuffmanTableEntry ac_huff_lut_[jpegli::kAllHuffLutSize];
+  uint8_t markers_to_save_[32];
+  jpeg_marker_parser_method app_marker_parsers[16];
+  jpeg_marker_parser_method com_marker_parser;
+  // Whether this jpeg has multiple scans (progressive or non-interleaved
+  // sequential).
+  bool is_multiscan_;
+
+  // Fields defined by SOF marker.
+  size_t iMCU_cols_;
+  int h_factor[jpegli::kMaxComponents];
+  int v_factor[jpegli::kMaxComponents];
+
+  // Initialized at start of frame.
+  uint16_t scan_progression_[jpegli::kMaxComponents][DCTSIZE2];
+
+  //
+  // Per scan state.
+  //
+  size_t scan_mcu_row_;
+  size_t scan_mcu_col_;
+  size_t mcu_rows_per_iMCU_row_;
+  jpegli::coeff_t last_dc_coeff_[jpegli::kMaxComponents];
+  int eobrun_;
+  int restarts_to_go_;
+  int next_restart_marker_;
+
+  jpegli::MCUCodingState mcu_;
+
+  //
+  // Rendering state.
+  //
+  int output_passes_done_;
+  JpegliDataType output_data_type_ = JPEGLI_TYPE_UINT8;
+  bool swap_endianness_ = false;
+  size_t xoffset_;
+  bool need_context_rows_;
+
+  int min_scaled_dct_size;
+  int scaled_dct_size[jpegli::kMaxComponents];
+
+  size_t raw_height_[jpegli::kMaxComponents];
+  jpegli::RowBuffer<float> raw_output_[jpegli::kMaxComponents];
+  jpegli::RowBuffer<float> render_output_[jpegli::kMaxComponents];
+
+  void (*inverse_transform[jpegli::kMaxComponents])(
+      const int16_t* JXL_RESTRICT qblock, const float* JXL_RESTRICT dequant,
+      const float* JXL_RESTRICT biases, float* JXL_RESTRICT scratch_space,
+      float* JXL_RESTRICT output, size_t output_stride, size_t dctsize);
+
+  void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+
+  float* idct_scratch_;
+  float* upsample_scratch_;
+  uint8_t* output_scratch_;
+  int16_t* smoothing_scratch_;
+  float* dequant_;
+  // 1 = 1pass, 2 = 2pass, 3 = external
+  int quant_mode_;
+  int quant_pass_;
+  int num_colors_[jpegli::kMaxComponents];
+  uint8_t* colormap_lut_;
+  uint8_t* pixels_;
+  JSAMPARRAY scanlines_;
+  std::vector<std::vector<uint8_t>> candidate_lists_;
+  bool regenerate_inverse_colormap_;
+  float* dither_[jpegli::kMaxComponents];
+  float* error_row_[2 * jpegli::kMaxComponents];
+  size_t dither_size_;
+  size_t dither_mask_;
+
+  // Per channel and per frequency statistics about the number of nonzeros and
+  // the sum of coefficient absolute values, used in dequantization bias
+  // computation.
+  int* nonzeros_;
+  int* sumabs_;
+  size_t num_processed_blocks_[jpegli::kMaxComponents];
+  float* biases_;
+#define SAVED_COEFS 10
+  // This holds the coef_bits of the scan before the current scan,
+  // i.e. the bottom half when rendering incomplete scans.
+  int (*coef_bits_latch)[SAVED_COEFS];
+  int (*prev_coef_bits_latch)[SAVED_COEFS];
+  bool apply_smoothing;
+};
+
+#endif  // LIB_JPEGLI_DECODE_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_marker.cc b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.cc
new file mode 100644
index 0000000000..c5c5790cdf
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.cc
@@ -0,0 +1,607 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/decode_marker.h"
+
+#include <string.h>
+
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/decode_internal.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/huffman.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jxl/base/printf_macros.h"
+
+namespace jpegli {
+namespace {
+
+constexpr int kMaxDimPixels = 65535;
+constexpr uint8_t kIccProfileTag[12] = "ICC_PROFILE";
+
+// Macros for commonly used error conditions.
+// They expect `pos` and `len` to be in scope and leave the enclosing function
+// through `return JPEGLI_ERROR(...)` when the check fails.
+
+#define JPEG_VERIFY_LEN(n)                                      \
+  if (pos + (n) > len) {                                        \
+    return JPEGLI_ERROR("Unexpected end of marker: pos=%" PRIuS \
+                        " need=%d len=%" PRIuS,                 \
+                        pos, static_cast<int>(n), len);         \
+  }
+
+#define JPEG_VERIFY_INPUT(var, low, high)                               \
+  if ((var) < (low) || (var) > (high)) {                                \
+    return JPEGLI_ERROR("Invalid " #var ": %d", static_cast<int>(var)); \
+  }
+
+#define JPEG_VERIFY_MARKER_END()                                  \
+  if (pos != len) {                                               \
+    return JPEGLI_ERROR("Invalid marker length: declared=%" PRIuS \
+                        " actual=%" PRIuS,                        \
+                        len, pos);                                \
+  }
+
+// Returns the byte at data[*pos] and advances *pos by one. Performs no bounds
+// check; callers verify the available length first (see JPEG_VERIFY_LEN).
+inline int ReadUint8(const uint8_t* data, size_t* pos) {
+  return data[(*pos)++];
+}
+
+// Returns the big-endian 16-bit value at data[*pos] and advances *pos by two.
+// Performs no bounds check; callers verify the available length first.
+inline int ReadUint16(const uint8_t* data, size_t* pos) {
+  int v = (data[*pos] << 8) + data[*pos + 1];
+  *pos += 2;
+  return v;
+}
+
+// Parses a Start Of Frame (SOF) marker segment of `len` bytes at `data`
+// (parsing starts at offset 2). Fills in the frame dimensions, per-component
+// sampling factors and quantization table indices in cinfo, chooses the
+// default JPEG/output color spaces, derives the iMCU and per-component block
+// dimensions, and resets the scan progression state.
+void ProcessSOF(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (!m->found_soi_) {
+    JPEGLI_ERROR("Unexpected SOF marker.");
+  }
+  if (m->found_sof_) {
+    JPEGLI_ERROR("Duplicate SOF marker.");
+  }
+  m->found_sof_ = true;
+  cinfo->progressive_mode = (cinfo->unread_marker == 0xc2);
+  cinfo->arith_code = 0;
+  size_t pos = 2;
+  JPEG_VERIFY_LEN(6);
+  cinfo->data_precision = ReadUint8(data, &pos);
+  cinfo->image_height = ReadUint16(data, &pos);
+  cinfo->image_width = ReadUint16(data, &pos);
+  cinfo->num_components = ReadUint8(data, &pos);
+  JPEG_VERIFY_INPUT(cinfo->data_precision, kJpegPrecision, kJpegPrecision);
+  JPEG_VERIFY_INPUT(cinfo->image_height, 1, kMaxDimPixels);
+  JPEG_VERIFY_INPUT(cinfo->image_width, 1, kMaxDimPixels);
+  JPEG_VERIFY_INPUT(cinfo->num_components, 1, kMaxComponents);
+  JPEG_VERIFY_LEN(3 * cinfo->num_components);
+  cinfo->comp_info = jpegli::Allocate<jpeg_component_info>(
+      cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+  // Read sampling factors and quant table index for each component.
+  uint8_t ids_seen[256] = {0};
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (int i = 0; i < cinfo->num_components; ++i) {
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    comp->component_index = i;
+    const int id = ReadUint8(data, &pos);
+    if (ids_seen[id]) {  // (cf. section B.2.2, syntax of Ci)
+      JPEGLI_ERROR("Duplicate ID %d in SOF.", id);
+    }
+    ids_seen[id] = 1;
+    comp->component_id = id;
+    int factor = ReadUint8(data, &pos);
+    int h_samp_factor = factor >> 4;
+    int v_samp_factor = factor & 0xf;
+    JPEG_VERIFY_INPUT(h_samp_factor, 1, MAX_SAMP_FACTOR);
+    JPEG_VERIFY_INPUT(v_samp_factor, 1, MAX_SAMP_FACTOR);
+    comp->h_samp_factor = h_samp_factor;
+    comp->v_samp_factor = v_samp_factor;
+    cinfo->max_h_samp_factor =
+        std::max(cinfo->max_h_samp_factor, h_samp_factor);
+    cinfo->max_v_samp_factor =
+        std::max(cinfo->max_v_samp_factor, v_samp_factor);
+    int quant_tbl_idx = ReadUint8(data, &pos);
+    JPEG_VERIFY_INPUT(quant_tbl_idx, 0, NUM_QUANT_TBLS - 1);
+    comp->quant_tbl_no = quant_tbl_idx;
+    if (cinfo->quant_tbl_ptrs[quant_tbl_idx] == nullptr) {
+      JPEGLI_ERROR("Quantization table with index %d not found", quant_tbl_idx);
+    }
+    comp->quant_table = nullptr;  // will be allocated after SOS marker
+  }
+  JPEG_VERIFY_MARKER_END();
+
+  // Set the input colorspace based on the markers we have seen and set
+  // default output colorspace.
+  if (cinfo->num_components == 1) {
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+  } else if (cinfo->num_components == 3) {
+    if (cinfo->saw_JFIF_marker) {
+      cinfo->jpeg_color_space = JCS_YCbCr;
+    } else if (cinfo->saw_Adobe_marker) {
+      cinfo->jpeg_color_space =
+          cinfo->Adobe_transform == 0 ? JCS_RGB : JCS_YCbCr;
+    } else {
+      cinfo->jpeg_color_space = JCS_YCbCr;
+      if (cinfo->comp_info[0].component_id == 'R' &&  //
+          cinfo->comp_info[1].component_id == 'G' &&  //
+          cinfo->comp_info[2].component_id == 'B') {
+        cinfo->jpeg_color_space = JCS_RGB;
+      }
+    }
+    cinfo->out_color_space = JCS_RGB;
+  } else if (cinfo->num_components == 4) {
+    if (cinfo->saw_Adobe_marker) {
+      cinfo->jpeg_color_space =
+          cinfo->Adobe_transform == 0 ? JCS_CMYK : JCS_YCCK;
+    } else {
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+  }
+
+  // We have checked above that none of the sampling factors are 0, so the max
+  // sampling factors can not be 0.
+  cinfo->total_iMCU_rows =
+      DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE);
+  m->iMCU_cols_ =
+      DivCeil(cinfo->image_width, cinfo->max_h_samp_factor * DCTSIZE);
+  // Compute the block dimensions for each component.
+  for (int i = 0; i < cinfo->num_components; ++i) {
+    jpeg_component_info* comp = &cinfo->comp_info[i];
+    if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 ||
+        cinfo->max_v_samp_factor % comp->v_samp_factor != 0) {
+      JPEGLI_ERROR("Non-integral subsampling ratios.");
+    }
+    m->h_factor[i] = cinfo->max_h_samp_factor / comp->h_samp_factor;
+    m->v_factor[i] = cinfo->max_v_samp_factor / comp->v_samp_factor;
+    comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[i]);
+    comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[i]);
+    comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE);
+    comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE);
+  }
+  memset(m->scan_progression_, 0, sizeof(m->scan_progression_));
+}
+
+// Parses a Start Of Scan (SOS) marker segment of `len` bytes at `data`
+// (parsing starts at offset 2). Sets up the components, DC/AC table selectors
+// and MCU layout of the scan, reads the spectral selection (Ss, Se) and
+// successive approximation (Ah, Al) parameters, and rejects scans that overlap
+// or come after a more refined scan of the same coefficients.
+void ProcessSOS(j_decompress_ptr cinfo, const uint8_t* data, size_t len) {
+  jpeg_decomp_master* m = cinfo->master;
+  if (!m->found_sof_) {
+    JPEGLI_ERROR("Unexpected SOS marker.");
+  }
+  size_t pos = 2;
+  JPEG_VERIFY_LEN(1);
+  cinfo->comps_in_scan = ReadUint8(data, &pos);
+  JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, cinfo->num_components);
+  JPEG_VERIFY_INPUT(cinfo->comps_in_scan, 1, MAX_COMPS_IN_SCAN);
+
+  JPEG_VERIFY_LEN(2 * cinfo->comps_in_scan);
+  bool is_interleaved = (cinfo->comps_in_scan > 1);
+  uint8_t ids_seen[256] = {0};
+  cinfo->blocks_in_MCU = 0;
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    int id = ReadUint8(data, &pos);
+    if (ids_seen[id]) {  // (cf. section B.2.3, regarding CSj)
+      return JPEGLI_ERROR("Duplicate ID %d in SOS.", id);
+    }
+    ids_seen[id] = 1;
+    jpeg_component_info* comp = nullptr;
+    for (int j = 0; j < cinfo->num_components; ++j) {
+      if (cinfo->comp_info[j].component_id == id) {
+        comp = &cinfo->comp_info[j];
+        cinfo->cur_comp_info[i] = comp;
+      }
+    }
+    if (!comp) {
+      return JPEGLI_ERROR("SOS marker: Could not find component with id %d",
+                          id);
+    }
+    int c = ReadUint8(data, &pos);
+    comp->dc_tbl_no = c >> 4;
+    comp->ac_tbl_no = c & 0xf;
+    JPEG_VERIFY_INPUT(comp->dc_tbl_no, 0, 3);
+    JPEG_VERIFY_INPUT(comp->ac_tbl_no, 0, 3);
+    comp->MCU_width = is_interleaved ? comp->h_samp_factor : 1;
+    comp->MCU_height = is_interleaved ? comp->v_samp_factor : 1;
+    comp->MCU_blocks = comp->MCU_width * comp->MCU_height;
+    if (cinfo->blocks_in_MCU + comp->MCU_blocks > D_MAX_BLOCKS_IN_MCU) {
+      JPEGLI_ERROR("Too many blocks in MCU.");
+    }
+    for (int j = 0; j < comp->MCU_blocks; ++j) {
+      cinfo->MCU_membership[cinfo->blocks_in_MCU++] = i;
+    }
+  }
+  JPEG_VERIFY_LEN(3);
+  cinfo->Ss = ReadUint8(data, &pos);
+  cinfo->Se = ReadUint8(data, &pos);
+  JPEG_VERIFY_INPUT(cinfo->Ss, 0, 63);
+  JPEG_VERIFY_INPUT(cinfo->Se, cinfo->Ss, 63);
+  int c = ReadUint8(data, &pos);
+  cinfo->Ah = c >> 4;
+  cinfo->Al = c & 0xf;
+  JPEG_VERIFY_MARKER_END();
+
+  if (cinfo->input_scan_number == 0) {
+    m->is_multiscan_ = (cinfo->comps_in_scan < cinfo->num_components ||
+                        cinfo->progressive_mode);
+  }
+  if (cinfo->Ah != 0 && cinfo->Al != cinfo->Ah - 1) {
+    // section G.1.1.1.2 : Successive approximation control only improves
+    // by one bit at a time.
+    JPEGLI_ERROR("Invalid progressive parameters: Al=%d Ah=%d", cinfo->Al,
+                 cinfo->Ah);
+  }
+  if (!cinfo->progressive_mode) {
+    cinfo->Ss = 0;
+    cinfo->Se = 63;
+    cinfo->Ah = 0;
+    cinfo->Al = 0;
+  }
+  const uint16_t scan_bitmask =
+      cinfo->Ah == 0 ? (0xffff << cinfo->Al) : (1u << cinfo->Al);
+  const uint16_t refinement_bitmask = (1 << cinfo->Al) - 1;
+  if (!cinfo->coef_bits) {
+    cinfo->coef_bits =
+        Allocate<int[DCTSIZE2]>(cinfo, cinfo->num_components * 2, JPOOL_IMAGE);
+    m->coef_bits_latch =
+        Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+    m->prev_coef_bits_latch =
+        Allocate<int[SAVED_COEFS]>(cinfo, cinfo->num_components, JPOOL_IMAGE);
+
+    for (int c = 0; c < cinfo->num_components; ++c) {
+      for (int i = 0; i < DCTSIZE2; ++i) {
+        cinfo->coef_bits[c][i] = -1;
+        if (i < SAVED_COEFS) {
+          m->coef_bits_latch[c][i] = -1;
+        }
+      }
+    }
+  }
+
+  // scan_progression_[c][k] accumulates the bit positions already coded for
+  // coefficient k of component c; it is indexed by component index, not by the
+  // position of the component within the current scan.
+  for (int i = 0; i < cinfo->comps_in_scan; ++i) {
+    int comp_idx = cinfo->cur_comp_info[i]->component_index;
+    for (int k = cinfo->Ss; k <= cinfo->Se; ++k) {
+      if (m->scan_progression_[comp_idx][k] & scan_bitmask) {
+        return JPEGLI_ERROR(
+            "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u",
+            comp_idx, k, m->scan_progression_[comp_idx][k], scan_bitmask);
+      }
+      if (m->scan_progression_[comp_idx][k] & refinement_bitmask) {
+        return JPEGLI_ERROR(
+            "Invalid scan order, a more refined scan was already done: "
+            "component=%d k=%d prev_mask=%u cur_mask=%u",
+            comp_idx, k, m->scan_progression_[comp_idx][k], scan_bitmask);
+      }
+      m->scan_progression_[comp_idx][k] |= scan_bitmask;
+    }
+  }
+  if (cinfo->Al > 10) {
+    return JPEGLI_ERROR("Scan parameter Al=%d is not supported.", cinfo->Al);
+  }
+}
+
+// Reads the Define Huffman Table (DHT) marker segment and builds the Huffman
+// decoding table in either dc_huff_lut_ or ac_huff_lut_, depending on the type
+// and slot_id of Huffman code being read.
+void ProcessDHT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + size_t pos = 2; + if (pos == len) { + return JPEGLI_ERROR("DHT marker: no Huffman table found"); + } + while (pos < len) { + JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength); + // The index of the Huffman code in the current set of Huffman codes. For AC + // component Huffman codes, 0x10 is added to the index. + int slot_id = ReadUint8(data, &pos); + int huffman_index = slot_id; + int is_ac_table = (slot_id & 0x10) != 0; + JHUFF_TBL** table; + if (is_ac_table) { + huffman_index -= 0x10; + JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1); + table = &cinfo->ac_huff_tbl_ptrs[huffman_index]; + } else { + JPEG_VERIFY_INPUT(huffman_index, 0, NUM_HUFF_TBLS - 1); + table = &cinfo->dc_huff_tbl_ptrs[huffman_index]; + } + if (*table == nullptr) { + *table = jpegli_alloc_huff_table(reinterpret_cast(cinfo)); + } + int total_count = 0; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + int count = ReadUint8(data, &pos); + (*table)->bits[i] = count; + total_count += count; + } + if (is_ac_table) { + JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize); + } else { + // Allow symbols up to 15 here, we check later whether any invalid symbols + // are actually decoded. + // TODO(szabadka) Make sure decoder works (does not crash) with up to + // 15-nbits DC symbols and then increase kJpegDCAlphabetSize. 
+ JPEG_VERIFY_INPUT(total_count, 0, 16); + } + JPEG_VERIFY_LEN(total_count); + for (int i = 0; i < total_count; ++i) { + int value = ReadUint8(data, &pos); + if (!is_ac_table) { + JPEG_VERIFY_INPUT(value, 0, 15); + } + (*table)->huffval[i] = value; + } + for (int i = total_count; i < kJpegHuffmanAlphabetSize; ++i) { + (*table)->huffval[i] = 0; + } + } + JPEG_VERIFY_MARKER_END(); +} + +void ProcessDQT(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (m->found_sof_) { + JPEGLI_ERROR("Updating quant tables between scans is not supported."); + } + size_t pos = 2; + if (pos == len) { + return JPEGLI_ERROR("DQT marker: no quantization table found"); + } + while (pos < len) { + JPEG_VERIFY_LEN(1); + int quant_table_index = ReadUint8(data, &pos); + int precision = quant_table_index >> 4; + JPEG_VERIFY_INPUT(precision, 0, 1); + quant_table_index &= 0xf; + JPEG_VERIFY_INPUT(quant_table_index, 0, NUM_QUANT_TBLS - 1); + JPEG_VERIFY_LEN((precision + 1) * DCTSIZE2); + + if (cinfo->quant_tbl_ptrs[quant_table_index] == nullptr) { + cinfo->quant_tbl_ptrs[quant_table_index] = + jpegli_alloc_quant_table(reinterpret_cast(cinfo)); + } + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_table_index]; + + for (size_t i = 0; i < DCTSIZE2; ++i) { + int quant_val = + precision ? ReadUint16(data, &pos) : ReadUint8(data, &pos); + JPEG_VERIFY_INPUT(quant_val, 1, 65535); + quant_table->quantval[kJPEGNaturalOrder[i]] = quant_val; + } + } + JPEG_VERIFY_MARKER_END(); +} + +void ProcessDNL(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + // Ignore marker. 
+} + +void ProcessDRI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (m->found_dri_) { + return JPEGLI_ERROR("Duplicate DRI marker."); + } + m->found_dri_ = true; + size_t pos = 2; + JPEG_VERIFY_LEN(2); + cinfo->restart_interval = ReadUint16(data, &pos); + JPEG_VERIFY_MARKER_END(); +} + +void ProcessAPP(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + const uint8_t marker = cinfo->unread_marker; + const uint8_t* payload = data + 2; + size_t payload_size = len - 2; + if (marker == 0xE0) { + if (payload_size >= 14 && memcmp(payload, "JFIF", 4) == 0) { + cinfo->saw_JFIF_marker = TRUE; + cinfo->JFIF_major_version = payload[5]; + cinfo->JFIF_minor_version = payload[6]; + cinfo->density_unit = payload[7]; + cinfo->X_density = (payload[8] << 8) + payload[9]; + cinfo->Y_density = (payload[10] << 8) + payload[11]; + } + } else if (marker == 0xEE) { + if (payload_size >= 12 && memcmp(payload, "Adobe", 5) == 0) { + cinfo->saw_Adobe_marker = TRUE; + cinfo->Adobe_transform = payload[11]; + } + } else if (marker == 0xE2) { + if (payload_size >= sizeof(kIccProfileTag) && + memcmp(payload, kIccProfileTag, sizeof(kIccProfileTag)) == 0) { + payload += sizeof(kIccProfileTag); + payload_size -= sizeof(kIccProfileTag); + if (payload_size < 2) { + return JPEGLI_ERROR("ICC chunk is too small."); + } + uint8_t index = payload[0]; + uint8_t total = payload[1]; + ++m->icc_index_; + if (m->icc_index_ != index) { + return JPEGLI_ERROR("Invalid ICC chunk order."); + } + if (total == 0) { + return JPEGLI_ERROR("Invalid ICC chunk total."); + } + if (m->icc_total_ == 0) { + m->icc_total_ = total; + } else if (m->icc_total_ != total) { + return JPEGLI_ERROR("Invalid ICC chunk total."); + } + if (m->icc_index_ > m->icc_total_) { + return JPEGLI_ERROR("Invalid ICC chunk index."); + } + m->icc_profile_.insert(m->icc_profile_.end(), payload + 2, + payload + payload_size); + } + } +} + +void 
ProcessCOM(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + // Ignore marker. +} + +void ProcessSOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + jpeg_decomp_master* m = cinfo->master; + if (m->found_soi_) { + JPEGLI_ERROR("Duplicate SOI marker"); + } + m->found_soi_ = true; +} + +void ProcessEOI(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + cinfo->master->found_eoi_ = true; +} + +void SaveMarker(j_decompress_ptr cinfo, const uint8_t* data, size_t len) { + const uint8_t marker = cinfo->unread_marker; + const uint8_t* payload = data + 2; + size_t payload_size = len - 2; + + // Insert new saved marker to the head of the list. + jpeg_saved_marker_ptr next = cinfo->marker_list; + cinfo->marker_list = + jpegli::Allocate(cinfo, 1, JPOOL_IMAGE); + cinfo->marker_list->next = next; + cinfo->marker_list->marker = marker; + cinfo->marker_list->original_length = payload_size; + cinfo->marker_list->data_length = payload_size; + cinfo->marker_list->data = + jpegli::Allocate(cinfo, payload_size, JPOOL_IMAGE); + memcpy(cinfo->marker_list->data, payload, payload_size); +} + +uint8_t ProcessNextMarker(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos) { + jpeg_decomp_master* m = cinfo->master; + size_t num_skipped = 0; + uint8_t marker = cinfo->unread_marker; + if (marker == 0) { + // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker. + static const uint8_t kIsValidMarker[] = { + 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + }; + // Skip bytes between markers. 
+ while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] < 0xc0 || + !kIsValidMarker[data[*pos + 1] - 0xc0])) { + ++(*pos); + ++num_skipped; + } + if (*pos + 2 > len) { + return kNeedMoreInput; + } + marker = data[*pos + 1]; + if (num_skipped > 0) { + if (m->found_soi_) { + JPEGLI_WARN("Skipped %d bytes before marker 0x%02x", (int)num_skipped, + marker); + } else { + JPEGLI_ERROR("Did not find SOI marker."); + } + } + *pos += 2; + cinfo->unread_marker = marker; + } + if (!m->found_soi_ && marker != 0xd8) { + JPEGLI_ERROR("Did not find SOI marker."); + } + if (GetMarkerProcessor(cinfo)) { + return kHandleMarkerProcessor; + } + const uint8_t* marker_data = &data[*pos]; + size_t marker_len = 0; + if (marker != 0xd8 && marker != 0xd9) { + if (*pos + 2 > len) { + return kNeedMoreInput; + } + marker_len += (data[*pos] << 8) + data[*pos + 1]; + if (marker_len < 2) { + JPEGLI_ERROR("Invalid marker length"); + } + if (*pos + marker_len > len) { + // TODO(szabadka) Limit our memory usage by using the skip_input_data + // source manager callback on APP markers that are not saved. 
+ return kNeedMoreInput; + } + if (marker >= 0xe0 && m->markers_to_save_[marker - 0xe0]) { + SaveMarker(cinfo, marker_data, marker_len); + } + } + if (marker == 0xc0 || marker == 0xc1 || marker == 0xc2) { + ProcessSOF(cinfo, marker_data, marker_len); + } else if (marker == 0xc4) { + ProcessDHT(cinfo, marker_data, marker_len); + } else if (marker == 0xda) { + ProcessSOS(cinfo, marker_data, marker_len); + } else if (marker == 0xdb) { + ProcessDQT(cinfo, marker_data, marker_len); + } else if (marker == 0xdc) { + ProcessDNL(cinfo, marker_data, marker_len); + } else if (marker == 0xdd) { + ProcessDRI(cinfo, marker_data, marker_len); + } else if (marker >= 0xe0 && marker <= 0xef) { + ProcessAPP(cinfo, marker_data, marker_len); + } else if (marker == 0xfe) { + ProcessCOM(cinfo, marker_data, marker_len); + } else if (marker == 0xd8) { + ProcessSOI(cinfo, marker_data, marker_len); + } else if (marker == 0xd9) { + ProcessEOI(cinfo, marker_data, marker_len); + } else { + JPEGLI_ERROR("Unexpected marker 0x%x", marker); + } + *pos += marker_len; + cinfo->unread_marker = 0; + if (marker == 0xda) { + return JPEG_REACHED_SOS; + } else if (marker == 0xd9) { + return JPEG_REACHED_EOI; + } + return kProcessNextMarker; +} + +} // namespace + +jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + uint8_t marker = cinfo->unread_marker; + jpeg_marker_parser_method callback = nullptr; + if (marker >= 0xe0 && marker <= 0xef) { + callback = m->app_marker_parsers[marker - 0xe0]; + } else if (marker == 0xfe) { + callback = m->com_marker_parser; + } + return callback; +} + +int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos) { + for (;;) { + int status = ProcessNextMarker(cinfo, data, len, pos); + if (status != kProcessNextMarker) { + return status; + } + } +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_marker.h 
b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.h new file mode 100644 index 0000000000..fb24b3ee87 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/decode_marker.h @@ -0,0 +1,32 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_DECODE_MARKER_H_ +#define LIB_JPEGLI_DECODE_MARKER_H_ + +#include + +#include "lib/jpegli/common.h" + +namespace jpegli { + +// Reads the available input in the source manager's input buffer until either +// the end of the next SOS marker or the end of the input. +// The corresponding fields of cinfo are updated with the processed input data. +// Upon return, the input buffer will be at the start or at the end of a marker +// data segment (inter-marker data is allowed). +// Return value is one of: +// * JPEG_SUSPENDED, if the current input buffer ends before the next SOS or +// EOI marker. Input buffer refill is handled by the caller; +// * JPEG_REACHED_SOS, if the next SOS marker is found; +// * JPEG_REACHED_EOI, if the end of the input is found. +int ProcessMarkers(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos); + +jpeg_marker_parser_method GetMarkerProcessor(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_DECODE_MARKER_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_scan.cc b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.cc new file mode 100644 index 0000000000..29c0172950 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.cc @@ -0,0 +1,566 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/decode_scan.h" + +#include + +#include + +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { +namespace { + +// Max 14 block per MCU (when 1 channel is subsampled) +// Max 64 nonzero coefficients per block +// Max 16 symbol bits plus 11 extra bits per nonzero symbol +// Max 2 bytes per 8 bits (worst case is all bytes are escaped 0xff) +constexpr int kMaxMCUByteSize = 6048; + +// Helper structure to read bits from the entropy coded data segment. +struct BitReaderState { + BitReaderState(const uint8_t* data, const size_t len, size_t pos) + : data_(data), len_(len), start_pos_(pos) { + Reset(pos); + } + + void Reset(size_t pos) { + pos_ = pos; + val_ = 0; + bits_left_ = 0; + next_marker_pos_ = len_; + FillBitWindow(); + } + + // Returns the next byte and skips the 0xff/0x00 escape sequences. + uint8_t GetNextByte() { + if (pos_ >= next_marker_pos_) { + ++pos_; + return 0; + } + uint8_t c = data_[pos_++]; + if (c == 0xff) { + uint8_t escape = pos_ < len_ ? data_[pos_] : 0; + if (escape == 0) { + ++pos_; + } else { + // 0xff was followed by a non-zero byte, which means that we found the + // start of the next marker segment. + next_marker_pos_ = pos_ - 1; + } + } + return c; + } + + void FillBitWindow() { + if (bits_left_ <= 16) { + while (bits_left_ <= 56) { + val_ <<= 8; + val_ |= (uint64_t)GetNextByte(); + bits_left_ += 8; + } + } + } + + int ReadBits(int nbits) { + FillBitWindow(); + uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1); + bits_left_ -= nbits; + return val; + } + + // Sets *pos to the next stream position, and *bit_pos to the bit position + // within the next byte where parsing should continue. + // Returns false if the stream ended too early. + bool FinishStream(size_t* pos, size_t* bit_pos) { + *bit_pos = (8 - (bits_left_ & 7)) & 7; + // Give back some bytes that we did not use. 
+ int unused_bytes_left = DivCeil(bits_left_, 8); + while (unused_bytes_left-- > 0) { + --pos_; + // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape + // sequence, and if yes, we need to give back one more byte. + if (((pos_ == len_ && pos_ == next_marker_pos_) || + (pos_ > 0 && pos_ < next_marker_pos_ && data_[pos_] == 0)) && + (data_[pos_ - 1] == 0xff)) { + --pos_; + } + } + if (pos_ >= next_marker_pos_) { + *pos = next_marker_pos_; + if (pos_ > next_marker_pos_ || *bit_pos > 0) { + // Data ran out before the scan was complete. + return false; + } + } + *pos = pos_; + return true; + } + + const uint8_t* data_; + const size_t len_; + size_t pos_; + uint64_t val_; + int bits_left_; + size_t next_marker_pos_; + size_t start_pos_; +}; + +// Returns the next Huffman-coded symbol. +int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) { + int nbits; + br->FillBitWindow(); + int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff; + table += val; + nbits = table->bits - 8; + if (nbits > 0) { + br->bits_left_ -= 8; + table += table->value; + val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1); + table += val; + } + br->bits_left_ -= table->bits; + return table->value; +} + +/** + * Returns the DC diff or AC value for extra bits value x and prefix code s. + * + * CCITT Rec. T.81 (1992 E) + * Table F.1 – Difference magnitude categories for DC coding + * SSSS | DIFF values + * ------+-------------------------- + * 0 | 0 + * 1 | –1, 1 + * 2 | –3, –2, 2, 3 + * 3 | –7..–4, 4..7 + * ......|.......................... + * 11 | –2047..–1024, 1024..2047 + * + * CCITT Rec. T.81 (1992 E) + * Table F.2 – Categories assigned to coefficient values + * [ Same as Table F.1, but does not include SSSS equal to 0 and 11] + * + * + * CCITT Rec. T.81 (1992 E) + * F.1.2.1.1 Structure of DC code table + * For each category,... additional bits... appended... to uniquely identify + * which difference... occurred... When DIFF is positive... SSSS... 
bits of DIFF + * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are + * appended... Most significant bit... is 0 for negative differences and 1 for + * positive differences. + * + * In other words the upper half of extra bits range represents DIFF as is. + * The lower half represents the negative DIFFs with an offset. + */ +int HuffExtend(int x, int s) { + JXL_DASSERT(s >= 1); + int half = 1 << (s - 1); + if (x >= half) { + JXL_DASSERT(x < (1 << s)); + return x; + } else { + return x - (1 << s) + 1; + } +} + +// Decodes one 8x8 block of DCT coefficients from the bit stream. +bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff, + const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al, + int* eobrun, BitReaderState* br, coeff_t* last_dc_coeff, + coeff_t* coeffs) { + // Nowadays multiplication is even faster than variable shift. + int Am = 1 << Al; + bool eobrun_allowed = Ss > 0; + if (Ss == 0) { + int s = ReadSymbol(dc_huff, br); + if (s >= kJpegDCAlphabetSize) { + return false; + } + int diff = 0; + if (s > 0) { + int bits = br->ReadBits(s); + diff = HuffExtend(bits, s); + } + int coeff = diff + *last_dc_coeff; + const int dc_coeff = coeff * Am; + coeffs[0] = dc_coeff; + // TODO(eustas): is there a more elegant / explicit way to check this? 
+ if (dc_coeff != coeffs[0]) { + return false; + } + *last_dc_coeff = coeff; + ++Ss; + } + if (Ss > Se) { + return true; + } + if (*eobrun > 0) { + --(*eobrun); + return true; + } + for (int k = Ss; k <= Se; k++) { + int sr = ReadSymbol(ac_huff, br); + if (sr >= kJpegHuffmanAlphabetSize) { + return false; + } + int r = sr >> 4; + int s = sr & 15; + if (s > 0) { + k += r; + if (k > Se) { + return false; + } + if (s + Al >= kJpegDCAlphabetSize) { + return false; + } + int bits = br->ReadBits(s); + int coeff = HuffExtend(bits, s); + coeffs[kJPEGNaturalOrder[k]] = coeff * Am; + } else if (r == 15) { + k += 15; + } else { + *eobrun = 1 << r; + if (r > 0) { + if (!eobrun_allowed) { + return false; + } + *eobrun += br->ReadBits(r); + } + break; + } + } + --(*eobrun); + return true; +} + +bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al, + int* eobrun, BitReaderState* br, coeff_t* coeffs) { + // Nowadays multiplication is even faster than variable shift. + int Am = 1 << Al; + bool eobrun_allowed = Ss > 0; + if (Ss == 0) { + int s = br->ReadBits(1); + coeff_t dc_coeff = coeffs[0]; + dc_coeff |= s * Am; + coeffs[0] = dc_coeff; + ++Ss; + } + if (Ss > Se) { + return true; + } + int p1 = Am; + int m1 = -Am; + int k = Ss; + int r; + int s; + bool in_zero_run = false; + if (*eobrun <= 0) { + for (; k <= Se; k++) { + s = ReadSymbol(ac_huff, br); + if (s >= kJpegHuffmanAlphabetSize) { + return false; + } + r = s >> 4; + s &= 15; + if (s) { + if (s != 1) { + return false; + } + s = br->ReadBits(1) ? 
p1 : m1; + in_zero_run = false; + } else { + if (r != 15) { + *eobrun = 1 << r; + if (r > 0) { + if (!eobrun_allowed) { + return false; + } + *eobrun += br->ReadBits(r); + } + break; + } + in_zero_run = true; + } + do { + coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; + if (thiscoef != 0) { + if (br->ReadBits(1)) { + if ((thiscoef & p1) == 0) { + if (thiscoef >= 0) { + thiscoef += p1; + } else { + thiscoef += m1; + } + } + } + coeffs[kJPEGNaturalOrder[k]] = thiscoef; + } else { + if (--r < 0) { + break; + } + } + k++; + } while (k <= Se); + if (s) { + if (k > Se) { + return false; + } + coeffs[kJPEGNaturalOrder[k]] = s; + } + } + } + if (in_zero_run) { + return false; + } + if (*eobrun > 0) { + for (; k <= Se; k++) { + coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; + if (thiscoef != 0) { + if (br->ReadBits(1)) { + if ((thiscoef & p1) == 0) { + if (thiscoef >= 0) { + thiscoef += p1; + } else { + thiscoef += m1; + } + } + } + coeffs[kJPEGNaturalOrder[k]] = thiscoef; + } + } + } + --(*eobrun); + return true; +} + +void SaveMCUCodingState(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + memcpy(m->mcu_.last_dc_coeff, m->last_dc_coeff_, sizeof(m->last_dc_coeff_)); + m->mcu_.eobrun = m->eobrun_; + size_t offset = 0; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + const jpeg_component_info* comp = cinfo->cur_comp_info[i]; + int c = comp->component_index; + size_t block_x = m->scan_mcu_col_ * comp->MCU_width; + for (int iy = 0; iy < comp->MCU_height; ++iy) { + size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; + size_t biy = block_y % comp->v_samp_factor; + if (block_y >= comp->height_in_blocks) { + continue; + } + size_t nblocks = + std::min(comp->MCU_width, comp->width_in_blocks - block_x); + size_t ncoeffs = nblocks * DCTSIZE2; + coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0]; + memcpy(&m->mcu_.coeffs[offset], coeffs, ncoeffs * sizeof(coeffs[0])); + offset += ncoeffs; + } + } +} + +void RestoreMCUCodingState(j_decompress_ptr 
cinfo) { + jpeg_decomp_master* m = cinfo->master; + memcpy(m->last_dc_coeff_, m->mcu_.last_dc_coeff, sizeof(m->last_dc_coeff_)); + m->eobrun_ = m->mcu_.eobrun; + size_t offset = 0; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + const jpeg_component_info* comp = cinfo->cur_comp_info[i]; + int c = comp->component_index; + size_t block_x = m->scan_mcu_col_ * comp->MCU_width; + for (int iy = 0; iy < comp->MCU_height; ++iy) { + size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; + size_t biy = block_y % comp->v_samp_factor; + if (block_y >= comp->height_in_blocks) { + continue; + } + size_t nblocks = + std::min(comp->MCU_width, comp->width_in_blocks - block_x); + size_t ncoeffs = nblocks * DCTSIZE2; + coeff_t* coeffs = &m->coeff_rows[c][biy][block_x][0]; + memcpy(coeffs, &m->mcu_.coeffs[offset], ncoeffs * sizeof(coeffs[0])); + offset += ncoeffs; + } + } +} + +bool FinishScan(j_decompress_ptr cinfo, const uint8_t* data, const size_t len, + size_t* pos, size_t* bit_pos) { + jpeg_decomp_master* m = cinfo->master; + if (m->eobrun_ > 0) { + JPEGLI_ERROR("End-of-block run too long."); + } + m->eobrun_ = -1; + memset(m->last_dc_coeff_, 0, sizeof(m->last_dc_coeff_)); + if (*bit_pos == 0) { + return true; + } + if (data[*pos] == 0xff) { + // After last br.FinishStream we checked that there is at least 2 bytes + // in the buffer. + JXL_DASSERT(*pos + 1 < len); + // br.FinishStream would have detected an early marker. 
+ JXL_DASSERT(data[*pos + 1] == 0); + *pos += 2; + } else { + *pos += 1; + } + *bit_pos = 0; + return true; +} + +} // namespace + +void PrepareForiMCURow(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + const jpeg_component_info* comp = cinfo->cur_comp_info[i]; + int c = comp->component_index; + int by0 = cinfo->input_iMCU_row * comp->v_samp_factor; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(comp->v_samp_factor, block_rows_left); + int offset = m->streaming_mode_ ? 0 : by0; + m->coeff_rows[c] = (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), m->coef_arrays[c], offset, + max_block_rows, true); + } +} + +int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos, size_t* bit_pos) { + if (len == 0) { + return kNeedMoreInput; + } + jpeg_decomp_master* m = cinfo->master; + for (;;) { + // Handle the restart intervals. + if (cinfo->restart_interval > 0 && m->restarts_to_go_ == 0) { + if (!FinishScan(cinfo, data, len, pos, bit_pos)) { + return kNeedMoreInput; + } + // Go to the next marker, warn if we had to skip any data. + size_t num_skipped = 0; + while (*pos + 1 < len && (data[*pos] != 0xff || data[*pos + 1] == 0 || + data[*pos + 1] == 0xff)) { + ++(*pos); + ++num_skipped; + } + if (num_skipped > 0) { + JPEGLI_WARN("Skipped %d bytes before restart marker", (int)num_skipped); + } + if (*pos + 2 > len) { + return kNeedMoreInput; + } + cinfo->unread_marker = data[*pos + 1]; + *pos += 2; + return kHandleRestart; + } + + size_t start_pos = *pos; + BitReaderState br(data, len, start_pos); + if (*bit_pos > 0) { + br.ReadBits(*bit_pos); + } + if (start_pos + kMaxMCUByteSize > len) { + SaveMCUCodingState(cinfo); + } + + // Decode one MCU. 
+ HWY_ALIGN_MAX coeff_t dummy_block[DCTSIZE2]; + bool scan_ok = true; + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + const jpeg_component_info* comp = cinfo->cur_comp_info[i]; + int c = comp->component_index; + const HuffmanTableEntry* dc_lut = + &m->dc_huff_lut_[comp->dc_tbl_no * kJpegHuffmanLutSize]; + const HuffmanTableEntry* ac_lut = + &m->ac_huff_lut_[comp->ac_tbl_no * kJpegHuffmanLutSize]; + for (int iy = 0; iy < comp->MCU_height; ++iy) { + size_t block_y = m->scan_mcu_row_ * comp->MCU_height + iy; + int biy = block_y % comp->v_samp_factor; + for (int ix = 0; ix < comp->MCU_width; ++ix) { + size_t block_x = m->scan_mcu_col_ * comp->MCU_width + ix; + coeff_t* coeffs; + if (block_x >= comp->width_in_blocks || + block_y >= comp->height_in_blocks) { + // Note that it is OK that dummy_block is uninitialized because + // it will never be used in any branches, even in the RefineDCTBlock + // case, because only DC scans can be interleaved and we don't use + // the zero-ness of the DC coeff in the DC refinement code-path. + coeffs = dummy_block; + } else { + coeffs = &m->coeff_rows[c][biy][block_x][0]; + } + if (cinfo->Ah == 0) { + if (!DecodeDCTBlock(dc_lut, ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al, + &m->eobrun_, &br, + &m->last_dc_coeff_[comp->component_index], + coeffs)) { + scan_ok = false; + } + } else { + if (!RefineDCTBlock(ac_lut, cinfo->Ss, cinfo->Se, cinfo->Al, + &m->eobrun_, &br, coeffs)) { + scan_ok = false; + } + } + } + } + } + size_t new_pos; + size_t new_bit_pos; + bool stream_ok = br.FinishStream(&new_pos, &new_bit_pos); + if (new_pos + 2 > len) { + // If reading stopped within the last two bytes, we have to request more + // input even if FinishStream() returned true, since the Huffman code + // reader could have peaked ahead some bits past the current input chunk + // and thus the last prefix code length could have been wrong. We can do + // this because a valid JPEG bit stream has two extra bytes at the end. 
+ RestoreMCUCodingState(cinfo); + return kNeedMoreInput; + } + *pos = new_pos; + *bit_pos = new_bit_pos; + if (!stream_ok) { + // We hit a marker during parsing. + JXL_DASSERT(data[*pos] == 0xff); + JXL_DASSERT(data[*pos + 1] != 0); + RestoreMCUCodingState(cinfo); + JPEGLI_WARN("Incomplete scan detected."); + return JPEG_SCAN_COMPLETED; + } + if (!scan_ok) { + JPEGLI_ERROR("Failed to decode DCT block"); + } + if (m->restarts_to_go_ > 0) { + --m->restarts_to_go_; + } + ++m->scan_mcu_col_; + if (m->scan_mcu_col_ == cinfo->MCUs_per_row) { + ++m->scan_mcu_row_; + m->scan_mcu_col_ = 0; + if (m->scan_mcu_row_ == cinfo->MCU_rows_in_scan) { + if (!FinishScan(cinfo, data, len, pos, bit_pos)) { + return kNeedMoreInput; + } + break; + } else if ((m->scan_mcu_row_ % m->mcu_rows_per_iMCU_row_) == 0) { + // Current iMCU row is done. + break; + } + } + } + ++cinfo->input_iMCU_row; + if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows) { + PrepareForiMCURow(cinfo); + return JPEG_ROW_COMPLETED; + } + return JPEG_SCAN_COMPLETED; +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/decode_scan.h b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.h new file mode 100644 index 0000000000..1d7b18fc1a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/decode_scan.h @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_DECODE_SCAN_H_ +#define LIB_JPEGLI_DECODE_SCAN_H_ + +#include + +#include "lib/jpegli/common.h" + +namespace jpegli { + +// Reads the available input in the source manager's input buffer until the end +// of the next iMCU row. +// The corresponding fields of cinfo are updated with the processed input data. +// Upon return, the input buffer will be at the start of an MCU, or at the end +// of the scan. 
+// Return value is one of: +// * JPEG_SUSPENDED, if the input buffer ends before the end of an iMCU row; +// * JPEG_ROW_COMPLETED, if the next iMCU row (but not the scan) is reached; +// * JPEG_SCAN_COMPLETED, if the end of the scan is reached. +int ProcessScan(j_decompress_ptr cinfo, const uint8_t* const data, + const size_t len, size_t* pos, size_t* bit_pos); + +void PrepareForiMCURow(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_DECODE_SCAN_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/destination_manager.cc b/third-party/libjxl/libjxl/lib/jpegli/destination_manager.cc new file mode 100644 index 0000000000..9bc269f0c9 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/destination_manager.cc @@ -0,0 +1,148 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#include "lib/jpegli/encode.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" + +namespace jpegli { + +constexpr size_t kDestBufferSize = 64 << 10; + +struct StdioDestinationManager { + jpeg_destination_mgr pub; + FILE* f; + uint8_t* buffer; + + static void init_destination(j_compress_ptr cinfo) { + auto dest = reinterpret_cast(cinfo->dest); + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = kDestBufferSize; + } + + static boolean empty_output_buffer(j_compress_ptr cinfo) { + auto dest = reinterpret_cast(cinfo->dest); + if (fwrite(dest->buffer, 1, kDestBufferSize, dest->f) != kDestBufferSize) { + JPEGLI_ERROR("Failed to write to output stream."); + } + dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = kDestBufferSize; + return TRUE; + } + + static void term_destination(j_compress_ptr cinfo) { + auto dest = reinterpret_cast(cinfo->dest); + size_t bytes_left = kDestBufferSize - dest->pub.free_in_buffer; + if (bytes_left && + fwrite(dest->buffer, 1, bytes_left, 
dest->f) != bytes_left) { + JPEGLI_ERROR("Failed to write to output stream."); + } + fflush(dest->f); + if (ferror(dest->f)) { + JPEGLI_ERROR("Failed to write to output stream."); + } + } +}; + +struct MemoryDestinationManager { + jpeg_destination_mgr pub; + // Output buffer supplied by the application + uint8_t** output; + unsigned long* output_size; + // Output buffer allocated by us. + uint8_t* temp_buffer; + // Current output buffer (either application supplied or allocated by us). + uint8_t* current_buffer; + size_t buffer_size; + + static void init_destination(j_compress_ptr cinfo) {} + + static boolean empty_output_buffer(j_compress_ptr cinfo) { + auto dest = reinterpret_cast(cinfo->dest); + uint8_t* next_buffer = + reinterpret_cast(malloc(dest->buffer_size * 2)); + memcpy(next_buffer, dest->current_buffer, dest->buffer_size); + if (dest->temp_buffer != nullptr) { + free(dest->temp_buffer); + } + dest->temp_buffer = next_buffer; + dest->current_buffer = next_buffer; + *dest->output = next_buffer; + *dest->output_size = dest->buffer_size; + dest->pub.next_output_byte = next_buffer + dest->buffer_size; + dest->pub.free_in_buffer = dest->buffer_size; + dest->buffer_size *= 2; + return TRUE; + } + + static void term_destination(j_compress_ptr cinfo) { + auto dest = reinterpret_cast(cinfo->dest); + *dest->output_size = dest->buffer_size - dest->pub.free_in_buffer; + } +}; + +} // namespace jpegli + +void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile) { + if (outfile == nullptr) { + JPEGLI_ERROR("jpegli_stdio_dest: Invalid destination."); + } + if (cinfo->dest && cinfo->dest->init_destination != + jpegli::StdioDestinationManager::init_destination) { + JPEGLI_ERROR("jpegli_stdio_dest: a different dest manager was already set"); + } + if (!cinfo->dest) { + cinfo->dest = reinterpret_cast( + jpegli::Allocate(cinfo, 1)); + } + auto dest = reinterpret_cast(cinfo->dest); + dest->f = outfile; + dest->buffer = jpegli::Allocate(cinfo, jpegli::kDestBufferSize); + 
dest->pub.next_output_byte = dest->buffer; + dest->pub.free_in_buffer = jpegli::kDestBufferSize; + dest->pub.init_destination = + jpegli::StdioDestinationManager::init_destination; + dest->pub.empty_output_buffer = + jpegli::StdioDestinationManager::empty_output_buffer; + dest->pub.term_destination = + jpegli::StdioDestinationManager::term_destination; +} + +void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer, + unsigned long* outsize) { + if (outbuffer == nullptr || outsize == nullptr) { + JPEGLI_ERROR("jpegli_mem_dest: Invalid destination."); + } + if (cinfo->dest && cinfo->dest->init_destination != + jpegli::MemoryDestinationManager::init_destination) { + JPEGLI_ERROR("jpegli_mem_dest: a different dest manager was already set"); + } + if (!cinfo->dest) { + auto dest = jpegli::Allocate(cinfo, 1); + dest->temp_buffer = nullptr; + cinfo->dest = reinterpret_cast(dest); + } + auto dest = reinterpret_cast(cinfo->dest); + dest->pub.init_destination = + jpegli::MemoryDestinationManager::init_destination; + dest->pub.empty_output_buffer = + jpegli::MemoryDestinationManager::empty_output_buffer; + dest->pub.term_destination = + jpegli::MemoryDestinationManager::term_destination; + dest->output = outbuffer; + dest->output_size = outsize; + if (*outbuffer == nullptr || *outsize == 0) { + dest->temp_buffer = + reinterpret_cast(malloc(jpegli::kDestBufferSize)); + *outbuffer = dest->temp_buffer; + *outsize = jpegli::kDestBufferSize; + } + dest->current_buffer = *outbuffer; + dest->buffer_size = *outsize; + dest->pub.next_output_byte = dest->current_buffer; + dest->pub.free_in_buffer = dest->buffer_size; +} diff --git a/third-party/libjxl/libjxl/lib/jpegli/downsample.cc b/third-party/libjxl/libjxl/lib/jpegli/downsample.cc new file mode 100644 index 0000000000..df2c156972 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/downsample.cc @@ -0,0 +1,356 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/downsample.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/downsample.cc" +#include +#include + +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Vec; + +using D = HWY_CAPPED(float, 8); +constexpr D d; + +void DownsampleRow2x1(const float* row_in, size_t len, float* row_out) { + const size_t N = Lanes(d); + const size_t len_out = len / 2; + const auto mul = Set(d, 0.5f); + Vec v0, v1; + for (size_t x = 0; x < len_out; x += N) { + LoadInterleaved2(d, row_in + 2 * x, v0, v1); + Store(Mul(mul, Add(v0, v1)), d, row_out + x); + } +} + +void DownsampleRow3x1(const float* row_in, size_t len, float* row_out) { + const size_t N = Lanes(d); + const size_t len_out = len / 3; + const auto mul = Set(d, 1.0f / 3); + Vec v0, v1, v2; + for (size_t x = 0; x < len_out; x += N) { + LoadInterleaved3(d, row_in + 3 * x, v0, v1, v2); + Store(Mul(mul, Add(Add(v0, v1), v2)), d, row_out + x); + } +} + +void DownsampleRow4x1(const float* row_in, size_t len, float* row_out) { + const size_t N = Lanes(d); + const size_t len_out = len / 4; + const auto mul = Set(d, 0.25f); + Vec v0, v1, v2, v3; + for (size_t x = 0; x < len_out; x += N) { + LoadInterleaved4(d, row_in + 4 * x, v0, v1, v2, v3); + Store(Mul(mul, Add(Add(v0, v1), Add(v2, v3))), d, row_out + x); + } +} + +void Downsample2x1(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow2x1(rows_in[0], len, row_out); +} + +void Downsample3x1(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, row_out); +} + +void Downsample4x1(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* 
row_out) { + DownsampleRow4x1(rows_in[0], len, row_out); +} + +void Downsample1x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const auto mul = Set(d, 0.5f); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + for (size_t x = 0; x < len; x += N) { + Store(Mul(mul, Add(Load(d, row0 + x), Load(d, row1 + x))), d, row_out + x); + } +} + +void Downsample2x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const size_t len_out = len / 2; + const auto mul = Set(d, 0.25f); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + Vec v0, v1, v2, v3; + for (size_t x = 0; x < len_out; x += N) { + LoadInterleaved2(d, row0 + 2 * x, v0, v1); + LoadInterleaved2(d, row1 + 2 * x, v2, v3); + Store(Mul(mul, Add(Add(v0, v1), Add(v2, v3))), d, row_out + x); + } +} + +void Downsample3x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, rows_in[0]); + DownsampleRow3x1(rows_in[1], len, rows_in[1]); + Downsample1x2(rows_in, len / 3, row_out); +} + +void Downsample4x2(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow4x1(rows_in[0], len, rows_in[0]); + DownsampleRow4x1(rows_in[1], len, rows_in[1]); + Downsample1x2(rows_in, len / 4, row_out); +} + +void Downsample1x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const auto mul = Set(d, 1.0f / 3); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + float* row2 = rows_in[2]; + for (size_t x = 0; x < len; x += N) { + const auto in0 = Load(d, row0 + x); + const auto in1 = Load(d, row1 + x); + const auto in2 = Load(d, row2 + x); + Store(Mul(mul, Add(Add(in0, in1), in2)), d, row_out + x); + } +} + +void Downsample2x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow2x1(rows_in[0], len, rows_in[0]); + DownsampleRow2x1(rows_in[1], len, rows_in[1]); + DownsampleRow2x1(rows_in[2], 
len, rows_in[2]); + Downsample1x3(rows_in, len / 2, row_out); +} + +void Downsample3x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, rows_in[0]); + DownsampleRow3x1(rows_in[1], len, rows_in[1]); + DownsampleRow3x1(rows_in[2], len, rows_in[2]); + Downsample1x3(rows_in, len / 3, row_out); +} + +void Downsample4x3(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow4x1(rows_in[0], len, rows_in[0]); + DownsampleRow4x1(rows_in[1], len, rows_in[1]); + DownsampleRow4x1(rows_in[2], len, rows_in[2]); + Downsample1x3(rows_in, len / 4, row_out); +} + +void Downsample1x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + const size_t N = Lanes(d); + const auto mul = Set(d, 0.25f); + float* row0 = rows_in[0]; + float* row1 = rows_in[1]; + float* row2 = rows_in[2]; + float* row3 = rows_in[3]; + for (size_t x = 0; x < len; x += N) { + const auto in0 = Load(d, row0 + x); + const auto in1 = Load(d, row1 + x); + const auto in2 = Load(d, row2 + x); + const auto in3 = Load(d, row3 + x); + Store(Mul(mul, Add(Add(in0, in1), Add(in2, in3))), d, row_out + x); + } +} + +void Downsample2x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow2x1(rows_in[0], len, rows_in[0]); + DownsampleRow2x1(rows_in[1], len, rows_in[1]); + DownsampleRow2x1(rows_in[2], len, rows_in[2]); + DownsampleRow2x1(rows_in[3], len, rows_in[3]); + Downsample1x4(rows_in, len / 2, row_out); +} + +void Downsample3x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow3x1(rows_in[0], len, rows_in[0]); + DownsampleRow3x1(rows_in[1], len, rows_in[1]); + DownsampleRow3x1(rows_in[2], len, rows_in[2]); + DownsampleRow3x1(rows_in[3], len, rows_in[3]); + Downsample1x4(rows_in, len / 3, row_out); +} + +void Downsample4x4(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) { + DownsampleRow4x1(rows_in[0], len, rows_in[0]); + DownsampleRow4x1(rows_in[1], len, 
rows_in[1]); + DownsampleRow4x1(rows_in[2], len, rows_in[2]); + DownsampleRow4x1(rows_in[3], len, rows_in[3]); + Downsample1x4(rows_in, len / 4, row_out); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(Downsample1x2); +HWY_EXPORT(Downsample1x3); +HWY_EXPORT(Downsample1x4); +HWY_EXPORT(Downsample2x1); +HWY_EXPORT(Downsample2x2); +HWY_EXPORT(Downsample2x3); +HWY_EXPORT(Downsample2x4); +HWY_EXPORT(Downsample3x1); +HWY_EXPORT(Downsample3x2); +HWY_EXPORT(Downsample3x3); +HWY_EXPORT(Downsample3x4); +HWY_EXPORT(Downsample4x1); +HWY_EXPORT(Downsample4x2); +HWY_EXPORT(Downsample4x3); +HWY_EXPORT(Downsample4x4); + +void NullDownsample(float* rows_in[MAX_SAMP_FACTOR], size_t len, + float* row_out) {} + +void ChooseDownsampleMethods(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + for (int c = 0; c < cinfo->num_components; c++) { + m->downsample_method[c] = nullptr; + jpeg_component_info* comp = &cinfo->comp_info[c]; + const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor; + const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor; + if (v_factor == 1) { + if (h_factor == 1) { + m->downsample_method[c] = NullDownsample; + } else if (h_factor == 2) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x1); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x1); + } else if (h_factor == 4) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x1); + } + } else if (v_factor == 2) { + if (h_factor == 1) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2); + } else if (h_factor == 2) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x2); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2); + } else if (h_factor == 4) { + m->downsample_method[c] = 
HWY_DYNAMIC_DISPATCH(Downsample4x2); + } + } else if (v_factor == 3) { + if (h_factor == 1) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x2); + } else if (h_factor == 2) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x2); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x2); + } else if (h_factor == 4) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x2); + } + } else if (v_factor == 4) { + if (h_factor == 1) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample1x4); + } else if (h_factor == 2) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample2x4); + } else if (h_factor == 3) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample3x4); + } else if (h_factor == 4) { + m->downsample_method[c] = HWY_DYNAMIC_DISPATCH(Downsample4x4); + } + } + if (m->downsample_method[c] == nullptr) { + JPEGLI_ERROR("Unsupported downsampling ratio %dx%d", h_factor, v_factor); + } + } +} + +void DownsampleInputBuffer(j_compress_ptr cinfo) { + if (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1) { + return; + } + jpeg_comp_master* m = cinfo->master; + const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + const size_t y0 = m->next_iMCU_row * iMCU_height; + const size_t y1 = y0 + iMCU_height; + const size_t xsize_padded = m->xsize_blocks * DCTSIZE; + for (int c = 0; c < cinfo->num_components; c++) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const int h_factor = cinfo->max_h_samp_factor / comp->h_samp_factor; + const int v_factor = cinfo->max_v_samp_factor / comp->v_samp_factor; + if (h_factor == 1 && v_factor == 1) { + continue; + } + auto& input = *m->smooth_input[c]; + auto& output = *m->raw_data[c]; + const size_t yout0 = y0 / v_factor; + float* rows_in[MAX_SAMP_FACTOR]; + for (size_t yin = y0, yout = yout0; yin < y1; yin += v_factor, ++yout) { + for (int iy = 0; iy < v_factor; ++iy) { + rows_in[iy] = input.Row(yin + iy); + } + float* 
row_out = output.Row(yout); + (*m->downsample_method[c])(rows_in, xsize_padded, row_out); + } + } +} + +void ApplyInputSmoothing(j_compress_ptr cinfo) { + if (!cinfo->smoothing_factor) { + return; + } + jpeg_comp_master* m = cinfo->master; + const float kW1 = cinfo->smoothing_factor / 1024.0; + const float kW0 = 1.0f - 8.0f * kW1; + const size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + const ssize_t y0 = m->next_iMCU_row * iMCU_height; + const ssize_t y1 = y0 + iMCU_height; + const ssize_t xsize_padded = m->xsize_blocks * DCTSIZE; + for (int c = 0; c < cinfo->num_components; c++) { + auto& input = m->input_buffer[c]; + auto& output = *m->smooth_input[c]; + if (m->next_iMCU_row == 0) { + input.CopyRow(-1, 0, 1); + } + if (m->next_iMCU_row + 1 == cinfo->total_iMCU_rows) { + size_t last_row = m->ysize_blocks * DCTSIZE - 1; + input.CopyRow(last_row + 1, last_row, 1); + } + // TODO(szabadka) SIMDify this. + for (ssize_t y = y0; y < y1; ++y) { + const float* row_t = input.Row(y - 1); + const float* row_m = input.Row(y); + const float* row_b = input.Row(y + 1); + float* row_out = output.Row(y); + for (ssize_t x = 0; x < xsize_padded; ++x) { + float val_tl = row_t[x - 1]; + float val_tm = row_t[x]; + float val_tr = row_t[x + 1]; + float val_ml = row_m[x - 1]; + float val_mm = row_m[x]; + float val_mr = row_m[x + 1]; + float val_bl = row_b[x - 1]; + float val_bm = row_b[x]; + float val_br = row_b[x + 1]; + float val1 = (val_tl + val_tm + val_tr + val_ml + val_mr + val_bl + + val_bm + val_br); + row_out[x] = val_mm * kW0 + val1 * kW1; + } + } + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/downsample.h b/third-party/libjxl/libjxl/lib/jpegli/downsample.h new file mode 100644 index 0000000000..3ccf069e4e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/downsample.h @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_DOWNSAMPLE_H_ +#define LIB_JPEGLI_DOWNSAMPLE_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ChooseDownsampleMethods(j_compress_ptr cinfo); + +void DownsampleInputBuffer(j_compress_ptr cinfo); + +void ApplyInputSmoothing(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_DOWNSAMPLE_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode.cc b/third-party/libjxl/libjxl/lib/jpegli/encode.cc new file mode 100644 index 0000000000..8a106e239a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/encode.cc @@ -0,0 +1,1253 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/encode.h" + +#include +#include +#include + +#include "lib/jpegli/adaptive_quantization.h" +#include "lib/jpegli/bit_writer.h" +#include "lib/jpegli/bitstream.h" +#include "lib/jpegli/color_transform.h" +#include "lib/jpegli/downsample.h" +#include "lib/jpegli/encode_finish.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/encode_streaming.h" +#include "lib/jpegli/entropy_coding.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/huffman.h" +#include "lib/jpegli/input.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jpegli/quant.h" + +namespace jpegli { + +constexpr size_t kMaxBytesInMarker = 65533; + +void CheckState(j_compress_ptr cinfo, int state) { + if (cinfo->global_state != state) { + JPEGLI_ERROR("Unexpected global state %d [expected %d]", + cinfo->global_state, state); + } +} + +void CheckState(j_compress_ptr cinfo, int state1, int state2) { + if (cinfo->global_state != state1 && cinfo->global_state != state2) { + JPEGLI_ERROR("Unexpected global state %d [expected %d or %d]", + cinfo->global_state, state1, state2); + } +} + +// +// 
Parameter setup +// + +// Initialize cinfo fields that are not dependent on input image. This is shared +// between jpegli_CreateCompress() and jpegli_set_defaults() +void InitializeCompressParams(j_compress_ptr cinfo) { + cinfo->data_precision = 8; + cinfo->num_scans = 0; + cinfo->scan_info = nullptr; + cinfo->raw_data_in = FALSE; + cinfo->arith_code = FALSE; + cinfo->optimize_coding = FALSE; + cinfo->CCIR601_sampling = FALSE; + cinfo->smoothing_factor = 0; + cinfo->dct_method = JDCT_FLOAT; + cinfo->restart_interval = 0; + cinfo->restart_in_rows = 0; + cinfo->write_JFIF_header = FALSE; + cinfo->JFIF_major_version = 1; + cinfo->JFIF_minor_version = 1; + cinfo->density_unit = 0; + cinfo->X_density = 1; + cinfo->Y_density = 1; +#if JPEG_LIB_VERSION >= 70 + cinfo->scale_num = 1; + cinfo->scale_denom = 1; + cinfo->do_fancy_downsampling = FALSE; + cinfo->min_DCT_h_scaled_size = DCTSIZE; + cinfo->min_DCT_v_scaled_size = DCTSIZE; +#endif + cinfo->master->psnr_target = 0.0f; + cinfo->master->psnr_tolerance = 0.01f; + cinfo->master->min_distance = 0.1f; + cinfo->master->max_distance = 25.0f; +} + +float LinearQualityToDistance(int scale_factor) { + scale_factor = std::min(5000, std::max(0, scale_factor)); + int quality = + scale_factor < 100 ? 
100 - scale_factor / 2 : 5000 / scale_factor; + return jpegli_quality_to_distance(quality); +} + +template +void SetSentTableFlag(T** table_ptrs, size_t num, boolean val) { + for (size_t i = 0; i < num; ++i) { + if (table_ptrs[i]) table_ptrs[i]->sent_table = val; + } +} + +// +// Compressor initialization +// + +struct ProgressiveScan { + int Ss, Se, Ah, Al; + bool interleaved; +}; + +void SetDefaultScanScript(j_compress_ptr cinfo) { + int level = cinfo->master->progressive_level; + std::vector progressive_mode; + bool interleave_dc = + (cinfo->max_h_samp_factor == 1 && cinfo->max_v_samp_factor == 1); + if (level == 0) { + progressive_mode.push_back({0, 63, 0, 0, true}); + } else if (level == 1) { + progressive_mode.push_back({0, 0, 0, 0, interleave_dc}); + progressive_mode.push_back({1, 63, 0, 1, false}); + progressive_mode.push_back({1, 63, 1, 0, false}); + } else { + progressive_mode.push_back({0, 0, 0, 0, interleave_dc}); + progressive_mode.push_back({1, 2, 0, 0, false}); + progressive_mode.push_back({3, 63, 0, 2, false}); + progressive_mode.push_back({3, 63, 2, 1, false}); + progressive_mode.push_back({3, 63, 1, 0, false}); + } + + cinfo->script_space_size = 0; + for (const auto& scan : progressive_mode) { + int comps = scan.interleaved ? MAX_COMPS_IN_SCAN : 1; + cinfo->script_space_size += DivCeil(cinfo->num_components, comps); + } + cinfo->script_space = + Allocate(cinfo, cinfo->script_space_size); + + jpeg_scan_info* next_scan = cinfo->script_space; + for (const auto& scan : progressive_mode) { + int comps = scan.interleaved ? 
MAX_COMPS_IN_SCAN : 1; + for (int c = 0; c < cinfo->num_components; c += comps) { + next_scan->Ss = scan.Ss; + next_scan->Se = scan.Se; + next_scan->Ah = scan.Ah; + next_scan->Al = scan.Al; + next_scan->comps_in_scan = std::min(comps, cinfo->num_components - c); + for (int j = 0; j < next_scan->comps_in_scan; ++j) { + next_scan->component_index[j] = c + j; + } + ++next_scan; + } + } + JXL_ASSERT(next_scan - cinfo->script_space == cinfo->script_space_size); + cinfo->scan_info = cinfo->script_space; + cinfo->num_scans = cinfo->script_space_size; +} + +void ValidateScanScript(j_compress_ptr cinfo) { + // Mask of coefficient bits defined by the scan script, for each component + // and coefficient index. + uint16_t comp_mask[kMaxComponents][DCTSIZE2] = {}; + static constexpr int kMaxRefinementBit = 10; + + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info& si = cinfo->scan_info[i]; + if (si.comps_in_scan < 1 || si.comps_in_scan > MAX_COMPS_IN_SCAN) { + JPEGLI_ERROR("Invalid number of components in scan %d", si.comps_in_scan); + } + int last_ci = -1; + for (int j = 0; j < si.comps_in_scan; ++j) { + int ci = si.component_index[j]; + if (ci < 0 || ci >= cinfo->num_components) { + JPEGLI_ERROR("Invalid component index %d in scan", ci); + } else if (ci == last_ci) { + JPEGLI_ERROR("Duplicate component index %d in scan", ci); + } else if (ci < last_ci) { + JPEGLI_ERROR("Out of order component index %d in scan", ci); + } + last_ci = ci; + } + if (si.Ss < 0 || si.Se < si.Ss || si.Se >= DCTSIZE2) { + JPEGLI_ERROR("Invalid spectral range %d .. 
%d in scan", si.Ss, si.Se); + } + if (si.Ah < 0 || si.Al < 0 || si.Al > kMaxRefinementBit) { + JPEGLI_ERROR("Invalid refinement bits %d/%d", si.Ah, si.Al); + } + if (!cinfo->progressive_mode) { + if (si.Ss != 0 || si.Se != DCTSIZE2 - 1 || si.Ah != 0 || si.Al != 0) { + JPEGLI_ERROR("Invalid scan for sequential mode"); + } + } else { + if (si.Ss == 0 && si.Se != 0) { + JPEGLI_ERROR("DC and AC together in progressive scan"); + } + } + if (si.Ss != 0 && si.comps_in_scan != 1) { + JPEGLI_ERROR("Interleaved AC only scan."); + } + for (int j = 0; j < si.comps_in_scan; ++j) { + int ci = si.component_index[j]; + if (si.Ss != 0 && comp_mask[ci][0] == 0) { + JPEGLI_ERROR("AC before DC in component %d of scan", ci); + } + for (int k = si.Ss; k <= si.Se; ++k) { + if (comp_mask[ci][k] == 0) { + if (si.Ah != 0) { + JPEGLI_ERROR("Invalid first scan refinement bit"); + } + comp_mask[ci][k] = ((0xffff << si.Al) & 0xffff); + } else { + if (comp_mask[ci][k] != ((0xffff << si.Ah) & 0xffff) || + si.Al != si.Ah - 1) { + JPEGLI_ERROR("Invalid refinement bit progression."); + } + comp_mask[ci][k] |= 1 << si.Al; + } + } + } + if (si.comps_in_scan > 1) { + size_t mcu_size = 0; + for (int j = 0; j < si.comps_in_scan; ++j) { + int ci = si.component_index[j]; + jpeg_component_info* comp = &cinfo->comp_info[ci]; + mcu_size += comp->h_samp_factor * comp->v_samp_factor; + } + if (mcu_size > C_MAX_BLOCKS_IN_MCU) { + JPEGLI_ERROR("MCU size too big"); + } + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + if (comp_mask[c][k] != 0xffff) { + JPEGLI_ERROR("Incomplete scan of component %d and frequency %d", c, k); + } + } + } +} + +void ProcessCompressionParams(j_compress_ptr cinfo) { + if (cinfo->dest == nullptr) { + JPEGLI_ERROR("Missing destination."); + } + if (cinfo->image_width < 1 || cinfo->image_height < 1 || + cinfo->input_components < 1) { + JPEGLI_ERROR("Empty input image."); + } + if (cinfo->image_width > static_cast(JPEG_MAX_DIMENSION) || + 
cinfo->image_height > static_cast(JPEG_MAX_DIMENSION) || + cinfo->input_components > static_cast(kMaxComponents)) { + JPEGLI_ERROR("Input image too big."); + } + if (cinfo->num_components < 1 || + cinfo->num_components > static_cast(kMaxComponents)) { + JPEGLI_ERROR("Invalid number of components."); + } + if (cinfo->data_precision != kJpegPrecision) { + JPEGLI_ERROR("Invalid data precision"); + } + if (cinfo->arith_code) { + JPEGLI_ERROR("Arithmetic coding is not implemented."); + } + if (cinfo->CCIR601_sampling) { + JPEGLI_ERROR("CCIR601 sampling is not implemented."); + } + if (cinfo->restart_interval > 65535u) { + JPEGLI_ERROR("Restart interval too big"); + } + if (cinfo->smoothing_factor < 0 || cinfo->smoothing_factor > 100) { + JPEGLI_ERROR("Invalid smoothing factor %d", cinfo->smoothing_factor); + } + jpeg_comp_master* m = cinfo->master; + cinfo->max_h_samp_factor = cinfo->max_v_samp_factor = 1; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + if (comp->component_index != c) { + JPEGLI_ERROR("Invalid component index"); + } + for (int j = 0; j < c; ++j) { + if (cinfo->comp_info[j].component_id == comp->component_id) { + JPEGLI_ERROR("Duplicate component id %d", comp->component_id); + } + } + if (comp->h_samp_factor <= 0 || comp->v_samp_factor <= 0 || + comp->h_samp_factor > MAX_SAMP_FACTOR || + comp->v_samp_factor > MAX_SAMP_FACTOR) { + JPEGLI_ERROR("Invalid sampling factor %d x %d", comp->h_samp_factor, + comp->v_samp_factor); + } + cinfo->max_h_samp_factor = + std::max(comp->h_samp_factor, cinfo->max_h_samp_factor); + cinfo->max_v_samp_factor = + std::max(comp->v_samp_factor, cinfo->max_v_samp_factor); + } + if (cinfo->num_components == 1 && + (cinfo->max_h_samp_factor != 1 || cinfo->max_v_samp_factor != 1)) { + JPEGLI_ERROR("Sampling is not supported for simgle component image."); + } + size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor; + size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + 
size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width); + cinfo->total_iMCU_rows = DivCeil(cinfo->image_height, iMCU_height); + m->xsize_blocks = total_iMCU_cols * cinfo->max_h_samp_factor; + m->ysize_blocks = cinfo->total_iMCU_rows * cinfo->max_v_samp_factor; + + size_t blocks_per_iMCU = 0; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + if (cinfo->max_h_samp_factor % comp->h_samp_factor != 0 || + cinfo->max_v_samp_factor % comp->v_samp_factor != 0) { + JPEGLI_ERROR("Non-integral sampling ratios are not supported."); + } + m->h_factor[c] = cinfo->max_h_samp_factor / comp->h_samp_factor; + m->v_factor[c] = cinfo->max_v_samp_factor / comp->v_samp_factor; + comp->downsampled_width = DivCeil(cinfo->image_width, m->h_factor[c]); + comp->downsampled_height = DivCeil(cinfo->image_height, m->v_factor[c]); + comp->width_in_blocks = DivCeil(comp->downsampled_width, DCTSIZE); + comp->height_in_blocks = DivCeil(comp->downsampled_height, DCTSIZE); + blocks_per_iMCU += comp->h_samp_factor * comp->v_samp_factor; + } + m->blocks_per_iMCU_row = total_iMCU_cols * blocks_per_iMCU; + // Disable adaptive quantization for subsampled luma channel. + int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 
1 : 0; + jpeg_component_info* y_comp = &cinfo->comp_info[y_channel]; + if (y_comp->h_samp_factor != cinfo->max_h_samp_factor || + y_comp->v_samp_factor != cinfo->max_v_samp_factor) { + m->use_adaptive_quantization = false; + } + if (cinfo->scan_info == nullptr) { + SetDefaultScanScript(cinfo); + } + cinfo->progressive_mode = + cinfo->scan_info->Ss != 0 || cinfo->scan_info->Se != DCTSIZE2 - 1; + ValidateScanScript(cinfo); + m->scan_token_info = + Allocate(cinfo, cinfo->num_scans, JPOOL_IMAGE); + memset(m->scan_token_info, 0, cinfo->num_scans * sizeof(ScanTokenInfo)); + m->ac_ctx_offset = Allocate(cinfo, cinfo->num_scans, JPOOL_IMAGE); + size_t num_ac_contexts = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* scan_info = &cinfo->scan_info[i]; + m->ac_ctx_offset[i] = 4 + num_ac_contexts; + if (scan_info->Se > 0) { + num_ac_contexts += scan_info->comps_in_scan; + } + if (num_ac_contexts > 252) { + JPEGLI_ERROR("Too many AC scans in image"); + } + ScanTokenInfo* sti = &m->scan_token_info[i]; + if (scan_info->comps_in_scan == 1) { + int comp_idx = scan_info->component_index[0]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + sti->MCUs_per_row = comp->width_in_blocks; + sti->MCU_rows_in_scan = comp->height_in_blocks; + sti->blocks_in_MCU = 1; + } else { + sti->MCUs_per_row = + DivCeil(cinfo->image_width, DCTSIZE * cinfo->max_h_samp_factor); + sti->MCU_rows_in_scan = + DivCeil(cinfo->image_height, DCTSIZE * cinfo->max_v_samp_factor); + sti->blocks_in_MCU = 0; + for (int j = 0; j < scan_info->comps_in_scan; ++j) { + int comp_idx = scan_info->component_index[j]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + sti->blocks_in_MCU += comp->h_samp_factor * comp->v_samp_factor; + } + } + size_t num_MCUs = sti->MCU_rows_in_scan * sti->MCUs_per_row; + sti->num_blocks = num_MCUs * sti->blocks_in_MCU; + if (cinfo->restart_in_rows <= 0) { + sti->restart_interval = cinfo->restart_interval; + } else { + sti->restart_interval = + 
std::min(sti->MCUs_per_row * cinfo->restart_in_rows, 65535u); + } + sti->num_restarts = sti->restart_interval > 0 + ? DivCeil(num_MCUs, sti->restart_interval) + : 1; + sti->restarts = Allocate(cinfo, sti->num_restarts, JPOOL_IMAGE); + } + m->num_contexts = 4 + num_ac_contexts; +} + +bool IsStreamingSupported(j_compress_ptr cinfo) { + if (cinfo->global_state == kEncWriteCoeffs) { + return false; + } + // TODO(szabadka) Remove this restriction. + if (cinfo->restart_interval > 0 || cinfo->restart_in_rows > 0) { + return false; + } + if (cinfo->num_scans > 1) { + return false; + } + if (cinfo->master->psnr_target > 0) { + return false; + } + return true; +} + +void AllocateBuffers(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + memset(m->last_dc_coeff, 0, sizeof(m->last_dc_coeff)); + if (!IsStreamingSupported(cinfo) || cinfo->optimize_coding) { + int ysize_blocks = DivCeil(cinfo->image_height, DCTSIZE); + int num_arrays = cinfo->num_scans * ysize_blocks; + m->token_arrays = Allocate(cinfo, num_arrays, JPOOL_IMAGE); + m->cur_token_array = 0; + memset(m->token_arrays, 0, num_arrays * sizeof(TokenArray)); + m->num_tokens = 0; + m->total_num_tokens = 0; + } + if (cinfo->global_state == kEncWriteCoeffs) { + return; + } + size_t iMCU_width = DCTSIZE * cinfo->max_h_samp_factor; + size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + size_t total_iMCU_cols = DivCeil(cinfo->image_width, iMCU_width); + size_t xsize_full = total_iMCU_cols * iMCU_width; + size_t ysize_full = 3 * iMCU_height; + if (!cinfo->raw_data_in) { + int num_all_components = + std::max(cinfo->input_components, cinfo->num_components); + for (int c = 0; c < num_all_components; ++c) { + m->input_buffer[c].Allocate(cinfo, ysize_full, xsize_full); + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + size_t xsize = total_iMCU_cols * comp->h_samp_factor * DCTSIZE; + size_t ysize = 3 * comp->v_samp_factor * DCTSIZE; + if 
(cinfo->raw_data_in) { + m->input_buffer[c].Allocate(cinfo, ysize, xsize); + } + m->smooth_input[c] = &m->input_buffer[c]; + if (!cinfo->raw_data_in && cinfo->smoothing_factor) { + m->smooth_input[c] = Allocate>(cinfo, 1, JPOOL_IMAGE); + m->smooth_input[c]->Allocate(cinfo, ysize_full, xsize_full); + } + m->raw_data[c] = m->smooth_input[c]; + if (!cinfo->raw_data_in && (m->h_factor[c] > 1 || m->v_factor[c] > 1)) { + m->raw_data[c] = Allocate>(cinfo, 1, JPOOL_IMAGE); + m->raw_data[c]->Allocate(cinfo, ysize, xsize); + } + m->quant_mul[c] = Allocate(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + } + m->dct_buffer = Allocate(cinfo, 2 * DCTSIZE2, JPOOL_IMAGE_ALIGNED); + m->block_tmp = Allocate(cinfo, DCTSIZE2 * 4, JPOOL_IMAGE_ALIGNED); + if (!IsStreamingSupported(cinfo)) { + m->coeff_buffers = + Allocate(cinfo, cinfo->num_components, JPOOL_IMAGE); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + const size_t xsize_blocks = comp->width_in_blocks; + const size_t ysize_blocks = comp->height_in_blocks; + m->coeff_buffers[c] = (*cinfo->mem->request_virt_barray)( + reinterpret_cast(cinfo), JPOOL_IMAGE, + /*pre_zero=*/false, xsize_blocks, ysize_blocks, comp->v_samp_factor); + } + } + if (m->use_adaptive_quantization) { + int y_channel = cinfo->jpeg_color_space == JCS_RGB ? 
1 : 0; + jpeg_component_info* y_comp = &cinfo->comp_info[y_channel]; + const size_t xsize_blocks = y_comp->width_in_blocks; + const size_t vecsize = VectorSize(); + const size_t xsize_padded = DivCeil(2 * xsize_blocks, vecsize) * vecsize; + m->diff_buffer = + Allocate(cinfo, xsize_blocks * DCTSIZE + 8, JPOOL_IMAGE_ALIGNED); + m->fuzzy_erosion_tmp.Allocate(cinfo, 2, xsize_padded); + m->pre_erosion.Allocate(cinfo, 6 * cinfo->max_v_samp_factor, xsize_padded); + size_t qf_height = cinfo->max_v_samp_factor; + if (m->psnr_target > 0) { + qf_height *= cinfo->total_iMCU_rows; + } + m->quant_field.Allocate(cinfo, qf_height, xsize_blocks); + } else { + m->quant_field.Allocate(cinfo, 1, m->xsize_blocks); + m->quant_field.FillRow(0, 0, m->xsize_blocks); + } + for (int c = 0; c < cinfo->num_components; ++c) { + m->zero_bias_offset[c] = + Allocate(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + m->zero_bias_mul[c] = Allocate(cinfo, DCTSIZE2, JPOOL_IMAGE_ALIGNED); + memset(m->zero_bias_mul[c], 0, DCTSIZE2 * sizeof(float)); + memset(m->zero_bias_offset[c], 0, DCTSIZE2 * sizeof(float)); + } +} + +void InitProgressMonitor(j_compress_ptr cinfo) { + if (cinfo->progress == nullptr) { + return; + } + if (IsStreamingSupported(cinfo)) { + // We have only one input pass. + cinfo->progress->total_passes = 1; + } else { + // We have one input pass, a histogram pass for each scan, and an encode + // pass for each scan. + cinfo->progress->total_passes = 1 + 2 * cinfo->num_scans; + } +} + +// Common setup code between streaming and transcoding code paths. Called in +// both jpegli_start_compress() and jpegli_write_coefficients(). 
+void InitCompress(j_compress_ptr cinfo, boolean write_all_tables) { + jpeg_comp_master* m = cinfo->master; + (*cinfo->err->reset_error_mgr)(reinterpret_cast(cinfo)); + ProcessCompressionParams(cinfo); + InitProgressMonitor(cinfo); + AllocateBuffers(cinfo); + if (cinfo->global_state != kEncWriteCoeffs) { + ChooseInputMethod(cinfo); + if (!cinfo->raw_data_in) { + ChooseColorTransform(cinfo); + ChooseDownsampleMethods(cinfo); + } + QuantPass pass = m->psnr_target > 0 ? QuantPass::SEARCH_FIRST_PASS + : QuantPass::NO_SEARCH; + InitQuantizer(cinfo, pass); + } + if (write_all_tables) { + jpegli_suppress_tables(cinfo, FALSE); + } + if (!cinfo->optimize_coding && !cinfo->progressive_mode) { + CopyHuffmanTables(cinfo); + InitEntropyCoder(cinfo); + } + (*cinfo->dest->init_destination)(cinfo); + WriteFileHeader(cinfo); + JpegBitWriterInit(cinfo); + m->next_iMCU_row = 0; + m->last_restart_interval = 0; + m->next_dht_index = 0; +} + +// +// Input streaming +// + +void ProgressMonitorInputPass(j_compress_ptr cinfo) { + if (cinfo->progress == nullptr) { + return; + } + cinfo->progress->completed_passes = 0; + cinfo->progress->pass_counter = cinfo->next_scanline; + cinfo->progress->pass_limit = cinfo->image_height; + (*cinfo->progress->progress_monitor)(reinterpret_cast(cinfo)); +} + +void ReadInputRow(j_compress_ptr cinfo, const uint8_t* scanline, + float* row[kMaxComponents]) { + jpeg_comp_master* m = cinfo->master; + int num_all_components = + std::max(cinfo->input_components, cinfo->num_components); + for (int c = 0; c < num_all_components; ++c) { + row[c] = m->input_buffer[c].Row(m->next_input_row); + } + ++m->next_input_row; + if (scanline == nullptr) { + for (int c = 0; c < cinfo->input_components; ++c) { + memset(row[c], 0, cinfo->image_width * sizeof(row[c][0])); + } + return; + } + (*m->input_method)(scanline, cinfo->image_width, row); +} + +void PadInputBuffer(j_compress_ptr cinfo, float* row[kMaxComponents]) { + jpeg_comp_master* m = cinfo->master; + const size_t len0 
= cinfo->image_width; + const size_t len1 = m->xsize_blocks * DCTSIZE; + for (int c = 0; c < cinfo->num_components; ++c) { + // Pad row to a multiple of the iMCU width, plus create a border of 1 + // repeated pixel for adaptive quant field calculation. + float last_val = row[c][len0 - 1]; + for (size_t x = len0; x <= len1; ++x) { + row[c][x] = last_val; + } + row[c][-1] = row[c][0]; + } + if (m->next_input_row == cinfo->image_height) { + size_t num_rows = m->ysize_blocks * DCTSIZE - cinfo->image_height; + for (size_t i = 0; i < num_rows; ++i) { + for (int c = 0; c < cinfo->num_components; ++c) { + float* dest = m->input_buffer[c].Row(m->next_input_row) - 1; + memcpy(dest, row[c] - 1, (len1 + 2) * sizeof(dest[0])); + } + ++m->next_input_row; + } + } +} + +void ProcessiMCURow(j_compress_ptr cinfo) { + JXL_ASSERT(cinfo->master->next_iMCU_row < cinfo->total_iMCU_rows); + if (!cinfo->raw_data_in) { + ApplyInputSmoothing(cinfo); + DownsampleInputBuffer(cinfo); + } + ComputeAdaptiveQuantField(cinfo); + if (IsStreamingSupported(cinfo)) { + if (cinfo->optimize_coding) { + ComputeTokensForiMCURow(cinfo); + } else { + WriteiMCURow(cinfo); + } + } else { + ComputeCoefficientsForiMCURow(cinfo); + } + ++cinfo->master->next_iMCU_row; +} + +void ProcessiMCURows(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + // To have context rows both above and below the current iMCU row, we delay + // processing the first iMCU row and process two iMCU rows after we receive + // the last input row. 
+ if (m->next_input_row % iMCU_height == 0 && m->next_input_row > iMCU_height) { + ProcessiMCURow(cinfo); + } + if (m->next_input_row >= cinfo->image_height) { + ProcessiMCURow(cinfo); + } +} + +// +// Non-streaming part +// + +void ZigZagShuffleBlocks(j_compress_ptr cinfo) { + JCOEF tmp[DCTSIZE2]; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = GetBlockRow(cinfo, c, by); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + JCOEF* block = &ba[0][bx][0]; + for (int k = 0; k < DCTSIZE2; ++k) { + tmp[k] = block[kJPEGNaturalOrder[k]]; + } + memcpy(block, tmp, sizeof(tmp)); + } + } + } +} + +} // namespace jpegli + +// +// Parameter setup +// + +void jpegli_CreateCompress(j_compress_ptr cinfo, int version, + size_t structsize) { + cinfo->mem = nullptr; + if (structsize != sizeof(*cinfo)) { + JPEGLI_ERROR("jpegli_compress_struct has wrong size."); + } + jpegli::InitMemoryManager(reinterpret_cast(cinfo)); + cinfo->progress = nullptr; + cinfo->is_decompressor = FALSE; + cinfo->global_state = jpegli::kEncStart; + cinfo->dest = nullptr; + cinfo->image_width = 0; + cinfo->image_height = 0; + cinfo->input_components = 0; + cinfo->in_color_space = JCS_UNKNOWN; + cinfo->input_gamma = 1.0f; + cinfo->num_components = 0; + cinfo->jpeg_color_space = JCS_UNKNOWN; + cinfo->comp_info = nullptr; + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + cinfo->quant_tbl_ptrs[i] = nullptr; + } + for (int i = 0; i < NUM_HUFF_TBLS; ++i) { + cinfo->dc_huff_tbl_ptrs[i] = nullptr; + cinfo->ac_huff_tbl_ptrs[i] = nullptr; + } + memset(cinfo->arith_dc_L, 0, sizeof(cinfo->arith_dc_L)); + memset(cinfo->arith_dc_U, 0, sizeof(cinfo->arith_dc_U)); + memset(cinfo->arith_ac_K, 0, sizeof(cinfo->arith_ac_K)); + cinfo->write_Adobe_marker = false; + cinfo->master = jpegli::Allocate(cinfo, 1); + jpegli::InitializeCompressParams(cinfo); + cinfo->master->force_baseline = 
true; + cinfo->master->xyb_mode = false; + cinfo->master->cicp_transfer_function = 2; // unknown transfer function code + cinfo->master->use_std_tables = false; + cinfo->master->use_adaptive_quantization = true; + cinfo->master->progressive_level = jpegli::kDefaultProgressiveLevel; + cinfo->master->data_type = JPEGLI_TYPE_UINT8; + cinfo->master->endianness = JPEGLI_NATIVE_ENDIAN; + cinfo->master->coeff_buffers = nullptr; +} + +void jpegli_set_xyb_mode(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->xyb_mode = true; +} + +void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->cicp_transfer_function = code; +} + +void jpegli_set_defaults(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + jpegli::InitializeCompressParams(cinfo); + jpegli_default_colorspace(cinfo); + jpegli_set_quality(cinfo, 90, TRUE); + jpegli_set_progressive_level(cinfo, jpegli::kDefaultProgressiveLevel); + jpegli::AddStandardHuffmanTables(reinterpret_cast(cinfo), + /*is_dc=*/false); + jpegli::AddStandardHuffmanTables(reinterpret_cast(cinfo), + /*is_dc=*/true); +} + +void jpegli_default_colorspace(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + switch (cinfo->in_color_space) { + case JCS_GRAYSCALE: + jpegli_set_colorspace(cinfo, JCS_GRAYSCALE); + break; + case JCS_RGB: { + if (cinfo->master->xyb_mode) { + jpegli_set_colorspace(cinfo, JCS_RGB); + } else { + jpegli_set_colorspace(cinfo, JCS_YCbCr); + } + break; + } + case JCS_YCbCr: + jpegli_set_colorspace(cinfo, JCS_YCbCr); + break; + case JCS_CMYK: + jpegli_set_colorspace(cinfo, JCS_CMYK); + break; + case JCS_YCCK: + jpegli_set_colorspace(cinfo, JCS_YCCK); + break; + case JCS_UNKNOWN: + jpegli_set_colorspace(cinfo, JCS_UNKNOWN); + break; + default: + JPEGLI_ERROR("Unsupported input colorspace %d", cinfo->in_color_space); + } +} + +void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) { + 
CheckState(cinfo, jpegli::kEncStart); + cinfo->jpeg_color_space = colorspace; + switch (colorspace) { + case JCS_GRAYSCALE: + cinfo->num_components = 1; + break; + case JCS_RGB: + case JCS_YCbCr: + cinfo->num_components = 3; + break; + case JCS_CMYK: + case JCS_YCCK: + cinfo->num_components = 4; + break; + case JCS_UNKNOWN: + cinfo->num_components = + std::min(jpegli::kMaxComponents, cinfo->input_components); + break; + default: + JPEGLI_ERROR("Unsupported jpeg colorspace %d", colorspace); + } + // Adobe marker is only needed to distinguish CMYK and YCCK JPEGs. + cinfo->write_Adobe_marker = (cinfo->jpeg_color_space == JCS_YCCK); + if (cinfo->comp_info == nullptr) { + cinfo->comp_info = + jpegli::Allocate(cinfo, MAX_COMPONENTS); + } + memset(cinfo->comp_info, 0, + jpegli::kMaxComponents * sizeof(jpeg_component_info)); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + comp->component_index = c; + comp->component_id = c + 1; + comp->h_samp_factor = 1; + comp->v_samp_factor = 1; + comp->quant_tbl_no = 0; + comp->dc_tbl_no = 0; + comp->ac_tbl_no = 0; + } + if (colorspace == JCS_RGB) { + cinfo->comp_info[0].component_id = 'R'; + cinfo->comp_info[1].component_id = 'G'; + cinfo->comp_info[2].component_id = 'B'; + if (cinfo->master->xyb_mode) { + // Subsample blue channel. 
+ cinfo->comp_info[0].h_samp_factor = cinfo->comp_info[0].v_samp_factor = 2; + cinfo->comp_info[1].h_samp_factor = cinfo->comp_info[1].v_samp_factor = 2; + cinfo->comp_info[2].h_samp_factor = cinfo->comp_info[2].v_samp_factor = 1; + // Use separate quantization tables for each component + cinfo->comp_info[1].quant_tbl_no = 1; + cinfo->comp_info[2].quant_tbl_no = 2; + } + } else if (colorspace == JCS_CMYK) { + cinfo->comp_info[0].component_id = 'C'; + cinfo->comp_info[1].component_id = 'M'; + cinfo->comp_info[2].component_id = 'Y'; + cinfo->comp_info[3].component_id = 'K'; + } else if (colorspace == JCS_YCbCr || colorspace == JCS_YCCK) { + // Use separate quantization and Huffman tables for luma and chroma + cinfo->comp_info[1].quant_tbl_no = 1; + cinfo->comp_info[2].quant_tbl_no = 1; + cinfo->comp_info[1].dc_tbl_no = cinfo->comp_info[1].ac_tbl_no = 1; + cinfo->comp_info[2].dc_tbl_no = cinfo->comp_info[2].ac_tbl_no = 1; + } +} + +void jpegli_set_distance(j_compress_ptr cinfo, float distance, + boolean force_baseline) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->force_baseline = force_baseline; + float distances[NUM_QUANT_TBLS] = {distance, distance, distance}; + jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true); +} + +float jpegli_quality_to_distance(int quality) { + return (quality >= 100 ? 0.01f + : quality >= 30 ? 
0.1f + (100 - quality) * 0.09f + : 53.0f / 3000.0f * quality * quality - + 23.0f / 20.0f * quality + 25.0f); +} + +void jpegli_set_psnr(j_compress_ptr cinfo, float psnr, float tolerance, + float min_distance, float max_distance) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->psnr_target = psnr; + cinfo->master->psnr_tolerance = tolerance; + cinfo->master->min_distance = min_distance; + cinfo->master->max_distance = max_distance; +} + +void jpegli_set_quality(j_compress_ptr cinfo, int quality, + boolean force_baseline) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->force_baseline = force_baseline; + float distance = jpegli_quality_to_distance(quality); + float distances[NUM_QUANT_TBLS] = {distance, distance, distance}; + jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/false); +} + +void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor, + boolean force_baseline) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->force_baseline = force_baseline; + float distance = jpegli::LinearQualityToDistance(scale_factor); + float distances[NUM_QUANT_TBLS] = {distance, distance, distance}; + jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/false); +} + +#if JPEG_LIB_VERSION >= 70 +void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->force_baseline = force_baseline; + float distances[NUM_QUANT_TBLS]; + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + distances[i] = jpegli::LinearQualityToDistance(cinfo->q_scale_factor[i]); + } + jpegli::SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/false); +} +#endif + +int jpegli_quality_scaling(int quality) { + quality = std::min(100, std::max(1, quality)); + return quality < 50 ? 
5000 / quality : 200 - 2 * quality; +} + +void jpegli_use_standard_quant_tables(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->use_std_tables = true; +} + +void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int* basic_table, int scale_factor, + boolean force_baseline) { + CheckState(cinfo, jpegli::kEncStart); + if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS) { // NUM_QUANT_TBLS itself is one past the last slot + JPEGLI_ERROR("Invalid quant table index %d", which_tbl); + } + if (cinfo->quant_tbl_ptrs[which_tbl] == nullptr) { + cinfo->quant_tbl_ptrs[which_tbl] = + jpegli_alloc_quant_table(reinterpret_cast(cinfo)); + } + int max_qval = force_baseline ? 255 : 32767U; + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[which_tbl]; + for (int k = 0; k < DCTSIZE2; ++k) { + int qval = (basic_table[k] * scale_factor + 50) / 100; + qval = std::max(1, std::min(qval, max_qval)); + quant_table->quantval[k] = qval; + } + quant_table->sent_table = FALSE; +} + +void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->master->use_adaptive_quantization = value; +} + +void jpegli_simple_progression(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + jpegli_set_progressive_level(cinfo, 2); +} + +void jpegli_set_progressive_level(j_compress_ptr cinfo, int level) { + CheckState(cinfo, jpegli::kEncStart); + if (level < 0) { + JPEGLI_ERROR("Invalid progressive level %d", level); + } + cinfo->master->progressive_level = level; +} + +void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness) { + CheckState(cinfo, jpegli::kEncStart); + switch (data_type) { + case JPEGLI_TYPE_UINT8: + case JPEGLI_TYPE_UINT16: + case JPEGLI_TYPE_FLOAT: + cinfo->master->data_type = data_type; + break; + default: + JPEGLI_ERROR("Unsupported data type %d", data_type); + } + switch (endianness) { + case JPEGLI_NATIVE_ENDIAN: + case JPEGLI_LITTLE_ENDIAN: + case
JPEGLI_BIG_ENDIAN: + cinfo->master->endianness = endianness; + break; + default: + JPEGLI_ERROR("Unsupported endianness %d", endianness); + } +} + +#if JPEG_LIB_VERSION >= 70 +void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo) { + // Since input scaling is not supported, we just copy the image dimensions. + cinfo->jpeg_width = cinfo->image_width; + cinfo->jpeg_height = cinfo->image_height; +} +#endif + +void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo) { + CheckState(dstinfo, jpegli::kEncStart); + // Image parameters. + dstinfo->image_width = srcinfo->image_width; + dstinfo->image_height = srcinfo->image_height; + dstinfo->input_components = srcinfo->num_components; + dstinfo->in_color_space = srcinfo->jpeg_color_space; + dstinfo->input_gamma = srcinfo->output_gamma; + // Compression parameters. + jpegli_set_defaults(dstinfo); + jpegli_set_colorspace(dstinfo, srcinfo->jpeg_color_space); + if (dstinfo->num_components != srcinfo->num_components) { + const auto& cinfo = dstinfo; + return JPEGLI_ERROR("Mismatch between src colorspace and components"); + } + dstinfo->data_precision = srcinfo->data_precision; + dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling; + dstinfo->JFIF_major_version = srcinfo->JFIF_major_version; + dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version; + dstinfo->density_unit = srcinfo->density_unit; + dstinfo->X_density = srcinfo->X_density; + dstinfo->Y_density = srcinfo->Y_density; + for (int c = 0; c < dstinfo->num_components; ++c) { + jpeg_component_info* srccomp = &srcinfo->comp_info[c]; + jpeg_component_info* dstcomp = &dstinfo->comp_info[c]; + dstcomp->component_id = srccomp->component_id; + dstcomp->h_samp_factor = srccomp->h_samp_factor; + dstcomp->v_samp_factor = srccomp->v_samp_factor; + dstcomp->quant_tbl_no = srccomp->quant_tbl_no; + } + for (int i = 0; i < NUM_QUANT_TBLS; ++i) { + if (!srcinfo->quant_tbl_ptrs[i]) continue; + if (dstinfo->quant_tbl_ptrs[i] == nullptr) { + 
dstinfo->quant_tbl_ptrs[i] = jpegli::Allocate(dstinfo, 1); + } + memcpy(dstinfo->quant_tbl_ptrs[i], srcinfo->quant_tbl_ptrs[i], + sizeof(JQUANT_TBL)); + dstinfo->quant_tbl_ptrs[i]->sent_table = FALSE; + } +} + +void jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress) { + jpegli::SetSentTableFlag(cinfo->quant_tbl_ptrs, NUM_QUANT_TBLS, suppress); + jpegli::SetSentTableFlag(cinfo->dc_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress); + jpegli::SetSentTableFlag(cinfo->ac_huff_tbl_ptrs, NUM_HUFF_TBLS, suppress); +} + +// +// Compressor initialization +// + +void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->global_state = jpegli::kEncHeader; + jpegli::InitCompress(cinfo, write_all_tables); + cinfo->next_scanline = 0; + cinfo->master->next_input_row = 0; +} + +void jpegli_write_coefficients(j_compress_ptr cinfo, + jvirt_barray_ptr* coef_arrays) { + CheckState(cinfo, jpegli::kEncStart); + cinfo->global_state = jpegli::kEncWriteCoeffs; + jpegli::InitCompress(cinfo, /*write_all_tables=*/true); + cinfo->master->coeff_buffers = coef_arrays; + cinfo->next_scanline = cinfo->image_height; + cinfo->master->next_input_row = cinfo->image_height; +} + +void jpegli_write_tables(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncStart); + if (cinfo->dest == nullptr) { + JPEGLI_ERROR("Missing destination."); + } + jpeg_comp_master* m = cinfo->master; + (*cinfo->err->reset_error_mgr)(reinterpret_cast(cinfo)); + (*cinfo->dest->init_destination)(cinfo); + jpegli::WriteOutput(cinfo, {0xFF, 0xD8}); // SOI + jpegli::EncodeDQT(cinfo, /*write_all_tables=*/true); + jpegli::CopyHuffmanTables(cinfo); + jpegli::EncodeDHT(cinfo, 0, m->num_huffman_tables); + jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI + (*cinfo->dest->term_destination)(cinfo); + jpegli_suppress_tables(cinfo, TRUE); +} + +// +// Marker writing +// + +void jpegli_write_m_header(j_compress_ptr cinfo, int marker, + unsigned int datalen) { + 
CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncWriteCoeffs); + if (datalen > jpegli::kMaxBytesInMarker) { + JPEGLI_ERROR("Invalid marker length %u", datalen); + } + if (marker != 0xfe && (marker < 0xe0 || marker > 0xef)) { + JPEGLI_ERROR( + "jpegli_write_m_header: Only APP and COM markers are supported."); + } + std::vector marker_data(4 + datalen); + marker_data[0] = 0xff; + marker_data[1] = marker; + marker_data[2] = (datalen + 2) >> 8; + marker_data[3] = (datalen + 2) & 0xff; + jpegli::WriteOutput(cinfo, &marker_data[0], 4); +} + +void jpegli_write_m_byte(j_compress_ptr cinfo, int val) { + uint8_t data = val; + jpegli::WriteOutput(cinfo, &data, 1); +} + +void jpegli_write_marker(j_compress_ptr cinfo, int marker, + const JOCTET* dataptr, unsigned int datalen) { + jpegli_write_m_header(cinfo, marker, datalen); + jpegli::WriteOutput(cinfo, dataptr, datalen); +} + +void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr, + unsigned int icc_data_len) { + constexpr size_t kMaxIccBytesInMarker = + jpegli::kMaxBytesInMarker - sizeof jpegli::kICCSignature - 2; + const int num_markers = + static_cast(jpegli::DivCeil(icc_data_len, kMaxIccBytesInMarker)); + size_t begin = 0; + for (int current_marker = 0; current_marker < num_markers; ++current_marker) { + const size_t length = std::min(kMaxIccBytesInMarker, icc_data_len - begin); + jpegli_write_m_header( + cinfo, jpegli::kICCMarker, + static_cast(length + sizeof jpegli::kICCSignature + 2)); + for (const unsigned char c : jpegli::kICCSignature) { + jpegli_write_m_byte(cinfo, c); + } + jpegli_write_m_byte(cinfo, current_marker + 1); + jpegli_write_m_byte(cinfo, num_markers); + for (size_t i = 0; i < length; ++i) { + jpegli_write_m_byte(cinfo, icc_data_ptr[begin]); + ++begin; + } + } +} + +// +// Input streaming +// + +JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION num_lines) { + CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage); + if 
(cinfo->raw_data_in) { + JPEGLI_ERROR("jpegli_write_raw_data() must be called for raw data mode."); + } + jpegli::ProgressMonitorInputPass(cinfo); + if (cinfo->global_state == jpegli::kEncHeader && + jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) { + jpegli::WriteFrameHeader(cinfo); + jpegli::WriteScanHeader(cinfo, 0); + } + cinfo->global_state = jpegli::kEncReadImage; + jpeg_comp_master* m = cinfo->master; + if (num_lines + cinfo->next_scanline > cinfo->image_height) { + num_lines = cinfo->image_height - cinfo->next_scanline; + } + JDIMENSION prev_scanline = cinfo->next_scanline; + size_t input_lag = (std::min(cinfo->image_height, m->next_input_row) - + cinfo->next_scanline); + if (input_lag > num_lines) { + JPEGLI_ERROR("Need at least %zu lines to continue", input_lag); + } + if (input_lag > 0) { + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + return 0; + } + cinfo->next_scanline += input_lag; + } + float* rows[jpegli::kMaxComponents]; + for (size_t i = input_lag; i < num_lines; ++i) { + jpegli::ReadInputRow(cinfo, scanlines[i], rows); + (*m->color_transform)(rows, cinfo->image_width); + jpegli::PadInputBuffer(cinfo, rows); + jpegli::ProcessiMCURows(cinfo); + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + break; + } + ++cinfo->next_scanline; + } + return cinfo->next_scanline - prev_scanline; +} + +JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines) { + CheckState(cinfo, jpegli::kEncHeader, jpegli::kEncReadImage); + if (!cinfo->raw_data_in) { + JPEGLI_ERROR("jpegli_write_raw_data(): raw data mode was not set"); + } + jpegli::ProgressMonitorInputPass(cinfo); + if (cinfo->global_state == jpegli::kEncHeader && + jpegli::IsStreamingSupported(cinfo) && !cinfo->optimize_coding) { + jpegli::WriteFrameHeader(cinfo); + jpegli::WriteScanHeader(cinfo, 0); + } + cinfo->global_state = jpegli::kEncReadImage; + jpeg_comp_master* m = cinfo->master; + if (cinfo->next_scanline >= cinfo->image_height) { + return 0; + } +
size_t iMCU_height = DCTSIZE * cinfo->max_v_samp_factor; + if (num_lines < iMCU_height) { + JPEGLI_ERROR("Missing input lines, minimum is %zu", iMCU_height); + } + if (cinfo->next_scanline < m->next_input_row) { + JXL_ASSERT(m->next_input_row - cinfo->next_scanline == iMCU_height); + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + return 0; + } + cinfo->next_scanline = m->next_input_row; + return iMCU_height; + } + size_t iMCU_y = m->next_input_row / iMCU_height; + float* rows[jpegli::kMaxComponents]; + for (int c = 0; c < cinfo->num_components; ++c) { + JSAMPARRAY plane = data[c]; + jpeg_component_info* comp = &cinfo->comp_info[c]; + size_t xsize = comp->width_in_blocks * DCTSIZE; + size_t ysize = comp->v_samp_factor * DCTSIZE; + size_t y0 = iMCU_y * ysize; + auto& buffer = m->input_buffer[c]; + for (size_t i = 0; i < ysize; ++i) { + rows[0] = buffer.Row(y0 + i); + if (plane[i] == nullptr) { + memset(rows[0], 0, xsize * sizeof(rows[0][0])); + } else { + (*m->input_method)(plane[i], xsize, rows); + } + // We need a border of 1 repeated pixel for adaptive quant field. + buffer.PadRow(y0 + i, xsize, /*border=*/1); + } + } + m->next_input_row += iMCU_height; + jpegli::ProcessiMCURows(cinfo); + if (!jpegli::EmptyBitWriterBuffer(&m->bw)) { + return 0; + } + cinfo->next_scanline += iMCU_height; + return iMCU_height; +} + +// +// Non-streaming part +// + +void jpegli_finish_compress(j_compress_ptr cinfo) { + CheckState(cinfo, jpegli::kEncReadImage, jpegli::kEncWriteCoeffs); + jpeg_comp_master* m = cinfo->master; + if (cinfo->next_scanline < cinfo->image_height) { + JPEGLI_ERROR("Incomplete image, expected %d rows, got %d", + cinfo->image_height, cinfo->next_scanline); + } + + if (cinfo->global_state == jpegli::kEncWriteCoeffs) { + // Zig-zag shuffle all the blocks. For non-transcoding case it was already + // done in EncodeiMCURow().
+ jpegli::ZigZagShuffleBlocks(cinfo); + } + + if (m->psnr_target > 0) { + jpegli::QuantizetoPSNR(cinfo); + } + + const bool tokens_done = jpegli::IsStreamingSupported(cinfo); + const bool bitstream_done = tokens_done && !cinfo->optimize_coding; + + if (!tokens_done) { + jpegli::TokenizeJpeg(cinfo); + } + + if (cinfo->optimize_coding || cinfo->progressive_mode) { + jpegli::OptimizeHuffmanCodes(cinfo); + jpegli::InitEntropyCoder(cinfo); + } + + if (!bitstream_done) { + jpegli::WriteFrameHeader(cinfo); + for (int i = 0; i < cinfo->num_scans; ++i) { + jpegli::WriteScanHeader(cinfo, i); + jpegli::WriteScanData(cinfo, i); + } + } else { + JumpToByteBoundary(&m->bw); + if (!EmptyBitWriterBuffer(&m->bw)) { + JPEGLI_ERROR("Output suspension is not supported in finish_compress"); + } + } + + jpegli::WriteOutput(cinfo, {0xFF, 0xD9}); // EOI + (*cinfo->dest->term_destination)(cinfo); + + // Release memory and reset global state. + jpegli_abort_compress(cinfo); +} + +void jpegli_abort_compress(j_compress_ptr cinfo) { + jpegli_abort(reinterpret_cast(cinfo)); +} + +void jpegli_destroy_compress(j_compress_ptr cinfo) { + jpegli_destroy(reinterpret_cast(cinfo)); +} diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode.h b/third-party/libjxl/libjxl/lib/jpegli/encode.h new file mode 100644 index 0000000000..7fa328f7e9 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/encode.h @@ -0,0 +1,158 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+// +// This file contains the C API of the encoder part of the libjpegli library, +// which is based on the C API of libjpeg, with the function names changed from +// jpeg_* to jpegli_*, while compressor object definitions are included directly +// from jpeglib.h +// +// Applications can use the libjpegli library in one of the following ways: +// +// (1) Include jpegli/encode.h and/or jpegli/decode.h, update the function +// names of the API and link against libjpegli. +// +// (2) Leave the application code unchanged, but replace the libjpeg.so library +// with the one built by this project that is API- and ABI-compatible with +// libjpeg-turbo's version of libjpeg.so. + +#ifndef LIB_JPEGLI_ENCODE_H_ +#define LIB_JPEGLI_ENCODE_H_ + +#include "lib/jpegli/common.h" + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +#define jpegli_create_compress(cinfo) \ + jpegli_CreateCompress((cinfo), JPEG_LIB_VERSION, \ + (size_t)sizeof(struct jpeg_compress_struct)) +void jpegli_CreateCompress(j_compress_ptr cinfo, int version, + size_t structsize); + +void jpegli_stdio_dest(j_compress_ptr cinfo, FILE* outfile); + +void jpegli_mem_dest(j_compress_ptr cinfo, unsigned char** outbuffer, + unsigned long* outsize); + +void jpegli_set_defaults(j_compress_ptr cinfo); + +void jpegli_default_colorspace(j_compress_ptr cinfo); + +void jpegli_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace); + +void jpegli_set_quality(j_compress_ptr cinfo, int quality, + boolean force_baseline); + +void jpegli_set_linear_quality(j_compress_ptr cinfo, int scale_factor, + boolean force_baseline); + +#if JPEG_LIB_VERSION >= 70 +void jpegli_default_qtables(j_compress_ptr cinfo, boolean force_baseline); +#endif + +int jpegli_quality_scaling(int quality); + +void jpegli_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int* basic_table, int scale_factor, + boolean force_baseline); + +void jpegli_simple_progression(j_compress_ptr cinfo); + +void
jpegli_suppress_tables(j_compress_ptr cinfo, boolean suppress); + +#if JPEG_LIB_VERSION >= 70 +void jpegli_calc_jpeg_dimensions(j_compress_ptr cinfo); +#endif + +void jpegli_copy_critical_parameters(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo); + +void jpegli_write_m_header(j_compress_ptr cinfo, int marker, + unsigned int datalen); + +void jpegli_write_m_byte(j_compress_ptr cinfo, int val); + +void jpegli_write_marker(j_compress_ptr cinfo, int marker, + const JOCTET* dataptr, unsigned int datalen); + +void jpegli_write_icc_profile(j_compress_ptr cinfo, const JOCTET* icc_data_ptr, + unsigned int icc_data_len); + +void jpegli_start_compress(j_compress_ptr cinfo, boolean write_all_tables); + +void jpegli_write_tables(j_compress_ptr cinfo); + +JDIMENSION jpegli_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION num_lines); + +JDIMENSION jpegli_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines); + +void jpegli_write_coefficients(j_compress_ptr cinfo, + jvirt_barray_ptr* coef_arrays); + +void jpegli_finish_compress(j_compress_ptr cinfo); + +void jpegli_abort_compress(j_compress_ptr cinfo); + +void jpegli_destroy_compress(j_compress_ptr cinfo); + +// +// New API functions that are not available in libjpeg +// +// NOTE: This part of the API is still experimental and will probably change in +// the future. +// + +// Sets the butteraugli target distance for the compressor. This may override +// the default quantization table indexes based on jpeg colorspace, therefore +// it must be called after jpegli_set_defaults() or after the last +// jpegli_set_colorspace() or jpegli_default_colorspace() calls. +void jpegli_set_distance(j_compress_ptr cinfo, float distance, + boolean force_baseline); + +// Returns the butteraugli target distance for the given quality parameter. +float jpegli_quality_to_distance(int quality); + +// Enables distance parameter search to meet the given psnr target. 
+void jpegli_set_psnr(j_compress_ptr cinfo, float psnr, float tolerance, + float min_distance, float max_distance); + +// Changes the default behaviour of the encoder in the selection of quantization +// matrices and chroma subsampling. Must be called before jpegli_set_defaults() +// because some default settings depend on the XYB mode. +void jpegli_set_xyb_mode(j_compress_ptr cinfo); + +// Signals to the encoder that the pixel data that will be provided later +// through jpegli_write_scanlines() has this transfer function. This must be +// called before jpegli_set_defaults() because it changes the default +// quantization tables. +void jpegli_set_cicp_transfer_function(j_compress_ptr cinfo, int code); + +void jpegli_set_input_format(j_compress_ptr cinfo, JpegliDataType data_type, + JpegliEndianness endianness); + +// Sets whether or not the encoder uses adaptive quantization for creating more +// zero coefficients based on the local properties of the image. +// Enabled by default. +void jpegli_enable_adaptive_quantization(j_compress_ptr cinfo, boolean value); + +// Sets the default progression parameters, where level 0 is sequential, and +// greater level value means more progression steps. Default is 2. +void jpegli_set_progressive_level(j_compress_ptr cinfo, int level); + +// If this function is called before starting compression, the quality and +// linear quality parameters will be used to scale the standard quantization +// tables from Annex K of the JPEG standard. By default jpegli uses a different +// set of quantization tables and uses different scaling parameters for DC and +// AC coefficients. Must be called before jpegli_set_defaults().
+void jpegli_use_standard_quant_tables(j_compress_ptr cinfo);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  // extern "C"
+#endif
+
+#endif  // LIB_JPEGLI_ENCODE_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_api_test.cc b/third-party/libjxl/libjxl/lib/jpegli/encode_api_test.cc
new file mode 100644
index 0000000000..4039c297a5
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_api_test.cc
@@ -0,0 +1,839 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stdio.h>  // NOTE(review): this and the next three system header names were lost in extraction and are inferred from usage - confirm
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+#include "lib/jpegli/encode.h"
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/test_utils.h"
+#include "lib/jpegli/testing.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jpegli {
+namespace {
+
+struct TestConfig {
+  TestImage input;
+  CompressParams jparams;
+  JpegIOMode input_mode = PIXELS;
+  double max_bpp;
+  double max_dist;
+};
+
+class EncodeAPITestParam : public ::testing::TestWithParam<TestConfig> {};
+
+void GenerateInput(JpegIOMode input_mode, const CompressParams& jparams,
+                   TestImage* input) {
+  GeneratePixels(input);
+  if (input_mode == RAW_DATA) {
+    GenerateRawData(jparams, input);
+  } else if (input_mode == COEFFICIENTS) {
+    GenerateCoeffs(jparams, input);
+  }
+}
+
+TEST_P(EncodeAPITestParam, TestAPI) {
+  TestConfig config = GetParam();
+  GenerateInput(config.input_mode, config.jparams, &config.input);
+  std::vector<uint8_t> compressed;
+  ASSERT_TRUE(EncodeWithJpegli(config.input, config.jparams, &compressed));
+  if (config.jparams.icc.empty()) {
+    double bpp =
+        compressed.size() * 8.0 / (config.input.xsize * config.input.ysize);
+    printf("bpp: %f\n", bpp);
+    EXPECT_LT(bpp, config.max_bpp);
+  }
+  DecompressParams dparams;
+  dparams.output_mode =
+      config.input_mode == COEFFICIENTS ? COEFFICIENTS : PIXELS;
+  if (config.jparams.set_jpeg_colorspace &&
+      config.jparams.jpeg_color_space == JCS_GRAYSCALE) {
+    ConvertToGrayscale(&config.input);
+  } else {
+    dparams.set_out_color_space = true;
+    dparams.out_color_space = config.input.color_space;
+  }
+  TestImage output;
+  DecodeWithLibjpeg(config.jparams, dparams, compressed, &output);
+  VerifyOutputImage(config.input, output, config.max_dist);
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameImageTwice) {
+  TestImage input;
+  input.xsize = 129;
+  input.ysize = 73;
+  CompressParams jparams;
+  GenerateInput(PIXELS, jparams, &input);
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  std::vector<uint8_t> compressed0;
+  std::vector<uint8_t> compressed1;
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    EncodeWithJpegli(input, jparams, &cinfo);
+    compressed0.assign(buffer, buffer + buffer_size);
+    jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+    EncodeWithJpegli(input, jparams, &cinfo);
+    compressed1.assign(buffer, buffer + buffer_size);
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+  ASSERT_EQ(compressed0.size(), compressed1.size());
+  EXPECT_EQ(0,
+            memcmp(compressed0.data(), compressed1.data(), compressed0.size()));
+}
+
+std::vector<TestConfig> GenerateBasicConfigs() {
+  std::vector<TestConfig> all_configs;
+  for (int samp : {1, 2}) {
+    for (int progr : {0, 2}) {
+      for (int optimize : {0, 1}) {
+        if (progr && optimize) continue;
+        TestConfig config;
+        config.input.xsize = 257 + samp * 37;
+        config.input.ysize = 265 + optimize * 17;
+        config.jparams.h_sampling = {samp, 1, 1};
+        config.jparams.v_sampling = {samp, 1, 1};
+        config.jparams.progressive_mode = progr;
+        config.jparams.optimize_coding = optimize;
+        config.max_dist = 2.4f;
+        GeneratePixels(&config.input);
+        all_configs.push_back(config);
+      }
+    }
+  }
+  return all_configs;
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameMemOutput) {
+  std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+      for (const TestConfig& config : all_configs) {
+        EncodeWithJpegli(config.input, config.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  size_t pos = 0;
+  for (size_t i = 0; i < all_configs.size(); ++i) {
+    TestImage output;
+    pos +=
+        DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(), nullptr,
+                          0, buffer + pos, buffer_size - pos, &output);
+    VerifyOutputImage(all_configs[i].input, output, all_configs[i].max_dist);
+  }
+  if (buffer) free(buffer);
+}
+
+TEST(EncodeAPITest, ReuseCinfoSameStdOutput) {
+  std::vector<TestConfig> all_configs = GenerateBasicConfigs();
+  FILE* tmpf = tmpfile();
+  JXL_CHECK(tmpf);
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_stdio_dest(&cinfo, tmpf);
+      for (const TestConfig& config : all_configs) {
+        EncodeWithJpegli(config.input, config.jparams, &cinfo);
+      }
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    jpegli_destroy_compress(&cinfo);
+  }
+  size_t total_size = ftell(tmpf);
+  rewind(tmpf);
+  std::vector<uint8_t> compressed(total_size);
+  JXL_CHECK(total_size == fread(&compressed[0], 1, total_size, tmpf));
+  fclose(tmpf);
+  size_t pos = 0;
+  for (size_t i = 0; i < all_configs.size(); ++i) {
+    TestImage output;
+    pos += DecodeWithLibjpeg(all_configs[i].jparams, DecompressParams(),
+                             nullptr, 0, &compressed[pos],
+                             compressed.size() - pos, &output);
+    VerifyOutputImage(all_configs[i].input, output, all_configs[i].max_dist);
+  }
+}
+
+TEST(EncodeAPITest, ReuseCinfoChangeParams) {
+  TestImage input, output;
+  CompressParams jparams;
+  DecompressParams dparams;
+  uint8_t* buffer = nullptr;
+  unsigned long buffer_size = 0;
+  std::vector<uint8_t> compressed;
+  jpeg_compress_struct cinfo;
+  const auto max_rms = [](int q, int hs, int vs) {
+    if (hs == 1 && vs == 1) return q == 90 ? 2.2 : 0.6;
+    if (hs == 2 && vs == 2) return q == 90 ? 2.8 : 1.2;
+    return q == 90 ? 2.4 : 1.0;
+  };
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    input.xsize = 129;
+    input.ysize = 73;
+    dparams.set_out_color_space = true;
+    for (JpegIOMode input_mode : {PIXELS, RAW_DATA, PIXELS, COEFFICIENTS}) {
+      for (int h_samp : {2, 1}) {
+        for (int v_samp : {2, 1}) {
+          for (int progr : {0, 2}) {
+            for (int quality : {90, 100}) {
+              input.Clear();
+              input.color_space =
+                  (input_mode == RAW_DATA ? JCS_YCbCr : JCS_RGB);
+              jparams.quality = quality;
+              jparams.h_sampling = {h_samp, 1, 1};
+              jparams.v_sampling = {v_samp, 1, 1};
+              jparams.progressive_mode = progr;
+              printf(
+                  "Generating input with quality %d chroma subsampling %dx%d "
+                  "input mode %d progressive_mode %d\n",
+                  quality, h_samp, v_samp, input_mode, progr);
+              GenerateInput(input_mode, jparams, &input);
+              jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+              if (input_mode != COEFFICIENTS) {
+                cinfo.image_width = input.xsize;
+                cinfo.image_height = input.ysize;
+                cinfo.input_components = input.components;
+                jpegli_set_defaults(&cinfo);
+                jpegli_start_compress(&cinfo, TRUE);
+                jpegli_abort_compress(&cinfo);
+                jpegli_mem_dest(&cinfo, &buffer, &buffer_size);
+              }
+              EncodeWithJpegli(input, jparams, &cinfo);
+              compressed.resize(buffer_size);
+              std::copy_n(buffer, buffer_size, compressed.data());
+              dparams.output_mode =
+                  input_mode == COEFFICIENTS ? COEFFICIENTS : PIXELS;
+              dparams.out_color_space = input.color_space;
+              output.Clear();
+              DecodeWithLibjpeg(jparams, dparams, compressed, &output);
+              VerifyOutputImage(input, output,
+                                max_rms(quality, h_samp, v_samp));
+            }
+          }
+        }
+      }
+    }
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  if (buffer) free(buffer);
+}
+
+TEST(EncodeAPITest, AbbreviatedStreams) {
+  uint8_t* table_stream = nullptr;
+  unsigned long table_stream_size = 0;
+  uint8_t* data_stream = nullptr;
+  unsigned long data_stream_size = 0;
+  {
+    jpeg_compress_struct cinfo;
+    const auto try_catch_block = [&]() -> bool {
+      ERROR_HANDLER_SETUP(jpegli);
+      jpegli_create_compress(&cinfo);
+      jpegli_mem_dest(&cinfo, &table_stream, &table_stream_size);
+      cinfo.input_components = 3;
+      cinfo.in_color_space = JCS_RGB;
+      jpegli_set_defaults(&cinfo);
+      jpegli_write_tables(&cinfo);
+      jpegli_mem_dest(&cinfo, &data_stream, &data_stream_size);
+      cinfo.image_width = 1;
+      cinfo.image_height = 1;
+      cinfo.optimize_coding = FALSE;
+      jpegli_set_progressive_level(&cinfo, 0);
+      jpegli_start_compress(&cinfo, FALSE);
+      JSAMPLE image[3] = {0};
+      JSAMPROW row[] = {image};
+      jpegli_write_scanlines(&cinfo, row, 1);
+      jpegli_finish_compress(&cinfo);
+      return true;
+    };
+    EXPECT_TRUE(try_catch_block());
+    EXPECT_LT(data_stream_size, 50);
+    jpegli_destroy_compress(&cinfo);
+  }
+  TestImage output;
+  DecodeWithLibjpeg(CompressParams(), DecompressParams(), table_stream,
+                    table_stream_size, data_stream, data_stream_size, &output);
+  EXPECT_EQ(1, output.xsize);
+  EXPECT_EQ(1, output.ysize);
+  EXPECT_EQ(3, output.components);
+  EXPECT_EQ(0, output.pixels[0]);
+  EXPECT_EQ(0, output.pixels[1]);
+  EXPECT_EQ(0, output.pixels[2]);
+  if (table_stream) free(table_stream);
+  if (data_stream) free(data_stream);
+}
+
+void CopyQuantTables(j_compress_ptr cinfo, uint16_t* quant_tables) {
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    int quant_idx = cinfo->comp_info[c].quant_tbl_no;
+    JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx];
+    for (int k = 0; k < DCTSIZE2; ++k) {
+      quant_tables[c * DCTSIZE2 + k] = quant_table->quantval[k];
+    }
+  }
+}
+
+TEST(EncodeAPITest, QualitySettings) {
+  // Test that jpegli_set_quality, jpegli_set_linear_quality and
+  // jpegli_quality_scaling are consistent with each other.
+  uint16_t quant_tables0[3 * DCTSIZE2];
+  uint16_t quant_tables1[3 * DCTSIZE2];
+  jpeg_compress_struct cinfo;
+  const auto try_catch_block = [&]() -> bool {
+    ERROR_HANDLER_SETUP(jpegli);
+    jpegli_create_compress(&cinfo);
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+    jpegli_set_defaults(&cinfo);
+    for (boolean baseline : {FALSE, TRUE}) {
+      for (int q = 1; q <= 100; ++q) {
+        jpegli_set_quality(&cinfo, q, baseline);
+        CopyQuantTables(&cinfo, quant_tables0);
+        jpegli_set_linear_quality(&cinfo, jpegli_quality_scaling(q), baseline);
+        CopyQuantTables(&cinfo, quant_tables1);
+        EXPECT_EQ(0,
+                  memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#if JPEG_LIB_VERSION >= 70
+        for (int i = 0; i < NUM_QUANT_TBLS; ++i) {
+          cinfo.q_scale_factor[i] = jpegli_quality_scaling(q);
+        }
+        jpegli_default_qtables(&cinfo, baseline);
+        CopyQuantTables(&cinfo, quant_tables1);
+        EXPECT_EQ(0,
+                  memcmp(quant_tables0, quant_tables1, sizeof(quant_tables0)));
+#endif
+      }
+    }
+    return true;
+  };
+  EXPECT_TRUE(try_catch_block());
+  jpegli_destroy_compress(&cinfo);
+  // Test jpegli_quality_scaling for some specific values.
+  EXPECT_EQ(5000, jpegli_quality_scaling(-1));
+  EXPECT_EQ(5000, jpegli_quality_scaling(0));
+  EXPECT_EQ(5000, jpegli_quality_scaling(1));
+  EXPECT_EQ(100, jpegli_quality_scaling(50));
+  EXPECT_EQ(50, jpegli_quality_scaling(75));
+  EXPECT_EQ(20, jpegli_quality_scaling(90));
+  EXPECT_EQ(0, jpegli_quality_scaling(100));
+  EXPECT_EQ(0, jpegli_quality_scaling(101));
+}
+
+std::vector<TestConfig> GenerateTests() {
+  std::vector<TestConfig> all_tests;
+  for (int h_samp : {1, 2}) {
+    for (int v_samp : {1, 2}) {
+      for (int progr : {0, 2}) {
+        for (int optimize : {0, 1}) {
+          if (progr && optimize) continue;
+          TestConfig config;
+          config.jparams.h_sampling = {h_samp, 1, 1};
+          config.jparams.v_sampling = {v_samp, 1, 1};
+          config.jparams.progressive_mode = progr;
+          if (!progr) {
+            config.jparams.optimize_coding = optimize;
+          }
+          const float kMaxBpp[4] = {1.55, 1.4, 1.4, 1.32};
+          const float kMaxDist[4] = {1.95, 2.2, 2.2, 2.0};
+          const int idx = v_samp * 2 + h_samp - 3;
+          config.max_bpp =
+              kMaxBpp[idx] * (optimize ? 0.97 : 1.0) * (progr ? 0.97 : 1.0);
+          config.max_dist = kMaxDist[idx];
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  {
+    TestConfig config;
+    config.jparams.quality = 100;
+    config.max_bpp = 6.6;
+    config.max_dist = 0.6;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.quality = 80;
+    config.max_bpp = 1.05;
+    config.max_dist = 2.7;
+    all_tests.push_back(config);
+  }
+  for (int samp : {1, 2}) {
+    for (int progr : {0, 2}) {
+      for (int optimize : {0, 1}) {
+        if (progr && optimize) continue;
+        TestConfig config;
+        config.input.xsize = 257;
+        config.input.ysize = 265;
+        config.jparams.h_sampling = {samp, 1, 1};
+        config.jparams.v_sampling = {samp, 1, 1};
+        config.jparams.progressive_mode = progr;
+        if (!progr) {
+          config.jparams.optimize_coding = optimize;
+        }
+        config.jparams.use_adaptive_quantization = false;
+        config.max_bpp = 2.05f;
+        config.max_dist = 2.3f;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  for (int h0_samp : {1, 2, 4}) {
+    for (int v0_samp : {1, 2, 4}) {
+      for (int h2_samp : {1, 2, 4}) {
+        for (int v2_samp : {1, 2, 4}) {
+          TestConfig config;
+          config.input.xsize = 137;
+          config.input.ysize = 75;
+          config.jparams.progressive_mode = 2;
+          config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+          config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+          config.max_bpp = 2.5;
+          config.max_dist = 12.0;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int h0_samp : {1, 3}) {
+    for (int v0_samp : {1, 3}) {
+      for (int h2_samp : {1, 3}) {
+        for (int v2_samp : {1, 3}) {
+          TestConfig config;
+          config.input.xsize = 205;
+          config.input.ysize = 99;
+          config.jparams.progressive_mode = 2;
+          config.jparams.h_sampling = {h0_samp, 1, h2_samp};
+          config.jparams.v_sampling = {v0_samp, 1, v2_samp};
+          config.max_bpp = 2.5;
+          config.max_dist = 10.0;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int h0_samp : {1, 2, 3, 4}) {
+    for (int v0_samp : {1, 2, 3, 4}) {
+      TestConfig config;
+      config.input.xsize = 217;
+      config.input.ysize = 129;
+      config.jparams.progressive_mode = 2;
+      config.jparams.h_sampling = {h0_samp, 1, 1};
+      config.jparams.v_sampling = {v0_samp, 1, 1};
+      config.max_bpp = 2.0;
+      config.max_dist = 5.5;
+      all_tests.push_back(config);
+    }
+  }
+  for (int p = 0; p < 3 + NumTestScanScripts(); ++p) {
+    for (int samp : {1, 2}) {
+      for (int quality : {100, 90, 1}) {
+        for (int r : {0, 1024, 1}) {
+          for (int optimize : {0, 1}) {
+            bool progressive = p == 1 || p == 2 || p > 4;
+            if (progressive && !optimize) continue;
+            TestConfig config;
+            config.input.xsize = 273;
+            config.input.ysize = 265;
+            config.jparams.progressive_mode = p;
+            if (!progressive) {
+              config.jparams.optimize_coding = optimize;
+            }
+            config.jparams.h_sampling = {samp, 1, 1};
+            config.jparams.v_sampling = {samp, 1, 1};
+            config.jparams.quality = quality;
+            config.jparams.restart_interval = r;
+            config.max_bpp = quality == 100 ? 8.0 : 1.9;
+            if (r == 1) {
+              config.max_bpp += 10.0;
+            }
+            config.max_dist = quality == 1 ? 20.0 : 2.1;
+            all_tests.push_back(config);
+          }
+        }
+      }
+    }
+  }
+  {
+    TestConfig config;
+    config.jparams.simple_progression = true;
+    config.max_bpp = 1.48;
+    config.max_dist = 2.0;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.input_mode = COEFFICIENTS;
+    config.jparams.h_sampling = {2, 1, 1};
+    config.jparams.v_sampling = {2, 1, 1};
+    config.jparams.progressive_mode = 0;
+    config.jparams.optimize_coding = 0;
+    config.max_bpp = 16;
+    config.max_dist = 0.0;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.xyb_mode = true;
+    config.jparams.progressive_mode = 2;
+    config.max_bpp = 1.5;
+    config.max_dist = 3.5;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.libjpeg_mode = true;
+    config.max_bpp = 2.1;
+    config.max_dist = 1.7;
+    all_tests.push_back(config);
+  }
+
+  for (J_COLOR_SPACE in_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+    for (J_COLOR_SPACE jpeg_color_space : {JCS_RGB, JCS_YCbCr, JCS_GRAYSCALE}) {
+      if (jpeg_color_space == JCS_RGB && in_color_space == JCS_YCbCr) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 256;
+      config.input.color_space = in_color_space;
+      config.jparams.set_jpeg_colorspace = true;
+      config.jparams.jpeg_color_space = jpeg_color_space;
+      config.max_bpp = jpeg_color_space == JCS_RGB ? 4.5 : 1.85;
+      config.max_dist = jpeg_color_space == JCS_RGB ? 1.4 : 2.05;
+      all_tests.push_back(config);
+    }
+  }
+  for (J_COLOR_SPACE in_color_space : {JCS_CMYK, JCS_YCCK}) {
+    for (J_COLOR_SPACE jpeg_color_space : {JCS_CMYK, JCS_YCCK}) {
+      if (jpeg_color_space == JCS_CMYK && in_color_space == JCS_YCCK) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 256;
+      config.input.color_space = in_color_space;
+      if (in_color_space != jpeg_color_space) {
+        config.jparams.set_jpeg_colorspace = true;
+        config.jparams.jpeg_color_space = jpeg_color_space;
+      }
+      config.max_bpp = jpeg_color_space == JCS_CMYK ? 4.0 : 3.6;
+      config.max_dist = jpeg_color_space == JCS_CMYK ? 1.2 : 1.5;
+      all_tests.push_back(config);
+    }
+  }
+  {
+    TestConfig config;
+    config.input.color_space = JCS_YCbCr;
+    config.max_bpp = 1.6;
+    config.max_dist = 1.35;
+    all_tests.push_back(config);
+  }
+  for (bool xyb : {false, true}) {
+    TestConfig config;
+    config.input.color_space = JCS_GRAYSCALE;
+    config.jparams.xyb_mode = xyb;
+    config.max_bpp = 1.35;
+    config.max_dist = 1.4;
+    all_tests.push_back(config);
+  }
+  for (int channels = 1; channels <= 4; ++channels) {
+    TestConfig config;
+    config.input.color_space = JCS_UNKNOWN;
+    config.input.components = channels;
+    config.max_bpp = 1.35 * channels;
+    config.max_dist = 1.4;
+    all_tests.push_back(config);
+  }
+  for (size_t r : {1, 3, 17, 1024}) {
+    for (int progr : {0, 2}) {
+      TestConfig config;
+      config.jparams.restart_interval = r;
+      config.jparams.progressive_mode = progr;
+      config.max_bpp = 1.58 + 5.5 / r;
+      config.max_dist = 2.2;
+      all_tests.push_back(config);
+    }
+  }
+  for (size_t rr : {1, 3, 8, 100}) {
+    TestConfig config;
+    config.jparams.restart_in_rows = rr;
+    config.max_bpp = 1.6;
+    config.max_dist = 2.2;
+    all_tests.push_back(config);
+  }
+  for (int type : {0, 1, 10, 100, 10000}) {
+    for (int scale : {1, 50, 100, 200, 500}) {
+      for (bool add_raw : {false, true}) {
+        for (bool baseline : {true, false}) {
+          if (!baseline && (add_raw || type * scale < 25500)) continue;
+          TestConfig config;
+          config.input.xsize = 64;
+          config.input.ysize = 64;
+          CustomQuantTable table;
+          table.table_type = type;
+          table.scale_factor = scale;
+          table.force_baseline = baseline;
+          table.add_raw = add_raw;
+          table.Generate();
+          config.jparams.optimize_coding = 1;
+          config.jparams.quant_tables.push_back(table);
+          config.jparams.quant_indexes = {0, 0, 0};
+          float q = (type == 0 ? 16 : type) * scale * 0.01f;
+          if (baseline && !add_raw) q = std::max(1.0f, std::min(255.0f, q));
+          config.max_bpp = 1.5f + 25.0f / q;
+          config.max_dist = 0.6f + 0.25f * q;
+          all_tests.push_back(config);
+        }
+      }
+    }
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    if (qidx == 3) continue;
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                    (qidx >> 0) & 1};
+    config.max_bpp = 2.25;
+    config.max_dist = 2.8;
+    all_tests.push_back(config);
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    for (int slot_idx = 0; slot_idx < 2; ++slot_idx) {
+      if (qidx == 0 && slot_idx == 0) continue;
+      TestConfig config;
+      config.input.xsize = 256;
+      config.input.ysize = 256;
+      config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                      (qidx >> 0) & 1};
+      CustomQuantTable table;
+      table.slot_idx = slot_idx;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+      config.max_bpp = 2.3;
+      config.max_dist = 2.9;
+      all_tests.push_back(config);
+    }
+  }
+  for (int qidx = 0; qidx < 8; ++qidx) {
+    for (bool xyb : {false, true}) {
+      TestConfig config;
+      config.input.xsize = 256;
+      config.input.ysize = 256;
+      config.jparams.xyb_mode = xyb;
+      config.jparams.quant_indexes = {(qidx >> 2) & 1, (qidx >> 1) & 1,
+                                      (qidx >> 0) & 1};
+      {
+        CustomQuantTable table;
+        table.slot_idx = 0;
+        table.Generate();
+        config.jparams.quant_tables.push_back(table);
+      }
+      {
+        CustomQuantTable table;
+        table.slot_idx = 1;
+        table.table_type = 20;
+        table.Generate();
+        config.jparams.quant_tables.push_back(table);
+      }
+      config.max_bpp = 2.0;
+      config.max_dist = 3.85;
+      all_tests.push_back(config);
+    }
+  }
+  for (bool xyb : {false, true}) {
+    TestConfig config;
+    config.input.xsize = 256;
+    config.input.ysize = 256;
+    config.jparams.xyb_mode = xyb;
+    config.jparams.quant_indexes = {0, 1, 2};
+    {
+      CustomQuantTable table;
+      table.slot_idx = 0;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    {
+      CustomQuantTable table;
+      table.slot_idx = 1;
+      table.table_type = 20;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    {
+      CustomQuantTable table;
+      table.slot_idx = 2;
+      table.table_type = 30;
+      table.Generate();
+      config.jparams.quant_tables.push_back(table);
+    }
+    config.max_bpp = 1.5;
+    config.max_dist = 3.75;
+    all_tests.push_back(config);
+  }
+  {
+    TestConfig config;
+    config.jparams.comp_ids = {7, 17, 177};
+    config.input.xsize = config.input.ysize = 128;
+    config.max_bpp = 2.25;
+    config.max_dist = 2.4;
+    all_tests.push_back(config);
+  }
+  for (int override_JFIF : {-1, 0, 1}) {
+    for (int override_Adobe : {-1, 0, 1}) {
+      if (override_JFIF == -1 && override_Adobe == -1) continue;
+      TestConfig config;
+      config.input.xsize = config.input.ysize = 128;
+      config.jparams.override_JFIF = override_JFIF;
+      config.jparams.override_Adobe = override_Adobe;
+      config.max_bpp = 2.25;
+      config.max_dist = 2.4;
+      all_tests.push_back(config);
+    }
+  }
+  {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.max_bpp = 1.85;
+    config.max_dist = 2.05;
+    config.jparams.add_marker = true;
+    all_tests.push_back(config);
+  }
+  for (size_t icc_size : {728, 70000, 1000000}) {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.max_dist = 2.05;
+    config.jparams.icc.resize(icc_size);
+    for (size_t i = 0; i < icc_size; ++i) {
+      config.jparams.icc[i] = (i * 17) & 0xff;
+    }
+    all_tests.push_back(config);
+  }
+  for (JpegIOMode input_mode : {PIXELS, RAW_DATA, COEFFICIENTS}) {
+    TestConfig config;
+    config.input.xsize = config.input.ysize = 256;
+    config.input_mode = input_mode;
+    if (input_mode == RAW_DATA) {
+      config.input.color_space = JCS_YCbCr;
+    }
+    config.jparams.progressive_mode = 0;
+    config.jparams.optimize_coding = 0;
+    config.max_bpp = 1.85;
+    config.max_dist = 2.05;
+    if (input_mode == COEFFICIENTS) {
+      config.max_bpp = 3.5;
+      config.max_dist = 0.0;
+    }
+    all_tests.push_back(config);
+    config.jparams.use_flat_dc_luma_code = true;
+    all_tests.push_back(config);
+  }
+  for (int xsize : {640, 641, 648, 649}) {
+    for (int ysize : {640, 641, 648, 649}) {
+      for (int h_sampling : {1, 2}) {
+        for (int v_sampling : {1, 2}) {
+          if (h_sampling == 1 && v_sampling == 1) continue;
+          for (int progr : {0, 2}) {
+            TestConfig config;
+            config.input.xsize = xsize;
+            config.input.ysize = ysize;
+            config.input.color_space = JCS_YCbCr;
+            config.jparams.h_sampling = {h_sampling, 1, 1};
+            config.jparams.v_sampling = {v_sampling, 1, 1};
+            config.jparams.progressive_mode = progr;
+            config.input_mode = RAW_DATA;
+            config.max_bpp = 1.75;
+            config.max_dist = 2.0;
+            all_tests.push_back(config);
+            config.input_mode = COEFFICIENTS;
+            if (xsize & 1) {
+              config.jparams.add_marker = true;
+            }
+            config.max_bpp = 24.0;
+            all_tests.push_back(config);
+          }
+        }
+      }
+    }
+  }
+  for (JpegliDataType data_type : {JPEGLI_TYPE_UINT16, JPEGLI_TYPE_FLOAT}) {
+    for (JpegliEndianness endianness :
+         {JPEGLI_LITTLE_ENDIAN, JPEGLI_BIG_ENDIAN, JPEGLI_NATIVE_ENDIAN}) {
+      J_COLOR_SPACE colorspace[4] = {JCS_GRAYSCALE, JCS_UNKNOWN, JCS_RGB,
+                                     JCS_CMYK};
+      float max_bpp[4] = {1.32, 2.7, 1.6, 4.0};
+      for (int channels = 1; channels <= 4; ++channels) {
+        TestConfig config;
+        config.input.data_type = data_type;
+        config.input.endianness = endianness;
+        config.input.components = channels;
+        config.input.color_space = colorspace[channels - 1];
+        config.max_bpp = max_bpp[channels - 1];
+        config.max_dist = 2.2;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  for (int smoothing : {1, 5, 50, 100}) {
+    for (int h_sampling : {1, 2}) {
+      for (int v_sampling : {1, 2}) {
+        TestConfig config;
+        config.input.xsize = 257;
+        config.input.ysize = 265;
+        config.jparams.smoothing_factor = smoothing;
+        config.jparams.h_sampling = {h_sampling, 1, 1};
+        config.jparams.v_sampling = {v_sampling, 1, 1};
+        config.max_bpp = 1.85;
+        config.max_dist = 3.05f;
+        all_tests.push_back(config);
+      }
+    }
+  }
+  return all_tests;
+};
+
+std::ostream& operator<<(std::ostream& os, const TestConfig& c) {
+  os << c.input;
+  os << c.jparams;
+  if (c.input_mode == RAW_DATA) {
+    os << "RawDataIn";
+  } else if (c.input_mode == COEFFICIENTS) {
+    os << "WriteCoeffs";
+  }
+  return os;
+}
+
+std::string TestDescription(
+    const testing::TestParamInfo<EncodeAPITestParam::ParamType>& info) {
+  std::stringstream name;
+  name << info.param;
+  return name.str();
+}
+
+JPEGLI_INSTANTIATE_TEST_SUITE_P(EncodeAPITest, EncodeAPITestParam,
+                                testing::ValuesIn(GenerateTests()),
+                                TestDescription);
+}  // namespace
+}  // namespace jpegli
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_finish.cc b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.cc
new file mode 100644
index 0000000000..955676bdee
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.cc
@@ -0,0 +1,230 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jpegli/encode_finish.h"
+
+#include <cmath>  // NOTE(review): this and the next system header name were lost in extraction; inferred from log/std::exp and std::numeric_limits use - confirm
+#include <limits>
+
+#include "lib/jpegli/error.h"
+#include "lib/jpegli/memory_manager.h"
+#include "lib/jpegli/quant.h"
+
+#undef HWY_TARGET_INCLUDE
+#define HWY_TARGET_INCLUDE "lib/jpegli/encode_finish.cc"
+#include <hwy/foreach_target.h>
+#include <hwy/highway.h>
+
+#include "lib/jpegli/dct-inl.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jpegli {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::GetLane;
+
+using D = HWY_FULL(float);
+using DI = HWY_FULL(int32_t);
+using DI16 = Rebind<int16_t, D>;
+
+void ReQuantizeBlock(int16_t* block, const float* qmc, float aq_strength,
+                     const float* zero_bias_offset,
+                     const float* zero_bias_mul) {
+  D d;
+  DI di;
+  DI16 di16;
+  const auto aq_mul = Set(d, aq_strength);
+  for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
+    const auto in = Load(di16, block + k);
+    const auto val = ConvertTo(d, PromoteTo(di, in));
+    const auto q = Load(d, qmc + k);
+    const auto qval = Mul(val, q);
+    const auto zb_offset = Load(d, zero_bias_offset + k);
+    const auto zb_mul = Load(d, zero_bias_mul + k);
+    const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
+    const auto nzero_mask = Ge(Abs(qval), threshold);
+    const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
+    Store(DemoteTo(di16, ConvertTo(di, iqval)), di16, block + k);
+  }
+}
+
+float BlockError(const int16_t* block, const float* qmc, const float* iqmc,
+                 const float aq_strength, const float* zero_bias_offset,
+                 const float* zero_bias_mul) {
+  D d;
+  DI di;
+  DI16 di16;
+  auto err = Zero(d);
+  const auto scale = Set(d, 1.0 / 16);
+  const auto aq_mul = Set(d, aq_strength);
+  for (size_t k = 0; k < DCTSIZE2; k += Lanes(d)) {
+    const auto in = Load(di16, block + k);
+    const auto val = ConvertTo(d, PromoteTo(di, in));
+    const auto q = Load(d, qmc + k);
+    const auto qval = Mul(val, q);
+    const auto zb_offset = Load(d, zero_bias_offset + k);
+    const auto zb_mul = Load(d, zero_bias_mul + k);
+    const auto threshold = Add(zb_offset, Mul(zb_mul, aq_mul));
+    const auto nzero_mask = Ge(Abs(qval), threshold);
+    const auto iqval = IfThenElseZero(nzero_mask, Round(qval));
+    const auto invq = Load(d, iqmc + k);
+    const auto rval = Mul(iqval, invq);
+    const auto diff = Mul(Sub(val, rval), scale);
+    err = Add(err, Mul(diff, diff));
+  }
+  return GetLane(SumOfLanes(d, err));
+}
+
+void ComputeInverseWeights(const float* qmc, float* iqmc) {
+  for (int k = 0; k < 64; ++k) {
+    iqmc[k] = 1.0f / qmc[k];
+  }
+}
+
+float ComputePSNR(j_compress_ptr cinfo, int sampling) {
+  jpeg_comp_master* m = cinfo->master;
+  InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
+  double error = 0.0;
+  size_t num = 0;
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    const float* qmc = m->quant_mul[c];
+    const int h_factor = m->h_factor[c];
+    const int v_factor = m->v_factor[c];
+    const float* zero_bias_offset = m->zero_bias_offset[c];
+    const float* zero_bias_mul = m->zero_bias_mul[c];
+    HWY_ALIGN float iqmc[64];
+    ComputeInverseWeights(qmc, iqmc);
+    for (JDIMENSION by = 0; by < comp->height_in_blocks; by += sampling) {
+      JBLOCKARRAY ba = GetBlockRow(cinfo, c, by);
+      const float* qf = m->quant_field.Row(by * v_factor);
+      for (JDIMENSION bx = 0; bx < comp->width_in_blocks; bx += sampling) {
+        error += BlockError(&ba[0][bx][0], qmc, iqmc, qf[bx * h_factor],
+                            zero_bias_offset, zero_bias_mul);
+        num += DCTSIZE2;
+      }
+    }
+  }
+  return 4.3429448f * log(num / (error / 255. / 255.));
+}
+
+void ReQuantizeCoeffs(j_compress_ptr cinfo) {
+  jpeg_comp_master* m = cinfo->master;
+  InitQuantizer(cinfo, QuantPass::SEARCH_SECOND_PASS);
+  for (int c = 0; c < cinfo->num_components; ++c) {
+    jpeg_component_info* comp = &cinfo->comp_info[c];
+    const float* qmc = m->quant_mul[c];
+    const int h_factor = m->h_factor[c];
+    const int v_factor = m->v_factor[c];
+    const float* zero_bias_offset = m->zero_bias_offset[c];
+    const float* zero_bias_mul = m->zero_bias_mul[c];
+    for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) {
+      JBLOCKARRAY ba = GetBlockRow(cinfo, c, by);
+      const float* qf = m->quant_field.Row(by * v_factor);
+      for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) {
+        ReQuantizeBlock(&ba[0][bx][0], qmc, qf[bx * h_factor], zero_bias_offset,
+                        zero_bias_mul);
+      }
+    }
+  }
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jpegli
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jpegli {
+namespace {
+HWY_EXPORT(ComputePSNR);
+HWY_EXPORT(ReQuantizeCoeffs);
+
+void ReQuantizeCoeffs(j_compress_ptr cinfo) {
+  HWY_DYNAMIC_DISPATCH(ReQuantizeCoeffs)(cinfo);
+}
+
+float ComputePSNR(j_compress_ptr cinfo, int sampling) {
+  return HWY_DYNAMIC_DISPATCH(ComputePSNR)(cinfo, sampling);
+}
+
+void UpdateDistance(j_compress_ptr cinfo, float distance) {
+  float distances[NUM_QUANT_TBLS] = {distance, distance, distance};
+  SetQuantMatrices(cinfo, distances, /*add_two_chroma_tables=*/true);
+}
+
+float Clamp(float val, float minval, float maxval) {
+  return std::max(minval, std::min(maxval, val));
+}
+
+#define PSNR_SEARCH_DBG 0
+
+float FindDistanceForPSNR(j_compress_ptr cinfo) {
+  constexpr int kMaxIters = 20;
+  const float psnr_target = cinfo->master->psnr_target;
+  const float tolerance = cinfo->master->psnr_tolerance;
+  const float min_dist = cinfo->master->min_distance;
+  const float max_dist = cinfo->master->max_distance;
+  float d = Clamp(1.0f, min_dist, max_dist);
+  for (int sampling : {4, 1}) {
+    float best_diff = std::numeric_limits<float>::max();
+    float best_distance = 0.0f;  // NOTE(review): stays 0 (not clamped to min_dist) if no iteration yields a finite psnr - confirm this cannot happen
+    float best_psnr = 0.0;
+    float dmin = min_dist;
+    float dmax = max_dist;
+    bool found_lower_bound = false;
+    bool found_upper_bound = false;
+    for (int i = 0; i < kMaxIters; ++i) {
+      UpdateDistance(cinfo, d);
+      float psnr = ComputePSNR(cinfo, sampling);
+      if (psnr > psnr_target) {
+        dmin = d;
+        found_lower_bound = true;
+      } else {
+        dmax = d;
+        found_upper_bound = true;
+      }
+#if (PSNR_SEARCH_DBG > 1)
+      printf("sampling %d iter %2d d %7.4f psnr %.2f", sampling, i, d, psnr);
+      if (found_upper_bound && found_lower_bound) {
+        printf(" d-interval: [ %7.4f .. %7.4f ]", dmin, dmax);
+      }
+      printf("\n");
+#endif
+      float diff = std::abs(psnr - psnr_target);
+      if (diff < best_diff) {
+        best_diff = diff;
+        best_distance = d;
+        best_psnr = psnr;
+      }
+      if (diff < tolerance * psnr_target || dmin == dmax) {
+        break;
+      }
+      if (!found_lower_bound || !found_upper_bound) {
+        d *= std::exp(0.15f * (psnr - psnr_target));
+      } else {
+        d = 0.5f * (dmin + dmax);
+      }
+      d = Clamp(d, min_dist, max_dist);
+    }
+    d = best_distance;
+    if (sampling == 1 && PSNR_SEARCH_DBG) {
+      printf("Final PSNR %.2f at distance %.4f\n", best_psnr, d);
+    }
+  }
+  return d;
+}
+
+}  // namespace
+
+void QuantizetoPSNR(j_compress_ptr cinfo) {
+  float distance = FindDistanceForPSNR(cinfo);
+  UpdateDistance(cinfo, distance);
+  ReQuantizeCoeffs(cinfo);
+}
+
+}  // namespace jpegli
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_finish.h b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.h
new file mode 100644
index 0000000000..f6862decb9
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_finish.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_FINISH_H_
+#define LIB_JPEGLI_ENCODE_FINISH_H_
+
+#include "lib/jpegli/encode_internal.h"
+
+namespace jpegli {
+
+void QuantizetoPSNR(j_compress_ptr cinfo);
+
+}  // namespace jpegli
+
+#endif  // LIB_JPEGLI_ENCODE_FINISH_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_internal.h b/third-party/libjxl/libjxl/lib/jpegli/encode_internal.h
new file mode 100644
index 0000000000..4dbef97538
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_internal.h
@@ -0,0 +1,141 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JPEGLI_ENCODE_INTERNAL_H_
+#define LIB_JPEGLI_ENCODE_INTERNAL_H_
+
+#include <stdint.h>  // NOTE(review): header name lost in extraction; inferred from the fixed-width integer types used below - confirm
+
+#include "lib/jpegli/bit_writer.h"
+#include "lib/jpegli/common.h"
+#include "lib/jpegli/common_internal.h"
+#include "lib/jpegli/encode.h"
+
+namespace jpegli {
+
+constexpr unsigned char kICCSignature[12] = {
+    0x49, 0x43, 0x43, 0x5F, 0x50, 0x52, 0x4F, 0x46, 0x49, 0x4C, 0x45, 0x00};
+constexpr int kICCMarker = JPEG_APP0 + 2;
+
+constexpr int kDefaultProgressiveLevel = 0;  // NOTE(review): the jpegli_set_progressive_level() comment in encode.h says "Default is 2." - confirm which is intended
+
+typedef int16_t coeff_t;
+
+struct HuffmanCodeTable {
+  int depth[256];
+  int code[256];
+};
+
+struct Token {
+  uint8_t context;
+  uint8_t symbol;
+  uint16_t bits;
+  Token(int c, int s, int b) : context(c), symbol(s), bits(b) {}
+};
+
+struct TokenArray {
+  Token* tokens;
+  size_t num_tokens;
+};
+
+struct RefToken {
+  uint8_t symbol;
+  uint8_t refbits;
+};
+
+struct ScanTokenInfo {
+  RefToken* tokens;
+  size_t num_tokens;
+  uint8_t* refbits;
+  uint16_t* eobruns;
+  size_t* restarts;
+  size_t num_restarts;
+  size_t num_nonzeros;
+  size_t num_future_nonzeros;
+  size_t token_offset;
+  size_t restart_interval;
+  size_t MCUs_per_row;
+  size_t MCU_rows_in_scan;
+  size_t blocks_in_MCU;
+  size_t num_blocks;
+};
+
+}  // namespace jpegli
+
+struct jpeg_comp_master {
+  jpegli::RowBuffer<float> input_buffer[jpegli::kMaxComponents];  // NOTE(review): <float> on every RowBuffer member was lost in extraction; restored from quant_field.Row() being read as const float* - confirm
+  jpegli::RowBuffer<float>* smooth_input[jpegli::kMaxComponents];
+  jpegli::RowBuffer<float>* raw_data[jpegli::kMaxComponents];
+  bool force_baseline;
+  bool xyb_mode;
+  uint8_t cicp_transfer_function;
+  bool use_std_tables;
+  bool use_adaptive_quantization;
+  int progressive_level;
+  size_t xsize_blocks;
+  size_t ysize_blocks;
+  size_t blocks_per_iMCU_row;
+  jpegli::ScanTokenInfo* scan_token_info;
+  JpegliDataType data_type;
+  JpegliEndianness endianness;
+  void (*input_method)(const uint8_t* row_in, size_t len,
+                       float* row_out[jpegli::kMaxComponents]);
+  void (*color_transform)(float* row[jpegli::kMaxComponents], size_t len);
+  void (*downsample_method[jpegli::kMaxComponents])(
+      float* rows_in[MAX_SAMP_FACTOR], size_t len, float* row_out);
+  float* quant_mul[jpegli::kMaxComponents];
+  float* zero_bias_offset[jpegli::kMaxComponents];
+  float* zero_bias_mul[jpegli::kMaxComponents];
+  int h_factor[jpegli::kMaxComponents];
+  int v_factor[jpegli::kMaxComponents];
+  // Array of Huffman tables that will be encoded in one or more DHT segments.
+  // In progressive mode we compute all Huffman tables that will be used in any
+  // of the scans, thus we can have more than 4 tables here.
+  JHUFF_TBL* huffman_tables;
+  size_t num_huffman_tables;
+  // Array of num_huffman_tables slot ids, where the ith element is the slot id
+  // of the ith Huffman table, as it appears in the DHT segment. The range of
+  // the slot ids is 0..3 for DC and 16..19 for AC Huffman codes.
+  uint8_t* slot_id_map;
+  // Maps context ids to an index in the huffman_tables array. Each component in
+  // each scan has a DC and AC context id, which are defined as follows:
+  //   - DC context id is the component index (relative to cinfo->comp_info) of
+  //     the scan component
+  //   - AC context ids start at 4 and are increased for each component of each
+  //     scan that have AC components (i.e. Se > 0)
+  uint8_t* context_map;
+  size_t num_contexts;
+  // Array of cinfo->num_scans context ids, where the ith element is the context
+  // id of the first AC component of the ith scan.
+  uint8_t* ac_ctx_offset;
+  // Array of num_huffman_tables derived coding tables.
+  jpegli::HuffmanCodeTable* coding_tables;
+  float* diff_buffer;
+  jpegli::RowBuffer<float> fuzzy_erosion_tmp;
+  jpegli::RowBuffer<float> pre_erosion;
+  jpegli::RowBuffer<float> quant_field;
+  jvirt_barray_ptr* coeff_buffers;
+  size_t next_input_row;
+  size_t next_iMCU_row;
+  size_t next_dht_index;
+  size_t last_restart_interval;
+  JCOEF last_dc_coeff[MAX_COMPS_IN_SCAN];
+  jpegli::JpegBitWriter bw;
+  float* dct_buffer;
+  int32_t* block_tmp;
+  jpegli::TokenArray* token_arrays;
+  size_t cur_token_array;
+  jpegli::Token* next_token;
+  size_t num_tokens;
+  size_t total_num_tokens;
+  jpegli::RefToken* next_refinement_token;
+  uint8_t* next_refinement_bit;
+  float psnr_target;
+  float psnr_tolerance;
+  float min_distance;
+  float max_distance;
+};
+
+#endif  // LIB_JPEGLI_ENCODE_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.cc b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.cc
new file mode 100644
index 0000000000..89dbd813f8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.cc
@@ -0,0 +1,259 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+ +#include "lib/jpegli/encode_streaming.h" + +#include + +#include "lib/jpegli/bit_writer.h" +#include "lib/jpegli/bitstream.h" +#include "lib/jpegli/entropy_coding.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jxl/base/bits.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/encode_streaming.cc" +#include +#include + +#include "lib/jpegli/dct-inl.h" +#include "lib/jpegli/entropy_coding-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +static const int kStreamingModeCoefficients = 0; +static const int kStreamingModeTokens = 1; +static const int kStreamingModeBits = 2; + +namespace { +void ZigZagShuffle(int32_t* JXL_RESTRICT block) { + // TODO(szabadka) SIMDify this. + int32_t tmp[DCTSIZE2]; + tmp[0] = block[0]; + tmp[1] = block[1]; + tmp[2] = block[8]; + tmp[3] = block[16]; + tmp[4] = block[9]; + tmp[5] = block[2]; + tmp[6] = block[3]; + tmp[7] = block[10]; + tmp[8] = block[17]; + tmp[9] = block[24]; + tmp[10] = block[32]; + tmp[11] = block[25]; + tmp[12] = block[18]; + tmp[13] = block[11]; + tmp[14] = block[4]; + tmp[15] = block[5]; + tmp[16] = block[12]; + tmp[17] = block[19]; + tmp[18] = block[26]; + tmp[19] = block[33]; + tmp[20] = block[40]; + tmp[21] = block[48]; + tmp[22] = block[41]; + tmp[23] = block[34]; + tmp[24] = block[27]; + tmp[25] = block[20]; + tmp[26] = block[13]; + tmp[27] = block[6]; + tmp[28] = block[7]; + tmp[29] = block[14]; + tmp[30] = block[21]; + tmp[31] = block[28]; + tmp[32] = block[35]; + tmp[33] = block[42]; + tmp[34] = block[49]; + tmp[35] = block[56]; + tmp[36] = block[57]; + tmp[37] = block[50]; + tmp[38] = block[43]; + tmp[39] = block[36]; + tmp[40] = block[29]; + tmp[41] = block[22]; + tmp[42] = block[15]; + tmp[43] = block[23]; + tmp[44] = block[30]; + tmp[45] = block[37]; + tmp[46] = block[44]; + tmp[47] = block[51]; + tmp[48] = block[58]; + tmp[49] = block[59]; + tmp[50] = block[52]; + tmp[51] = block[45]; + tmp[52] = block[38]; + 
tmp[53] = block[31]; + tmp[54] = block[39]; + tmp[55] = block[46]; + tmp[56] = block[53]; + tmp[57] = block[60]; + tmp[58] = block[61]; + tmp[59] = block[54]; + tmp[60] = block[47]; + tmp[61] = block[55]; + tmp[62] = block[62]; + tmp[63] = block[63]; + memcpy(block, tmp, DCTSIZE2 * sizeof(tmp[0])); +} +} // namespace + +template +void ProcessiMCURow(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + JpegBitWriter* bw = &m->bw; + int xsize_mcus = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor); + int ysize_mcus = DivCeil(cinfo->image_height, 8 * cinfo->max_v_samp_factor); + int mcu_y = m->next_iMCU_row; + int32_t* block = m->block_tmp; + int32_t* symbols = m->block_tmp + DCTSIZE2; + int32_t* nonzero_idx = m->block_tmp + 3 * DCTSIZE2; + coeff_t* JXL_RESTRICT last_dc_coeff = m->last_dc_coeff; + bool adaptive_quant = m->use_adaptive_quantization && m->psnr_target == 0; + JBLOCKARRAY ba[kMaxComponents]; + if (kMode == kStreamingModeCoefficients) { + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + int by0 = mcu_y * comp->v_samp_factor; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(comp->v_samp_factor, block_rows_left); + ba[c] = (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), m->coeff_buffers[c], by0, + max_block_rows, true); + } + } + if (kMode == kStreamingModeTokens) { + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo); + if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = + EstimateNumTokens(cinfo, mcu_y, ysize_mcus, m->total_num_tokens, + max_tokens_per_mcu_row); + ta->tokens = Allocate(cinfo, m->num_tokens, JPOOL_IMAGE); + m->next_token = ta->tokens; + } + } + const float* imcu_start[kMaxComponents]; + for 
(int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + imcu_start[c] = m->raw_data[c]->Row(mcu_y * comp->v_samp_factor * DCTSIZE); + } + const float* qf = nullptr; + if (adaptive_quant) { + qf = m->quant_field.Row(0); + } + HuffmanCodeTable* dc_code = nullptr; + HuffmanCodeTable* ac_code = nullptr; + const size_t qf_stride = m->quant_field.stride(); + for (int mcu_x = 0; mcu_x < xsize_mcus; ++mcu_x) { + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + if (kMode == kStreamingModeBits) { + dc_code = &m->coding_tables[m->context_map[c]]; + ac_code = &m->coding_tables[m->context_map[c + 4]]; + } + float* JXL_RESTRICT qmc = m->quant_mul[c]; + const size_t stride = m->raw_data[c]->stride(); + const int h_factor = m->h_factor[c]; + const float* zero_bias_offset = m->zero_bias_offset[c]; + const float* zero_bias_mul = m->zero_bias_mul[c]; + float aq_strength = 0.0f; + for (int iy = 0; iy < comp->v_samp_factor; ++iy) { + for (int ix = 0; ix < comp->h_samp_factor; ++ix) { + size_t by = mcu_y * comp->v_samp_factor + iy; + size_t bx = mcu_x * comp->h_samp_factor + ix; + if (bx >= comp->width_in_blocks || by >= comp->height_in_blocks) { + if (kMode == kStreamingModeTokens) { + *m->next_token++ = Token(c, 0, 0); + *m->next_token++ = Token(c + 4, 0, 0); + } else if (kMode == kStreamingModeBits) { + WriteBits(bw, dc_code->depth[0], dc_code->code[0]); + WriteBits(bw, ac_code->depth[0], ac_code->code[0]); + } + continue; + } + if (adaptive_quant) { + aq_strength = qf[iy * qf_stride + bx * h_factor]; + } + const float* pixels = imcu_start[c] + (iy * stride + bx) * DCTSIZE; + ComputeCoefficientBlock(pixels, stride, qmc, last_dc_coeff[c], + aq_strength, zero_bias_offset, zero_bias_mul, + m->dct_buffer, block); + if (kMode == kStreamingModeCoefficients) { + JCOEF* cblock = &ba[c][iy][bx][0]; + for (int k = 0; k < DCTSIZE2; ++k) { + cblock[k] = block[kJPEGNaturalOrder[k]]; + } + } + 
block[0] -= last_dc_coeff[c]; + last_dc_coeff[c] += block[0]; + if (kMode == kStreamingModeTokens) { + ComputeTokensForBlock(block, 0, c, c + 4, + &m->next_token); + } else if (kMode == kStreamingModeBits) { + ZigZagShuffle(block); + const int num_nonzeros = CompactBlock(block, nonzero_idx); + const bool emit_eob = nonzero_idx[num_nonzeros - 1] < 1008; + ComputeSymbols(num_nonzeros, nonzero_idx, block, symbols); + WriteBlock(symbols, block, num_nonzeros, emit_eob, dc_code, ac_code, + bw); + } + } + } + } + } + if (kMode == kStreamingModeTokens) { + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + ta->num_tokens = m->next_token - ta->tokens; + ScanTokenInfo* sti = &m->scan_token_info[0]; + sti->num_tokens = m->total_num_tokens + ta->num_tokens; + sti->restarts[0] = sti->num_tokens; + } +} + +void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) { + ProcessiMCURow(cinfo); +} + +void ComputeTokensForiMCURow(j_compress_ptr cinfo) { + ProcessiMCURow(cinfo); +} + +void WriteiMCURow(j_compress_ptr cinfo) { + ProcessiMCURow(cinfo); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { +HWY_EXPORT(ComputeCoefficientsForiMCURow); +HWY_EXPORT(ComputeTokensForiMCURow); +HWY_EXPORT(WriteiMCURow); + +void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(ComputeCoefficientsForiMCURow)(cinfo); +} + +void ComputeTokensForiMCURow(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(ComputeTokensForiMCURow)(cinfo); +} + +void WriteiMCURow(j_compress_ptr cinfo) { + HWY_DYNAMIC_DISPATCH(WriteiMCURow)(cinfo); +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.h b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.h new file mode 100644 index 0000000000..69acff4eaf --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/encode_streaming.h @@ -0,0 +1,21 @@ +// Copyright 
(c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_ENCODE_STREAMING_H_ +#define LIB_JPEGLI_ENCODE_STREAMING_H_ + +#include "lib/jpegli/encode_internal.h" + +namespace jpegli { + +void ComputeCoefficientsForiMCURow(j_compress_ptr cinfo); + +void ComputeTokensForiMCURow(j_compress_ptr cinfo); + +void WriteiMCURow(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_ENCODE_STREAMING_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/entropy_coding-inl.h b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding-inl.h new file mode 100644 index 0000000000..bfb436d795 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding-inl.h @@ -0,0 +1,213 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JPEGLI_ENTROPY_CODING_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JPEGLI_ENTROPY_CODING_INL_H_ +#undef LIB_JPEGLI_ENTROPY_CODING_INL_H_ +#else +#define LIB_JPEGLI_ENTROPY_CODING_INL_H_ +#endif + +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::AndNot; +using hwy::HWY_NAMESPACE::Compress; +using hwy::HWY_NAMESPACE::CountTrue; +using hwy::HWY_NAMESPACE::Eq; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::MaskFromVec; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Not; +using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Shl; +using hwy::HWY_NAMESPACE::Sub; + +using DI = HWY_FULL(int32_t); +constexpr DI di; + +template +JXL_INLINE V NumBits(DI di, const V x) { + // TODO(szabadka) Add faster implementations for some specific architectures. + const auto b1 = And(x, Set(di, 1)); + const auto b2 = And(x, Set(di, 2)); + const auto b3 = Sub((And(x, Set(di, 4))), Set(di, 1)); + const auto b4 = Sub((And(x, Set(di, 8))), Set(di, 4)); + const auto b5 = Sub((And(x, Set(di, 16))), Set(di, 11)); + const auto b6 = Sub((And(x, Set(di, 32))), Set(di, 26)); + const auto b7 = Sub((And(x, Set(di, 64))), Set(di, 57)); + const auto b8 = Sub((And(x, Set(di, 128))), Set(di, 120)); + const auto b9 = Sub((And(x, Set(di, 256))), Set(di, 247)); + const auto b10 = Sub((And(x, Set(di, 512))), Set(di, 502)); + const auto b11 = Sub((And(x, Set(di, 1024))), Set(di, 1013)); + const auto b12 = Sub((And(x, Set(di, 2048))), Set(di, 2036)); + return Max(Max(Max(Max(b1, b2), Max(b3, b4)), Max(Max(b5, b6), Max(b7, b8))), + Max(Max(b9, b10), Max(b11, b12))); +} + +// Coefficient indexes pre-multiplied by 16 for the symbol calculation. 
+HWY_ALIGN constexpr int32_t kIndexes[64] = { + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, + 208, 224, 240, 256, 272, 288, 304, 320, 336, 352, 368, 384, 400, + 416, 432, 448, 464, 480, 496, 512, 528, 544, 560, 576, 592, 608, + 624, 640, 656, 672, 688, 704, 720, 736, 752, 768, 784, 800, 816, + 832, 848, 864, 880, 896, 912, 928, 944, 960, 976, 992, 1008, +}; + +JXL_INLINE int CompactBlock(int32_t* JXL_RESTRICT block, + int32_t* JXL_RESTRICT nonzero_idx) { + const auto zero = Zero(di); + HWY_ALIGN constexpr int32_t dc_mask_lanes[HWY_LANES(DI)] = {-1}; + const auto dc_mask = MaskFromVec(Load(di, dc_mask_lanes)); + int num_nonzeros = 0; + int k = 0; + { + const auto coef = Load(di, block); + const auto idx = Load(di, kIndexes); + const auto nonzero_mask = Or(dc_mask, Not(Eq(coef, zero))); + const auto nzero_coef = Compress(coef, nonzero_mask); + const auto nzero_idx = Compress(idx, nonzero_mask); + StoreU(nzero_coef, di, &block[num_nonzeros]); + StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]); + num_nonzeros += CountTrue(di, nonzero_mask); + k += Lanes(di); + } + for (; k < DCTSIZE2; k += Lanes(di)) { + const auto coef = Load(di, &block[k]); + const auto idx = Load(di, &kIndexes[k]); + const auto nonzero_mask = Not(Eq(coef, zero)); + const auto nzero_coef = Compress(coef, nonzero_mask); + const auto nzero_idx = Compress(idx, nonzero_mask); + StoreU(nzero_coef, di, &block[num_nonzeros]); + StoreU(nzero_idx, di, &nonzero_idx[num_nonzeros]); + num_nonzeros += CountTrue(di, nonzero_mask); + } + return num_nonzeros; +} + +JXL_INLINE void ComputeSymbols(const int num_nonzeros, + int32_t* JXL_RESTRICT nonzero_idx, + int32_t* JXL_RESTRICT block, + int32_t* JXL_RESTRICT symbols) { + nonzero_idx[-1] = -16; + const auto one = Set(di, 1); + const auto offset = Set(di, 16); + for (int i = 0; i < num_nonzeros; i += Lanes(di)) { + const auto idx = Load(di, &nonzero_idx[i]); + const auto prev_idx = LoadU(di, &nonzero_idx[i - 1]); + const auto coeff = Load(di, 
&block[i]); + const auto nbits = NumBits(di, Abs(coeff)); + const auto mask = ShiftRight<8 * sizeof(int32_t) - 1>(coeff); + const auto bits = And(Add(coeff, mask), Sub(Shl(one, nbits), one)); + const auto symbol = Sub(Add(nbits, idx), Add(prev_idx, offset)); + Store(symbol, di, symbols + i); + Store(bits, di, block + i); + } +} + +template +int NumNonZero8x8ExceptDC(const T* block) { + const HWY_CAPPED(T, 8) di; + + const auto zero = Zero(di); + // Add FFFF for every zero coefficient, negate to get #zeros. + auto neg_sum_zero = zero; + { + // First row has DC, so mask + const size_t y = 0; + HWY_ALIGN const T dc_mask_lanes[8] = {-1}; + + for (size_t x = 0; x < 8; x += Lanes(di)) { + const auto dc_mask = Load(di, dc_mask_lanes + x); + + // DC counts as zero so we don't include it in nzeros. + const auto coef = AndNot(dc_mask, Load(di, &block[y * 8 + x])); + + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + // Remaining rows: no mask + for (size_t y = 1; y < 8; y++) { + for (size_t x = 0; x < 8; x += Lanes(di)) { + const auto coef = Load(di, &block[y * 8 + x]); + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + + // We want 64 - sum_zero, add because neg_sum_zero is already negated. 
+ return kDCTBlockSize + GetLane(SumOfLanes(di, neg_sum_zero)); +} + +template +void ComputeTokensForBlock(const T* block, int last_dc, int dc_ctx, int ac_ctx, + Token** tokens_ptr) { + Token* next_token = *tokens_ptr; + coeff_t temp2; + coeff_t temp; + temp = block[0] - last_dc; + if (temp == 0) { + *next_token++ = Token(dc_ctx, 0, 0); + } else { + temp2 = temp; + if (temp < 0) { + temp = -temp; + temp2--; + } + int dc_nbits = jxl::FloorLog2Nonzero(temp) + 1; + int dc_mask = (1 << dc_nbits) - 1; + *next_token++ = Token(dc_ctx, dc_nbits, temp2 & dc_mask); + } + int num_nonzeros = NumNonZero8x8ExceptDC(block); + for (int k = 1; k < 64; ++k) { + if (num_nonzeros == 0) { + *next_token++ = Token(ac_ctx, 0, 0); + break; + } + int r = 0; + if (zig_zag_order) { + while ((temp = block[k]) == 0) { + r++; + k++; + } + } else { + while ((temp = block[kJPEGNaturalOrder[k]]) == 0) { + r++; + k++; + } + } + --num_nonzeros; + if (temp < 0) { + temp = -temp; + temp2 = ~temp; + } else { + temp2 = temp; + } + while (r > 15) { + *next_token++ = Token(ac_ctx, 0xf0, 0); + r -= 16; + } + int ac_nbits = jxl::FloorLog2Nonzero(temp) + 1; + int ac_mask = (1 << ac_nbits) - 1; + int symbol = (r << 4u) + ac_nbits; + *next_token++ = Token(ac_ctx, symbol, temp2 & ac_mask); + } + *tokens_ptr = next_token; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); +#endif // LIB_JPEGLI_ENTROPY_CODING_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.cc b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.cc new file mode 100644 index 0000000000..149768fd30 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.cc @@ -0,0 +1,837 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/entropy_coding.h" + +#include + +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/huffman.h" +#include "lib/jxl/base/bits.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/entropy_coding.cc" +#include +#include + +#include "lib/jpegli/entropy_coding-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +void ComputeTokensSequential(const coeff_t* block, int last_dc, int dc_ctx, + int ac_ctx, Token** tokens_ptr) { + ComputeTokensForBlock(block, last_dc, dc_ctx, ac_ctx, + tokens_ptr); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo) { + int MCUs_per_row = DivCeil(cinfo->image_width, 8 * cinfo->max_h_samp_factor); + size_t blocks_per_mcu = 0; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + blocks_per_mcu += comp->h_samp_factor * comp->v_samp_factor; + } + return kDCTBlockSize * blocks_per_mcu * MCUs_per_row; +} + +size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus, + size_t num_tokens, size_t max_per_row) { + size_t estimate; + if (mcu_y == 0) { + estimate = 16 * max_per_row; + } else { + estimate = (4 * ysize_mcus * num_tokens) / (3 * mcu_y); + } + size_t mcus_left = ysize_mcus - mcu_y; + return std::min(mcus_left * max_per_row, + std::max(max_per_row, estimate - num_tokens)); +} + +namespace { +HWY_EXPORT(ComputeTokensSequential); + +void TokenizeProgressiveDC(const coeff_t* coeffs, int context, int Al, + coeff_t* last_dc_coeff, Token** next_token) { + coeff_t temp2; + coeff_t temp; + temp2 = coeffs[0] >> Al; + temp = temp2 - *last_dc_coeff; + *last_dc_coeff = temp2; + temp2 = temp; + if (temp < 0) { + temp = -temp; + temp2--; + } + int nbits = (temp == 0) ? 
0 : (jxl::FloorLog2Nonzero(temp) + 1); + int bits = temp2 & ((1 << nbits) - 1); + *(*next_token)++ = Token(context, nbits, bits); +} + +void TokenizeACProgressiveScan(j_compress_ptr cinfo, int scan_index, + int context, ScanTokenInfo* sti) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + const int comp_idx = scan_info->component_index[0]; + const jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + const int Al = scan_info->Al; + const int Ss = scan_info->Ss; + const int Se = scan_info->Se; + const size_t restart_interval = sti->restart_interval; + int restarts_to_go = restart_interval; + size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks; + size_t num_restarts = + restart_interval > 0 ? DivCeil(num_blocks, restart_interval) : 1; + size_t restart_idx = 0; + int eob_run = 0; + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + sti->token_offset = m->total_num_tokens + ta->num_tokens; + sti->restarts = Allocate(cinfo, num_restarts, JPOOL_IMAGE); + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), m->coeff_buffers[comp_idx], by, + 1, false); + // Each coefficient can appear in at most one token, but we have to reserve + // one extra EOBrun token that was rolled over from the previous block-row + // and has to be flushed at the end. 
+ int max_tokens_per_row = 1 + comp->width_in_blocks * (Se - Ss + 1); + if (ta->num_tokens + max_tokens_per_row > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = + EstimateNumTokens(cinfo, by, comp->height_in_blocks, + m->total_num_tokens, max_tokens_per_row); + ta->tokens = Allocate(cinfo, m->num_tokens, JPOOL_IMAGE); + m->next_token = ta->tokens; + } + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + if (restart_interval > 0 && restarts_to_go == 0) { + if (eob_run > 0) { + int nbits = jxl::FloorLog2Nonzero(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = + Token(context, symbol, eob_run & ((1 << nbits) - 1)); + eob_run = 0; + } + ta->num_tokens = m->next_token - ta->tokens; + sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens; + restarts_to_go = restart_interval; + } + const coeff_t* block = &ba[0][bx][0]; + coeff_t temp2; + coeff_t temp; + int r = 0; + int num_nzeros = 0; + int num_future_nzeros = 0; + for (int k = Ss; k <= Se; ++k) { + if ((temp = block[k]) == 0) { + r++; + continue; + } + if (temp < 0) { + temp = -temp; + temp >>= Al; + temp2 = ~temp; + } else { + temp >>= Al; + temp2 = temp; + } + if (temp == 0) { + r++; + num_future_nzeros++; + continue; + } + if (eob_run > 0) { + int nbits = jxl::FloorLog2Nonzero(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = + Token(context, symbol, eob_run & ((1 << nbits) - 1)); + eob_run = 0; + } + while (r > 15) { + *m->next_token++ = Token(context, 0xf0, 0); + r -= 16; + } + int nbits = jxl::FloorLog2Nonzero(temp) + 1; + int symbol = (r << 4u) + nbits; + *m->next_token++ = Token(context, symbol, temp2 & ((1 << nbits) - 1)); + ++num_nzeros; + r = 0; + } + if (r > 0) { + ++eob_run; + if (eob_run == 0x7FFF) { + int nbits = jxl::FloorLog2Nonzero(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = + Token(context, symbol, eob_run & ((1 << nbits) - 
1)); + eob_run = 0; + } + } + sti->num_nonzeros += num_nzeros; + sti->num_future_nonzeros += num_future_nzeros; + --restarts_to_go; + } + ta->num_tokens = m->next_token - ta->tokens; + } + if (eob_run > 0) { + int nbits = jxl::FloorLog2Nonzero(eob_run); + int symbol = nbits << 4u; + *m->next_token++ = Token(context, symbol, eob_run & ((1 << nbits) - 1)); + ++ta->num_tokens; + eob_run = 0; + } + sti->num_tokens = m->total_num_tokens + ta->num_tokens - sti->token_offset; + sti->restarts[restart_idx++] = m->total_num_tokens + ta->num_tokens; +} + +void TokenizeACRefinementScan(j_compress_ptr cinfo, int scan_index, + ScanTokenInfo* sti) { + jpeg_comp_master* m = cinfo->master; + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + const int comp_idx = scan_info->component_index[0]; + const jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + const int Al = scan_info->Al; + const int Ss = scan_info->Ss; + const int Se = scan_info->Se; + const size_t restart_interval = sti->restart_interval; + int restarts_to_go = restart_interval; + RefToken token; + int eob_run = 0; + int eob_refbits = 0; + size_t num_blocks = comp->height_in_blocks * comp->width_in_blocks; + size_t num_restarts = + restart_interval > 0 ? 
DivCeil(num_blocks, restart_interval) : 1; + sti->tokens = m->next_refinement_token; + sti->refbits = m->next_refinement_bit; + sti->eobruns = Allocate(cinfo, num_blocks / 2, JPOOL_IMAGE); + sti->restarts = Allocate(cinfo, num_restarts, JPOOL_IMAGE); + RefToken* next_token = sti->tokens; + RefToken* next_eob_token = next_token; + uint8_t* next_ref_bit = sti->refbits; + uint16_t* next_eobrun = sti->eobruns; + size_t restart_idx = 0; + for (JDIMENSION by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), m->coeff_buffers[comp_idx], by, + 1, false); + for (JDIMENSION bx = 0; bx < comp->width_in_blocks; ++bx) { + if (restart_interval > 0 && restarts_to_go == 0) { + sti->restarts[restart_idx++] = next_token - sti->tokens; + restarts_to_go = restart_interval; + next_eob_token = next_token; + eob_run = eob_refbits = 0; + } + const coeff_t* block = &ba[0][bx][0]; + int num_eob_refinement_bits = 0; + int num_refinement_bits = 0; + int num_nzeros = 0; + int r = 0; + for (int k = Ss; k <= Se; ++k) { + int absval = block[k]; + if (absval == 0) { + r++; + continue; + } + const int mask = absval >> (8 * sizeof(int) - 1); + absval += mask; + absval ^= mask; + absval >>= Al; + if (absval == 0) { + r++; + continue; + } + while (r > 15) { + token.symbol = 0xf0; + token.refbits = num_refinement_bits; + *next_token++ = token; + r -= 16; + num_eob_refinement_bits += num_refinement_bits; + num_refinement_bits = 0; + } + if (absval > 1) { + *next_ref_bit++ = absval & 1u; + ++num_refinement_bits; + continue; + } + int symbol = (r << 4u) + 1 + ((mask + 1) << 1); + token.symbol = symbol; + token.refbits = num_refinement_bits; + *next_token++ = token; + ++num_nzeros; + num_refinement_bits = 0; + num_eob_refinement_bits = 0; + r = 0; + next_eob_token = next_token; + eob_run = eob_refbits = 0; + } + if (r > 0 || num_eob_refinement_bits + num_refinement_bits > 0) { + ++eob_run; + eob_refbits += num_eob_refinement_bits + 
num_refinement_bits; + if (eob_refbits > 255) { + ++next_eob_token; + eob_refbits = num_eob_refinement_bits + num_refinement_bits; + eob_run = 1; + } + next_token = next_eob_token; + next_token->refbits = eob_refbits; + if (eob_run == 1) { + next_token->symbol = 0; + } else if (eob_run == 2) { + next_token->symbol = 16; + *next_eobrun++ = 0; + } else if ((eob_run & (eob_run - 1)) == 0) { + next_token->symbol += 16; + next_eobrun[-1] = 0; + } else { + ++next_eobrun[-1]; + } + ++next_token; + if (eob_run == 0x7fff) { + next_eob_token = next_token; + eob_run = eob_refbits = 0; + } + } + sti->num_nonzeros += num_nzeros; + --restarts_to_go; + } + } + sti->num_tokens = next_token - sti->tokens; + sti->restarts[restart_idx++] = sti->num_tokens; + m->next_refinement_token = next_token; + m->next_refinement_bit = next_ref_bit; +} + +void TokenizeScan(j_compress_ptr cinfo, size_t scan_index, int ac_ctx_offset, + ScanTokenInfo* sti) { + const jpeg_scan_info* scan_info = &cinfo->scan_info[scan_index]; + if (scan_info->Ss > 0) { + if (scan_info->Ah == 0) { + TokenizeACProgressiveScan(cinfo, scan_index, ac_ctx_offset, sti); + } else { + TokenizeACRefinementScan(cinfo, scan_index, sti); + } + return; + } + + jpeg_comp_master* m = cinfo->master; + size_t restart_interval = sti->restart_interval; + int restarts_to_go = restart_interval; + coeff_t last_dc_coeff[MAX_COMPS_IN_SCAN] = {0}; + + // "Non-interleaved" means color data comes in separate scans, in other words + // each scan can contain only one color component. + const bool is_interleaved = (scan_info->comps_in_scan > 1); + const bool is_progressive = cinfo->progressive_mode; + const int Ah = scan_info->Ah; + const int Al = scan_info->Al; + HWY_ALIGN constexpr coeff_t kDummyBlock[DCTSIZE2] = {0}; + + size_t restart_idx = 0; + TokenArray* ta = &m->token_arrays[m->cur_token_array]; + sti->token_offset = Ah > 0 ? 
0 : m->total_num_tokens + ta->num_tokens; + + if (Ah > 0) { + sti->refbits = Allocate(cinfo, sti->num_blocks, JPOOL_IMAGE); + } else if (cinfo->progressive_mode) { + if (ta->num_tokens + sti->num_blocks > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = sti->num_blocks; + ta->tokens = Allocate(cinfo, m->num_tokens, JPOOL_IMAGE); + m->next_token = ta->tokens; + } + } + + JBLOCKARRAY ba[MAX_COMPS_IN_SCAN]; + size_t block_idx = 0; + for (size_t mcu_y = 0; mcu_y < sti->MCU_rows_in_scan; ++mcu_y) { + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1; + int by0 = mcu_y * n_blocks_y; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(n_blocks_y, block_rows_left); + ba[i] = (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), m->coeff_buffers[comp_idx], + by0, max_block_rows, false); + } + if (!cinfo->progressive_mode) { + int max_tokens_per_mcu_row = MaxNumTokensPerMCURow(cinfo); + if (ta->num_tokens + max_tokens_per_mcu_row > m->num_tokens) { + if (ta->tokens) { + m->total_num_tokens += ta->num_tokens; + ++m->cur_token_array; + ta = &m->token_arrays[m->cur_token_array]; + } + m->num_tokens = + EstimateNumTokens(cinfo, mcu_y, sti->MCU_rows_in_scan, + m->total_num_tokens, max_tokens_per_mcu_row); + ta->tokens = Allocate(cinfo, m->num_tokens, JPOOL_IMAGE); + m->next_token = ta->tokens; + } + } + for (size_t mcu_x = 0; mcu_x < sti->MCUs_per_row; ++mcu_x) { + // Possibly emit a restart marker. + if (restart_interval > 0 && restarts_to_go == 0) { + restarts_to_go = restart_interval; + memset(last_dc_coeff, 0, sizeof(last_dc_coeff)); + ta->num_tokens = m->next_token - ta->tokens; + sti->restarts[restart_idx++] = + Ah > 0 ? 
block_idx : m->total_num_tokens + ta->num_tokens; + } + // Encode one MCU + for (int i = 0; i < scan_info->comps_in_scan; ++i) { + int comp_idx = scan_info->component_index[i]; + jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + int n_blocks_y = is_interleaved ? comp->v_samp_factor : 1; + int n_blocks_x = is_interleaved ? comp->h_samp_factor : 1; + for (int iy = 0; iy < n_blocks_y; ++iy) { + for (int ix = 0; ix < n_blocks_x; ++ix) { + size_t block_y = mcu_y * n_blocks_y + iy; + size_t block_x = mcu_x * n_blocks_x + ix; + const coeff_t* block; + if (block_x >= comp->width_in_blocks || + block_y >= comp->height_in_blocks) { + block = kDummyBlock; + } else { + block = &ba[i][iy][block_x][0]; + } + if (!is_progressive) { + HWY_DYNAMIC_DISPATCH(ComputeTokensSequential) + (block, last_dc_coeff[i], comp_idx, ac_ctx_offset + i, + &m->next_token); + last_dc_coeff[i] = block[0]; + } else { + if (Ah == 0) { + TokenizeProgressiveDC(block, comp_idx, Al, last_dc_coeff + i, + &m->next_token); + } else { + sti->refbits[block_idx] = (block[0] >> Al) & 1; + } + } + ++block_idx; + } + } + } + --restarts_to_go; + } + ta->num_tokens = m->next_token - ta->tokens; + } + JXL_DASSERT(block_idx == sti->num_blocks); + sti->num_tokens = + Ah > 0 ? sti->num_blocks + : m->total_num_tokens + ta->num_tokens - sti->token_offset; + sti->restarts[restart_idx++] = + Ah > 0 ? 
sti->num_blocks : m->total_num_tokens + ta->num_tokens; + if (Ah == 0 && cinfo->progressive_mode) { + JXL_DASSERT(sti->num_blocks == sti->num_tokens); + } +} + +} // namespace + +void TokenizeJpeg(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + std::vector processed(cinfo->num_scans); + size_t max_refinement_tokens = 0; + size_t num_refinement_bits = 0; + int num_refinement_scans[DCTSIZE2] = {}; + int max_num_refinement_scans = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* si = &cinfo->scan_info[i]; + ScanTokenInfo* sti = &m->scan_token_info[i]; + if (si->Ss > 0 && si->Ah == 0 && si->Al > 0) { + int offset = m->ac_ctx_offset[i]; + TokenizeScan(cinfo, i, offset, sti); + processed[i] = 1; + max_refinement_tokens += sti->num_future_nonzeros; + for (int k = si->Ss; k <= si->Se; ++k) { + num_refinement_scans[k] = si->Al; + } + max_num_refinement_scans = std::max(max_num_refinement_scans, si->Al); + num_refinement_bits += sti->num_nonzeros; + } + if (si->Ss > 0 && si->Ah > 0) { + int comp_idx = si->component_index[0]; + const jpeg_component_info* comp = &cinfo->comp_info[comp_idx]; + size_t num_blocks = comp->width_in_blocks * comp->height_in_blocks; + max_refinement_tokens += (1 + (si->Se - si->Ss) / 16) * num_blocks; + } + } + if (max_refinement_tokens > 0) { + m->next_refinement_token = + Allocate(cinfo, max_refinement_tokens, JPOOL_IMAGE); + } + for (int j = 0; j < max_num_refinement_scans; ++j) { + uint8_t* refinement_bits = + Allocate(cinfo, num_refinement_bits, JPOOL_IMAGE); + m->next_refinement_bit = refinement_bits; + size_t new_refinement_bits = 0; + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* si = &cinfo->scan_info[i]; + ScanTokenInfo* sti = &m->scan_token_info[i]; + if (si->Ss > 0 && si->Ah > 0 && + si->Ah == num_refinement_scans[si->Ss] - j) { + int offset = m->ac_ctx_offset[i]; + TokenizeScan(cinfo, i, offset, sti); + processed[i] = 1; + new_refinement_bits += sti->num_nonzeros; + } + } + 
JXL_DASSERT(m->next_refinement_bit == + refinement_bits + num_refinement_bits); + num_refinement_bits += new_refinement_bits; + } + for (int i = 0; i < cinfo->num_scans; ++i) { + if (processed[i]) { + continue; + } + int offset = m->ac_ctx_offset[i]; + TokenizeScan(cinfo, i, offset, &m->scan_token_info[i]); + processed[i] = 1; + } +} + +namespace { + +struct Histogram { + int count[kJpegHuffmanAlphabetSize]; + Histogram() { memset(count, 0, sizeof(count)); } +}; + +void BuildHistograms(j_compress_ptr cinfo, Histogram* histograms) { + jpeg_comp_master* m = cinfo->master; + size_t num_token_arrays = m->cur_token_array + 1; + for (size_t i = 0; i < num_token_arrays; ++i) { + Token* tokens = m->token_arrays[i].tokens; + size_t num_tokens = m->token_arrays[i].num_tokens; + for (size_t j = 0; j < num_tokens; ++j) { + Token t = tokens[j]; + ++histograms[t.context].count[t.symbol]; + } + } + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info& si = cinfo->scan_info[i]; + const ScanTokenInfo& sti = m->scan_token_info[i]; + if (si.Ss > 0 && si.Ah > 0) { + int context = m->ac_ctx_offset[i]; + int* ac_histo = &histograms[context].count[0]; + for (size_t j = 0; j < sti.num_tokens; ++j) { + ++ac_histo[sti.tokens[j].symbol & 253]; + } + } + } +} + +struct JpegClusteredHistograms { + std::vector histograms; + std::vector histogram_indexes; + std::vector slot_ids; +}; + +float HistogramCost(const Histogram& histo) { + std::vector counts(kJpegHuffmanAlphabetSize + 1); + std::vector depths(kJpegHuffmanAlphabetSize + 1); + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + counts[i] = histo.count[i]; + } + counts[kJpegHuffmanAlphabetSize] = 1; + CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength, + &depths[0]); + size_t header_bits = (1 + kJpegHuffmanMaxBitLength) * 8; + size_t data_bits = 0; + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + header_bits += 8; + data_bits += counts[i] * depths[i]; + } + } + 
return header_bits + data_bits; +} + +void AddHistograms(const Histogram& a, const Histogram& b, Histogram* c) { + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + c->count[i] = a.count[i] + b.count[i]; + } +} + +bool IsEmptyHistogram(const Histogram& histo) { + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (histo.count[i]) return false; + } + return true; +} + +void ClusterJpegHistograms(const Histogram* histograms, size_t num, + JpegClusteredHistograms* clusters) { + clusters->histogram_indexes.resize(num); + std::vector slot_histograms; + std::vector slot_costs; + for (size_t i = 0; i < num; ++i) { + const Histogram& cur = histograms[i]; + if (IsEmptyHistogram(cur)) { + continue; + } + float best_cost = HistogramCost(cur); + size_t best_slot = slot_histograms.size(); + for (size_t j = 0; j < slot_histograms.size(); ++j) { + size_t prev_idx = slot_histograms[j]; + const Histogram& prev = clusters->histograms[prev_idx]; + Histogram combined; + AddHistograms(prev, cur, &combined); + float combined_cost = HistogramCost(combined); + float cost = combined_cost - slot_costs[j]; + if (cost < best_cost) { + best_cost = cost; + best_slot = j; + } + } + if (best_slot == slot_histograms.size()) { + // Create new histogram. + size_t histogram_index = clusters->histograms.size(); + clusters->histograms.push_back(cur); + clusters->histogram_indexes[i] = histogram_index; + if (best_slot < 4) { + // We have a free slot, so we put the new histogram there. + slot_histograms.push_back(histogram_index); + slot_costs.push_back(best_cost); + } else { + // TODO(szabadka) Find the best histogram to replce. + best_slot = (clusters->slot_ids.back() + 1) % 4; + } + slot_histograms[best_slot] = histogram_index; + slot_costs[best_slot] = best_cost; + clusters->slot_ids.push_back(best_slot); + } else { + // Merge this histogram with a previous one. 
+ size_t histogram_index = slot_histograms[best_slot]; + const Histogram& prev = clusters->histograms[histogram_index]; + AddHistograms(prev, cur, &clusters->histograms[histogram_index]); + clusters->histogram_indexes[i] = histogram_index; + JXL_ASSERT(clusters->slot_ids[histogram_index] == best_slot); + slot_costs[best_slot] += best_cost; + } + } +} + +void CopyHuffmanTable(j_compress_ptr cinfo, int index, bool is_dc, + int* inv_slot_map, uint8_t* slot_id_map, + JHUFF_TBL* huffman_tables, size_t* num_huffman_tables) { + const char* type = is_dc ? "DC" : "AC"; + if (index < 0 || index >= NUM_HUFF_TBLS) { + JPEGLI_ERROR("Invalid %s Huffman table index %d", type, index); + } + // Check if we have already copied this Huffman table. + int slot_idx = index + (is_dc ? 0 : NUM_HUFF_TBLS); + if (inv_slot_map[slot_idx] != -1) { + return; + } + inv_slot_map[slot_idx] = *num_huffman_tables; + // Look up and validate Huffman table. + JHUFF_TBL* table = + is_dc ? cinfo->dc_huff_tbl_ptrs[index] : cinfo->ac_huff_tbl_ptrs[index]; + if (table == nullptr) { + JPEGLI_ERROR("Missing %s Huffman table %d", type, index); + } + ValidateHuffmanTable(reinterpret_cast(cinfo), table, is_dc); + // Copy Huffman table to the end of the list and save slot id. + slot_id_map[*num_huffman_tables] = index + (is_dc ? 
0 : 0x10); + memcpy(&huffman_tables[*num_huffman_tables], table, sizeof(JHUFF_TBL)); + ++(*num_huffman_tables); +} + +void BuildJpegHuffmanTable(const Histogram& histo, JHUFF_TBL* table) { + std::vector counts(kJpegHuffmanAlphabetSize + 1); + std::vector depths(kJpegHuffmanAlphabetSize + 1); + for (size_t j = 0; j < kJpegHuffmanAlphabetSize; ++j) { + counts[j] = histo.count[j]; + } + counts[kJpegHuffmanAlphabetSize] = 1; + CreateHuffmanTree(counts.data(), counts.size(), kJpegHuffmanMaxBitLength, + &depths[0]); + memset(table, 0, sizeof(JHUFF_TBL)); + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + ++table->bits[depths[i]]; + } + } + int offset[kJpegHuffmanMaxBitLength + 1] = {0}; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + offset[i] = offset[i - 1] + table->bits[i - 1]; + } + for (size_t i = 0; i < kJpegHuffmanAlphabetSize; ++i) { + if (depths[i] > 0) { + table->huffval[offset[depths[i]]++] = i; + } + } +} + +} // namespace + +void CopyHuffmanTables(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + size_t max_huff_tables = 2 * cinfo->num_components; + // Copy Huffman tables and save slot ids. + m->huffman_tables = Allocate(cinfo, max_huff_tables, JPOOL_IMAGE); + m->slot_id_map = Allocate(cinfo, max_huff_tables, JPOOL_IMAGE); + m->num_huffman_tables = 0; + int inv_slot_map[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + CopyHuffmanTable(cinfo, comp->dc_tbl_no, /*is_dc=*/true, &inv_slot_map[0], + m->slot_id_map, m->huffman_tables, &m->num_huffman_tables); + CopyHuffmanTable(cinfo, comp->ac_tbl_no, /*is_dc=*/false, &inv_slot_map[0], + m->slot_id_map, m->huffman_tables, &m->num_huffman_tables); + } + // Compute context map. 
+ m->context_map = Allocate(cinfo, 8, JPOOL_IMAGE); + memset(m->context_map, 0, 8); + for (int c = 0; c < cinfo->num_components; ++c) { + m->context_map[c] = inv_slot_map[cinfo->comp_info[c].dc_tbl_no]; + } + int ac_ctx = 4; + for (int i = 0; i < cinfo->num_scans; ++i) { + const jpeg_scan_info* si = &cinfo->scan_info[i]; + if (si->Se > 0) { + for (int j = 0; j < si->comps_in_scan; ++j) { + int c = si->component_index[j]; + jpeg_component_info* comp = &cinfo->comp_info[c]; + m->context_map[ac_ctx++] = inv_slot_map[comp->ac_tbl_no + 4]; + } + } + } +} + +void OptimizeHuffmanCodes(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + // Build DC and AC histograms. + std::vector histograms(m->num_contexts); + BuildHistograms(cinfo, &histograms[0]); + + // Cluster DC histograms. + JpegClusteredHistograms dc_clusters; + ClusterJpegHistograms(histograms.data(), cinfo->num_components, &dc_clusters); + + // Cluster AC histograms. + JpegClusteredHistograms ac_clusters; + ClusterJpegHistograms(histograms.data() + 4, m->num_contexts - 4, + &ac_clusters); + + // Create Huffman tables and slot ids clusters. + size_t num_dc_huff = dc_clusters.histograms.size(); + m->num_huffman_tables = num_dc_huff + ac_clusters.histograms.size(); + m->huffman_tables = + Allocate(cinfo, m->num_huffman_tables, JPOOL_IMAGE); + m->slot_id_map = Allocate(cinfo, m->num_huffman_tables, JPOOL_IMAGE); + for (size_t i = 0; i < m->num_huffman_tables; ++i) { + JHUFF_TBL huff_table = {}; + if (i < dc_clusters.histograms.size()) { + m->slot_id_map[i] = i; + BuildJpegHuffmanTable(dc_clusters.histograms[i], &huff_table); + } else { + m->slot_id_map[i] = 16 + ac_clusters.slot_ids[i - num_dc_huff]; + BuildJpegHuffmanTable(ac_clusters.histograms[i - num_dc_huff], + &huff_table); + } + memcpy(&m->huffman_tables[i], &huff_table, sizeof(huff_table)); + } + + // Create context map from clustered histogram indexes. 
+ m->context_map = Allocate(cinfo, m->num_contexts, JPOOL_IMAGE); + memset(m->context_map, 0, m->num_contexts); + for (size_t i = 0; i < m->num_contexts; ++i) { + if (i < (size_t)cinfo->num_components) { + m->context_map[i] = dc_clusters.histogram_indexes[i]; + } else if (i >= 4) { + m->context_map[i] = num_dc_huff + ac_clusters.histogram_indexes[i - 4]; + } + } +} + +namespace { + +constexpr uint8_t kNumExtraBits[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 4, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 6, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 7, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 11, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 13, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // +}; + +void BuildHuffmanCodeTable(const JHUFF_TBL& table, HuffmanCodeTable* code) { + int huff_code[kJpegHuffmanAlphabetSize]; + // +1 for a sentinel element. + uint32_t huff_size[kJpegHuffmanAlphabetSize + 1]; + int p = 0; + for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) { + int i = table.bits[l]; + while (i--) huff_size[p++] = l; + } + + // Reuse sentinel element. 
+ int last_p = p; + huff_size[last_p] = 0; + + int next_code = 0; + uint32_t si = huff_size[0]; + p = 0; + while (huff_size[p]) { + while ((huff_size[p]) == si) { + huff_code[p++] = next_code; + next_code++; + } + next_code <<= 1; + si++; + } + for (p = 0; p < last_p; p++) { + int i = table.huffval[p]; + int nbits = kNumExtraBits[i]; + code->depth[i] = huff_size[p] + nbits; + code->code[i] = huff_code[p] << nbits; + } +} + +} // namespace + +void InitEntropyCoder(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + m->coding_tables = + Allocate(cinfo, m->num_huffman_tables, JPOOL_IMAGE); + for (size_t i = 0; i < m->num_huffman_tables; ++i) { + BuildHuffmanCodeTable(m->huffman_tables[i], &m->coding_tables[i]); + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.h b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.h new file mode 100644 index 0000000000..a552219ec3 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/entropy_coding.h @@ -0,0 +1,28 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_ENTROPY_CODING_H_ +#define LIB_JPEGLI_ENTROPY_CODING_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +size_t MaxNumTokensPerMCURow(j_compress_ptr cinfo); + +size_t EstimateNumTokens(j_compress_ptr cinfo, size_t mcu_y, size_t ysize_mcus, + size_t num_tokens, size_t max_per_row); + +void TokenizeJpeg(j_compress_ptr cinfo); + +void CopyHuffmanTables(j_compress_ptr cinfo); + +void OptimizeHuffmanCodes(j_compress_ptr cinfo); + +void InitEntropyCoder(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_ENTROPY_CODING_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/error.cc b/third-party/libjxl/libjxl/lib/jpegli/error.cc new file mode 100644 index 0000000000..289261672d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/error.cc @@ -0,0 +1,102 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/error.h" + +#include +#include +#include + +#include + +#include "lib/jpegli/common.h" + +namespace jpegli { + +const char* const kErrorMessageTable[] = { + "Message codes are not supported, error message is in msg_parm.s string", +}; + +bool FormatString(char* buffer, const char* format, ...) 
{ + va_list args; + va_start(args, format); + vsnprintf(buffer, JMSG_STR_PARM_MAX, format, args); + va_end(args); + return false; +} + +void ExitWithAbort(j_common_ptr cinfo) { + (*cinfo->err->output_message)(cinfo); + jpegli_destroy(cinfo); + exit(EXIT_FAILURE); +} + +void EmitMessage(j_common_ptr cinfo, int msg_level) { + if (msg_level < 0) { + if (cinfo->err->num_warnings <= 5 || cinfo->err->trace_level >= 3) { + (*cinfo->err->output_message)(cinfo); + } + ++cinfo->err->num_warnings; + } else if (cinfo->err->trace_level >= msg_level) { + (*cinfo->err->output_message)(cinfo); + } +} + +void OutputMessage(j_common_ptr cinfo) { + char buffer[JMSG_LENGTH_MAX]; + (*cinfo->err->format_message)(cinfo, buffer); + fprintf(stderr, "%s\n", buffer); +} + +void FormatMessage(j_common_ptr cinfo, char* buffer) { + jpeg_error_mgr* err = cinfo->err; + int code = err->msg_code; + if (code == 0) { + memcpy(buffer, cinfo->err->msg_parm.s, JMSG_STR_PARM_MAX); + } else if (err->addon_message_table != nullptr && + code >= err->first_addon_message && + code <= err->last_addon_message) { + std::string msg(err->addon_message_table[code - err->first_addon_message]); + if (msg.find("%s") != std::string::npos) { + snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), err->msg_parm.s); + } else { + snprintf(buffer, JMSG_LENGTH_MAX, msg.data(), err->msg_parm.i[0], + err->msg_parm.i[1], err->msg_parm.i[2], err->msg_parm.i[3], + err->msg_parm.i[4], err->msg_parm.i[5], err->msg_parm.i[6], + err->msg_parm.i[7]); + } + } else { + snprintf(buffer, JMSG_LENGTH_MAX, "%s", kErrorMessageTable[0]); + } +} + +void ResetErrorManager(j_common_ptr cinfo) { + memset(cinfo->err->msg_parm.s, 0, JMSG_STR_PARM_MAX); + cinfo->err->msg_code = 0; + cinfo->err->num_warnings = 0; +} + +} // namespace jpegli + +struct jpeg_error_mgr* jpegli_std_error(struct jpeg_error_mgr* err) { + err->error_exit = jpegli::ExitWithAbort; + err->emit_message = jpegli::EmitMessage; + err->output_message = jpegli::OutputMessage; + 
err->format_message = jpegli::FormatMessage; + err->reset_error_mgr = jpegli::ResetErrorManager; + memset(err->msg_parm.s, 0, JMSG_STR_PARM_MAX); + err->trace_level = 0; + err->num_warnings = 0; + // We don't support message codes and message table, but we define one here + // in case the application has a custom format_message and tries to access + // these fields there. + err->msg_code = 0; + err->jpeg_message_table = jpegli::kErrorMessageTable; + err->last_jpeg_message = 0; + err->addon_message_table = nullptr; + err->first_addon_message = 0; + err->last_addon_message = 0; + return err; +} diff --git a/third-party/libjxl/libjxl/lib/jpegli/error.h b/third-party/libjxl/libjxl/lib/jpegli/error.h new file mode 100644 index 0000000000..4451abd416 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/error.h @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_ERROR_H_ +#define LIB_JPEGLI_ERROR_H_ + +#include +#include + +#include "lib/jpegli/common.h" + +namespace jpegli { + +bool FormatString(char* buffer, const char* format, ...); + +} // namespace jpegli + +#define JPEGLI_ERROR(format, ...) \ + jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \ + __LINE__, ##__VA_ARGS__), \ + (*cinfo->err->error_exit)(reinterpret_cast(cinfo)) + +#define JPEGLI_WARN(format, ...) \ + jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \ + __LINE__, ##__VA_ARGS__), \ + (*cinfo->err->emit_message)(reinterpret_cast(cinfo), -1) + +#define JPEGLI_TRACE(level, format, ...) 
\ + if (cinfo->err->trace_level >= (level)) \ + jpegli::FormatString(cinfo->err->msg_parm.s, ("%s:%d: " format), __FILE__, \ + __LINE__, ##__VA_ARGS__), \ + (*cinfo->err->emit_message)(reinterpret_cast(cinfo), \ + (level)) + +#endif // LIB_JPEGLI_ERROR_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/error_handling_test.cc b/third-party/libjxl/libjxl/lib/jpegli/error_handling_test.cc new file mode 100644 index 0000000000..0d481c572a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/error_handling_test.cc @@ -0,0 +1,1276 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/sanitizers.h" + +namespace jpegli { +namespace { + +TEST(EncoderErrorHandlingTest, MinimalSuccess) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + } + TestImage output; + DecodeWithLibjpeg(CompressParams(), DecompressParams(), nullptr, 0, buffer, + buffer_size, &output); + EXPECT_EQ(1, output.xsize); + EXPECT_EQ(1, output.ysize); + EXPECT_EQ(1, output.components); + EXPECT_EQ(0, output.pixels[0]); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NoDestination) { + jpeg_compress_struct cinfo; 
+ const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); +} + +TEST(EncoderErrorHandlingTest, NoImageDimensions) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, ImageTooBig) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 100000; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NoInputComponents) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) 
free(buffer); +} + +TEST(EncoderErrorHandlingTest, TooManyInputComponents) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1000; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NoSetDefaults) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NoStartCompress) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NoWriteScanlines) { + uint8_t* buffer = 
nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NoWriteAllScanlines) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 2; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidQuantValue) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + cinfo.quant_tbl_ptrs[0] = jpegli_alloc_quant_table((j_common_ptr)&cinfo); + for (size_t k = 0; k < DCTSIZE2; ++k) { + cinfo.quant_tbl_ptrs[0]->quantval[k] = 0; + } + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + 
jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidQuantTableIndex) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].quant_tbl_no = 3; + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch1) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + cinfo.num_components = 100; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch2) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + cinfo.num_components = 2; + 
jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch3) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + cinfo.num_components = 2; + cinfo.comp_info[1].h_samp_factor = cinfo.comp_info[1].v_samp_factor = 1; + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch4) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[1] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch5) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + 
jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_GRAYSCALE; + jpegli_set_defaults(&cinfo); + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NumberOfComponentsMismatch6) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + cinfo.num_components = 2; + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidColorTransform) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_YCbCr; + jpegli_set_defaults(&cinfo); + cinfo.jpeg_color_space = JCS_RGB; + jpegli_start_compress(&cinfo, TRUE); + JSAMPLE image[3] = {0}; + JSAMPROW row[] = {image}; + jpegli_write_scanlines(&cinfo, row, 1); + jpegli_finish_compress(&cinfo); + return true; + }; + 
EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, DuplicateComponentIds) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].component_id = 0; + cinfo.comp_info[1].component_id = 0; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidComponentIndex) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].component_index = 17; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, ArithmeticCoding) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + jpegli_set_defaults(&cinfo); + cinfo.arith_code = TRUE; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + 
+TEST(EncoderErrorHandlingTest, CCIR601Sampling) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + jpegli_set_defaults(&cinfo); + cinfo.CCIR601_sampling = TRUE; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript1) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 63, 0, 0}}; // + cinfo.scan_info = kScript; + cinfo.num_scans = 0; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript2) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = {{2, {0, 1}, 0, 63, 0, 0}}; // + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + 
jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript3) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = {{5, {0}, 0, 63, 0, 0}}; // + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript4) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 2; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = {{2, {0, 0}, 0, 63, 0, 0}}; // + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript5) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 2; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = {{2, {1, 0}, 
0, 63, 0, 0}}; // + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript6) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = {{1, {0}, 0, 64, 0, 0}}; // + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript7) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = {{1, {0}, 2, 1, 0, 0}}; // + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript8) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, 
&buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 2; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = { + {1, {0}, 0, 63, 0, 0}, {1, {1}, 0, 0, 0, 0}, {1, {1}, 1, 63, 0, 0} // + }; + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript9) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = { + {1, {0}, 0, 1, 0, 0}, {1, {0}, 2, 63, 0, 0}, // + }; + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript10) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 2; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = { + {2, {0, 1}, 0, 0, 0, 0}, {2, {0, 1}, 1, 63, 0, 0} // + }; + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + 
+TEST(EncoderErrorHandlingTest, InvalidScanScript11) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = { + {1, {0}, 1, 63, 0, 0}, {1, {0}, 0, 0, 0, 0} // + }; + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript12) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = { + {1, {0}, 0, 0, 10, 1}, {1, {0}, 0, 0, 1, 0}, {1, {0}, 1, 63, 0, 0} // + }; + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, InvalidScanScript13) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + static constexpr jpeg_scan_info kScript[] = 
{ + {1, {0}, 0, 0, 0, 2}, + {1, {0}, 0, 0, 1, 0}, + {1, {0}, 0, 0, 2, 1}, // + {1, {0}, 1, 63, 0, 0} // + }; + cinfo.scan_info = kScript; + cinfo.num_scans = ARRAY_SIZE(kScript); + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, MCUSizeTooBig) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + jpegli_set_defaults(&cinfo); + jpegli_set_progressive_level(&cinfo, 0); + cinfo.comp_info[0].h_samp_factor = 3; + cinfo.comp_info[0].v_samp_factor = 3; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, RestartIntervalTooBig) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 1; + jpegli_set_defaults(&cinfo); + cinfo.restart_interval = 1000000; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, SamplingFactorTooBig) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + 
cinfo.image_height = 1; + cinfo.input_components = 3; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].h_samp_factor = 5; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +TEST(EncoderErrorHandlingTest, NonIntegralSamplingRatio) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + cinfo.image_width = 1; + cinfo.image_height = 1; + cinfo.input_components = 3; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].h_samp_factor = 3; + cinfo.comp_info[1].h_samp_factor = 2; + jpegli_start_compress(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + if (buffer) free(buffer); +} + +constexpr const char* kAddOnTable[] = {"First message", + "Second message with int param %d", + "Third message with string param %s"}; + +TEST(EncoderErrorHandlingTest, AddOnTableNoParam) { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.err->addon_message_table = kAddOnTable; + cinfo.err->first_addon_message = 10000; + cinfo.err->last_addon_message = 10002; + cinfo.err->msg_code = 10000; + (*cinfo.err->error_exit)(reinterpret_cast(&cinfo)); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); +} + +TEST(EncoderErrorHandlingTest, AddOnTableIntParam) { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.err->addon_message_table = kAddOnTable; + cinfo.err->first_addon_message = 10000; + cinfo.err->last_addon_message = 10002; + cinfo.err->msg_code = 10001; + cinfo.err->msg_parm.i[0] = 17; + 
(*cinfo.err->error_exit)(reinterpret_cast(&cinfo)); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); +} + +TEST(EncoderErrorHandlingTest, AddOnTableNoStringParam) { + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.err->addon_message_table = kAddOnTable; + cinfo.err->first_addon_message = 10000; + cinfo.err->last_addon_message = 10002; + cinfo.err->msg_code = 10002; + memcpy(cinfo.err->msg_parm.s, "MESSAGE PARAM", 14); + (*cinfo.err->error_exit)(reinterpret_cast(&cinfo)); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_compress(&cinfo); +} + +static const uint8_t kCompressed0[] = { + // SOI + 0xff, 0xd8, // + // DQT + 0xff, 0xdb, 0x00, 0x43, 0x00, 0x03, 0x02, 0x02, 0x03, 0x02, // + 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x03, 0x03, 0x04, 0x05, // + 0x08, 0x05, 0x05, 0x04, 0x04, 0x05, 0x0a, 0x07, 0x07, 0x06, // + 0x08, 0x0c, 0x0a, 0x0c, 0x0c, 0x0b, 0x0a, 0x0b, 0x0b, 0x0d, // + 0x0e, 0x12, 0x10, 0x0d, 0x0e, 0x11, 0x0e, 0x0b, 0x0b, 0x10, // + 0x16, 0x10, 0x11, 0x13, 0x14, 0x15, 0x15, 0x15, 0x0c, 0x0f, // + 0x17, 0x18, 0x16, 0x14, 0x18, 0x12, 0x14, 0x15, 0x14, // + // SOF + 0xff, 0xc0, 0x00, 0x0b, 0x08, 0x00, 0x01, 0x00, 0x01, 0x01, // + 0x01, 0x11, 0x00, // + // DHT + 0xff, 0xc4, 0x00, 0xd2, 0x00, 0x00, 0x01, 0x05, 0x01, 0x01, // + 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // + 0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, // + 0x09, 0x0a, 0x0b, 0x10, 0x00, 0x02, 0x01, 0x03, 0x03, 0x02, // + 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7d, // + 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, // + 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, // + 0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, // + 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, // + 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, // + 
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, // + 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, // + 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, // + 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, // + 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, // + 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, // + 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, // + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, // + 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, // + 0xd9, 0xda, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, // + 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, // + 0xf9, 0xfa, // + // SOS + 0xff, 0xda, 0x00, 0x08, 0x01, 0x01, 0x00, 0x00, 0x3f, 0x00, // + // entropy coded data + 0xfc, 0xaa, 0xaf, // + // EOI + 0xff, 0xd9, // +}; +static const size_t kLen0 = sizeof(kCompressed0); + +static const size_t kDQTOffset = 2; +static const size_t kSOFOffset = 71; +static const size_t kDHTOffset = 84; +static const size_t kSOSOffset = 296; + +TEST(DecoderErrorHandlingTest, MinimalSuccess) { + JXL_CHECK(kCompressed0[kDQTOffset] == 0xff); + JXL_CHECK(kCompressed0[kSOFOffset] == 0xff); + JXL_CHECK(kCompressed0[kDHTOffset] == 0xff); + JXL_CHECK(kCompressed0[kSOSOffset] == 0xff); + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + jpegli_start_decompress(&cinfo); + JSAMPLE image[1]; + JSAMPROW row[] = {image}; + jpegli_read_scanlines(&cinfo, row, 1); + EXPECT_EQ(0, image[0]); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoSource) { + jpeg_decompress_struct cinfo = {}; + 
const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_read_header(&cinfo, TRUE); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoReadHeader) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_start_decompress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoStartDecompress) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + JSAMPLE image[1]; + JSAMPROW row[] = {image}; + jpegli_read_scanlines(&cinfo, row, 1); + EXPECT_EQ(0, image[0]); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +TEST(DecoderErrorHandlingTest, NoReadScanlines) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, kCompressed0, kLen0); + jpegli_read_header(&cinfo, TRUE); + EXPECT_EQ(1, cinfo.image_width); + EXPECT_EQ(1, cinfo.image_height); + jpegli_start_decompress(&cinfo); + jpegli_finish_decompress(&cinfo); + return true; + }; + EXPECT_FALSE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); +} + +static const size_t kMaxImageWidth = 0xffff; +JSAMPLE kOutputBuffer[MAX_COMPONENTS * kMaxImageWidth]; + +bool ParseCompressed(const std::vector& compressed) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() -> bool { + 
ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, compressed.data(), compressed.size()); + jpegli_read_header(&cinfo, TRUE); + jpegli_start_decompress(&cinfo); + for (JDIMENSION i = 0; i < cinfo.output_height; ++i) { + JSAMPROW row[] = {kOutputBuffer}; + jpegli_read_scanlines(&cinfo, row, 1); + } + jpegli_finish_decompress(&cinfo); + return true; + }; + bool retval = try_catch_block(); + jpegli_destroy_decompress(&cinfo); + return retval; +} + +TEST(DecoderErrorHandlingTest, NoSOI) { + for (int pos : {0, 1}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[pos] = 0; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidDQT) { + // Bad marker length + for (int diff : {-2, -1, 1, 2}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDQTOffset + 3] += diff; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // inavlid table index / precision + for (int val : {0x20, 0x05}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDQTOffset + 4] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // zero quant value + for (int k : {0, 1, 17, 63}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDQTOffset + 5 + k] = 0; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidSOF) { + // Bad marker length + for (int diff : {-2, -1, 1, 2}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 3] += diff; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // zero width, height or num_components + for (int pos : {6, 8, 9}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + pos] = 0; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid data precision + for (int val : {0, 1, 127}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 4] 
= val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // too many num_components + for (int val : {5, 255}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 9] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid sampling factors + for (int val : {0x00, 0x01, 0x10, 0x15, 0x51}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 11] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid quant table index + for (int val : {5, 17}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOFOffset + 12] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidDHT) { + // Bad marker length + for (int diff : {-2, -1, 1, 2}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDHTOffset + 3] += diff; + EXPECT_FALSE(ParseCompressed(compressed)); + } + { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDHTOffset + 2] += 17; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // inavlid table slot_id + for (int val : {0x05, 0x15, 0x20}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kDHTOffset + 4] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} + +TEST(DecoderErrorHandlingTest, InvalidSOS) { + // Invalid comps_in_scan + for (int val : {2, 5, 17}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOSOffset + 4] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid Huffman table indexes + for (int val : {0x05, 0x50, 0x15, 0x51}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOSOffset + 6] = val; + EXPECT_FALSE(ParseCompressed(compressed)); + } + // invalid Ss/Se + for (int pos : {7, 8}) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + compressed[kSOSOffset + pos] = 64; + EXPECT_FALSE(ParseCompressed(compressed)); + } +} 
+ +TEST(DecoderErrorHandlingTest, MutateSingleBytes) { + for (size_t pos = 0; pos < kLen0; ++pos) { + std::vector compressed(kCompressed0, kCompressed0 + kLen0); + for (int val : {0x00, 0x0f, 0xf0, 0xff}) { + compressed[pos] = val; + ParseCompressed(compressed); + } + } +} + +} // namespace +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/huffman.cc b/third-party/libjxl/libjxl/lib/jpegli/huffman.cc new file mode 100644 index 0000000000..1cf88a5536 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/huffman.cc @@ -0,0 +1,321 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/huffman.h" + +#include +#include + +#include "lib/jpegli/common.h" +#include "lib/jpegli/error.h" + +namespace jpegli { + +// Returns the table width of the next 2nd level table, count is the histogram +// of bit lengths for the remaining symbols, len is the code length of the next +// processed symbol. +static inline int NextTableBitSize(const int* count, int len) { + int left = 1 << (len - kJpegHuffmanRootTableBits); + while (len < static_cast(kJpegHuffmanMaxBitLength)) { + left -= count[len]; + if (left <= 0) break; + ++len; + left <<= 1; + } + return len - kJpegHuffmanRootTableBits; +} + +void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols, + HuffmanTableEntry* lut) { + HuffmanTableEntry code; // current table entry + HuffmanTableEntry* table; // next available space in table + int len; // current code length + int idx; // symbol index + int key; // prefix code + int reps; // number of replicate key values in current table + int low; // low bits for current root entry + int table_bits; // key length of current table + int table_size; // size of current table + + // Make a local copy of the input bit length histogram. 
+ int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0}; + int total_count = 0; + for (len = 1; len <= static_cast(kJpegHuffmanMaxBitLength); ++len) { + tmp_count[len] = count[len]; + total_count += tmp_count[len]; + } + + table = lut; + table_bits = kJpegHuffmanRootTableBits; + table_size = 1 << table_bits; + + // Special case code with only one value. + if (total_count == 1) { + code.bits = 0; + code.value = symbols[0]; + for (key = 0; key < table_size; ++key) { + table[key] = code; + } + return; + } + + // Fill in root table. + key = 0; + idx = 0; + for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) { + for (; tmp_count[len] > 0; --tmp_count[len]) { + code.bits = len; + code.value = symbols[idx++]; + reps = 1 << (kJpegHuffmanRootTableBits - len); + while (reps--) { + table[key++] = code; + } + } + } + + // Fill in 2nd level tables and add pointers to root table. + table += table_size; + table_size = 0; + low = 0; + for (len = kJpegHuffmanRootTableBits + 1; + len <= static_cast(kJpegHuffmanMaxBitLength); ++len) { + for (; tmp_count[len] > 0; --tmp_count[len]) { + // Start a new sub-table if the previous one is full. + if (low >= table_size) { + table += table_size; + table_bits = NextTableBitSize(tmp_count, len); + table_size = 1 << table_bits; + low = 0; + lut[key].bits = table_bits + kJpegHuffmanRootTableBits; + lut[key].value = (table - lut) - key; + ++key; + } + code.bits = len - kJpegHuffmanRootTableBits; + code.value = symbols[idx++]; + reps = 1 << (table_bits - code.bits); + while (reps--) { + table[low++] = code; + } + } + } +} + +// A node of a Huffman tree. 
+struct HuffmanTree { + HuffmanTree(uint32_t count, int16_t left, int16_t right) + : total_count(count), index_left(left), index_right_or_value(right) {} + uint32_t total_count; + int16_t index_left; + int16_t index_right_or_value; +}; + +void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, + uint8_t level) { + if (p.index_left >= 0) { + ++level; + SetDepth(pool[p.index_left], pool, depth, level); + SetDepth(pool[p.index_right_or_value], pool, depth, level); + } else { + depth[p.index_right_or_value] = level; + } +} + +// Sort the root nodes, least popular first. +static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) { + return v0.total_count < v1.total_count; +} + +// This function will create a Huffman tree. +// +// The catch here is that the tree cannot be arbitrarily deep. +// Brotli specifies a maximum depth of 15 bits for "code trees" +// and 7 bits for "code length code trees." +// +// count_limit is the value that is to be faked as the minimum value +// and this minimum value is raised until the tree matches the +// maximum length requirement. +// +// This algorithm is not of excellent performance for very long data blocks, +// especially when population counts are longer than 2**tree_limit, but +// we are not planning to use this with extremely long blocks. +// +// See http://en.wikipedia.org/wiki/Huffman_coding +void CreateHuffmanTree(const uint32_t* data, const size_t length, + const int tree_limit, uint8_t* depth) { + // For block sizes below 64 kB, we never need to do a second iteration + // of this loop. Probably all of our block sizes will be smaller than + // that, so this loop is mostly of academic interest. If we actually + // would need this, we would be better off with the Katajainen algorithm. 
+ for (uint32_t count_limit = 1;; count_limit *= 2) { + std::vector tree; + tree.reserve(2 * length + 1); + + for (size_t i = length; i != 0;) { + --i; + if (data[i]) { + const uint32_t count = std::max(data[i], count_limit - 1); + tree.emplace_back(count, -1, static_cast(i)); + } + } + + const size_t n = tree.size(); + if (n == 1) { + // Fake value; will be fixed on upper level. + depth[tree[0].index_right_or_value] = 1; + break; + } + + std::stable_sort(tree.begin(), tree.end(), Compare); + + // The nodes are: + // [0, n): the sorted leaf nodes that we start with. + // [n]: we add a sentinel here. + // [n + 1, 2n): new parent nodes are added here, starting from + // (n+1). These are naturally in ascending order. + // [2n]: we add a sentinel at the end as well. + // There will be (2n+1) elements at the end. + const HuffmanTree sentinel(std::numeric_limits::max(), -1, -1); + tree.push_back(sentinel); + tree.push_back(sentinel); + + size_t i = 0; // Points to the next leaf node. + size_t j = n + 1; // Points to the next non-leaf node. + for (size_t k = n - 1; k != 0; --k) { + size_t left, right; + if (tree[i].total_count <= tree[j].total_count) { + left = i; + ++i; + } else { + left = j; + ++j; + } + if (tree[i].total_count <= tree[j].total_count) { + right = i; + ++i; + } else { + right = j; + ++j; + } + + // The sentinel node becomes the parent node. + size_t j_end = tree.size() - 1; + tree[j_end].total_count = + tree[left].total_count + tree[right].total_count; + tree[j_end].index_left = static_cast(left); + tree[j_end].index_right_or_value = static_cast(right); + + // Add back the last sentinel node. + tree.push_back(sentinel); + } + JXL_DASSERT(tree.size() == 2 * n + 1); + SetDepth(tree[2 * n - 1], &tree[0], depth, 0); + + // We need to pack the Huffman tree in tree_limit bits. + // If this was not successful, add fake entities to the lowest values + // and retry. 
+ if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) { + break; + } + } +} + +void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table, + bool is_dc) { + size_t total_symbols = 0; + size_t total_p = 0; + size_t max_depth = 0; + for (size_t d = 1; d <= kJpegHuffmanMaxBitLength; ++d) { + uint8_t count = table->bits[d]; + if (count) { + total_symbols += count; + total_p += (1u << (kJpegHuffmanMaxBitLength - d)) * count; + max_depth = d; + } + } + total_p += 1u << (kJpegHuffmanMaxBitLength - max_depth); // sentinel symbol + if (total_symbols == 0) { + JPEGLI_ERROR("Empty Huffman table"); + } + if (total_symbols > kJpegHuffmanAlphabetSize) { + JPEGLI_ERROR("Too many symbols in Huffman table"); + } + if (total_p != (1u << kJpegHuffmanMaxBitLength)) { + JPEGLI_ERROR("Invalid bit length distribution"); + } + uint8_t symbol_seen[kJpegHuffmanAlphabetSize] = {}; + for (size_t i = 0; i < total_symbols; ++i) { + uint8_t symbol = table->huffval[i]; + if (symbol_seen[symbol]) { + JPEGLI_ERROR("Duplicate symbol %d in Huffman table", symbol); + } + symbol_seen[symbol] = 1; + } +} + +void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc) { + // Huffman tables from the JPEG standard. 
+ static constexpr JHUFF_TBL kStandardDCTables[2] = { + // DC luma + {{0, 0, 1, 5, 1, 1, 1, 1, 1, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + FALSE}, + // DC chroma + {{0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + FALSE}}; + static constexpr JHUFF_TBL kStandardACTables[2] = { + // AC luma + {{0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125}, + {0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, + 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, + 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, + 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, + 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, + 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, + 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, + 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, + 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, + 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, + 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}, + FALSE}, + // AC chroma + {{0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119}, + {0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, + 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, + 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1, + 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, + 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, + 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, + 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, + 0x75, 
0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, + 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, + 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, + 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, + 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}, + FALSE}}; + const JHUFF_TBL* std_tables = is_dc ? kStandardDCTables : kStandardACTables; + JHUFF_TBL** tables; + if (cinfo->is_decompressor) { + j_decompress_ptr cinfo_d = reinterpret_cast(cinfo); + tables = is_dc ? cinfo_d->dc_huff_tbl_ptrs : cinfo_d->ac_huff_tbl_ptrs; + } else { + j_compress_ptr cinfo_c = reinterpret_cast(cinfo); + tables = is_dc ? cinfo_c->dc_huff_tbl_ptrs : cinfo_c->ac_huff_tbl_ptrs; + } + for (int i = 0; i < 2; ++i) { + if (tables[i] == nullptr) { + tables[i] = jpegli_alloc_huff_table(cinfo); + memcpy(tables[i], &std_tables[i], sizeof(JHUFF_TBL)); + ValidateHuffmanTable(cinfo, tables[i], is_dc); + } + } +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/huffman.h b/third-party/libjxl/libjxl/lib/jpegli/huffman.h new file mode 100644 index 0000000000..f0e5e1de40 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/huffman.h @@ -0,0 +1,50 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_HUFFMAN_H_ +#define LIB_JPEGLI_HUFFMAN_H_ + +#include +#include + +#include "lib/jpegli/common_internal.h" + +namespace jpegli { + +constexpr int kJpegHuffmanRootTableBits = 8; +// Maximum huffman lookup table size. 
+// According to zlib/examples/enough.c, 758 entries are always enough for +// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and +// max bit length 16 if the root table has 8 bits. +constexpr int kJpegHuffmanLutSize = 758; + +struct HuffmanTableEntry { + uint8_t bits; // number of bits used for this symbol + uint16_t value; // symbol value or table offset +}; + +void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols, + HuffmanTableEntry* lut); + +// This function will create a Huffman tree. +// +// The (data,length) contains the population counts. +// The tree_limit is the maximum bit depth of the Huffman codes. +// +// The depth contains the tree, i.e., how many bits are used for +// the symbol. +// +// See http://en.wikipedia.org/wiki/Huffman_coding +void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit, + uint8_t* depth); + +void ValidateHuffmanTable(j_common_ptr cinfo, const JHUFF_TBL* table, + bool is_dc); + +void AddStandardHuffmanTables(j_common_ptr cinfo, bool is_dc); + +} // namespace jpegli + +#endif // LIB_JPEGLI_HUFFMAN_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/idct.cc b/third-party/libjxl/libjxl/lib/jpegli/idct.cc new file mode 100644 index 0000000000..4d10563583 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/idct.cc @@ -0,0 +1,692 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/idct.h" + +#include + +#include "lib/jpegli/decode_internal.h" +#include "lib/jxl/base/status.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/idct.cc" +#include +#include + +#include "lib/jpegli/transpose-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::NegMulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Vec; +using hwy::HWY_NAMESPACE::Xor; + +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); +constexpr D d; +constexpr DI di; + +using D8 = HWY_CAPPED(float, 8); +constexpr D8 d8; + +void DequantBlock(const int16_t* JXL_RESTRICT qblock, + const float* JXL_RESTRICT dequant, + const float* JXL_RESTRICT biases, float* JXL_RESTRICT block) { + for (size_t k = 0; k < 64; k += Lanes(d)) { + const auto mul = Load(d, dequant + k); + const auto bias = Load(d, biases + k); + const Rebind di16; + const Vec quant_i = PromoteTo(di, Load(di16, qblock + k)); + const Rebind df; + const auto quant = ConvertTo(df, quant_i); + const auto abs_quant = Abs(quant); + const auto not_0 = Gt(abs_quant, Zero(df)); + const auto sign_quant = Xor(quant, abs_quant); + const auto biased_quant = Sub(quant, Xor(bias, sign_quant)); + const auto dequant = IfThenElseZero(not_0, Mul(biased_quant, mul)); + Store(dequant, d, block + k); + } +} + +template +void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride, + float* JXL_RESTRICT aout) { + for (size_t i = 0; i < N / 2; i++) { + auto in1 = LoadU(d8, ain + 2 * i * ain_stride); + Store(in1, d8, aout + i * 8); + } + for (size_t i = N / 2; i < N; i++) { + auto in1 = LoadU(d8, ain + (2 * (i - N / 2) + 1) * ain_stride); + Store(in1, d8, aout + i * 8); + } +} + +template +void BTranspose(float* JXL_RESTRICT coeff) { + for (size_t i = N - 1; i > 0; i--) { + auto in1 = Load(d8, coeff + i * 8); + auto in2 = Load(d8, coeff + (i - 1) * 8); + Store(Add(in1, in2), d8, coeff + i * 8); + } + constexpr float kSqrt2 = 1.41421356237f; + auto sqrt2 = Set(d8, kSqrt2); + auto in1 = Load(d8, coeff); + Store(Mul(in1, sqrt2), 
d8, coeff); +} + +// Constants for DCT implementation. Generated by the following snippet: +// for i in range(N // 2): +// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ") +template +struct WcMultipliers; + +template <> +struct WcMultipliers<4> { + static constexpr float kMultipliers[] = { + 0.541196100146197, + 1.3065629648763764, + }; +}; + +template <> +struct WcMultipliers<8> { + static constexpr float kMultipliers[] = { + 0.5097955791041592, + 0.6013448869350453, + 0.8999762231364156, + 2.5629154477415055, + }; +}; + +constexpr float WcMultipliers<4>::kMultipliers[]; +constexpr float WcMultipliers<8>::kMultipliers[]; + +template +void MultiplyAndAdd(const float* JXL_RESTRICT coeff, float* JXL_RESTRICT out, + size_t out_stride) { + for (size_t i = 0; i < N / 2; i++) { + auto mul = Set(d8, WcMultipliers::kMultipliers[i]); + auto in1 = Load(d8, coeff + i * 8); + auto in2 = Load(d8, coeff + (N / 2 + i) * 8); + auto out1 = MulAdd(mul, in2, in1); + auto out2 = NegMulAdd(mul, in2, in1); + StoreU(out1, d8, out + i * out_stride); + StoreU(out2, d8, out + (N - i - 1) * out_stride); + } +} + +template +struct IDCT1DImpl; + +template <> +struct IDCT1DImpl<1> { + JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + StoreU(LoadU(d8, from), d8, to); + } +}; + +template <> +struct IDCT1DImpl<2> { + JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + JXL_DASSERT(from_stride >= 8); + JXL_DASSERT(to_stride >= 8); + auto in1 = LoadU(d8, from); + auto in2 = LoadU(d8, from + from_stride); + StoreU(Add(in1, in2), d8, to); + StoreU(Sub(in1, in2), d8, to + to_stride); + } +}; + +template +struct IDCT1DImpl { + void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + JXL_DASSERT(from_stride >= 8); + JXL_DASSERT(to_stride >= 8); + HWY_ALIGN float tmp[64]; + ForwardEvenOdd(from, from_stride, tmp); + IDCT1DImpl()(tmp, 8, tmp, 8); + BTranspose(tmp 
+ N * 4); + IDCT1DImpl()(tmp + N * 4, 8, tmp + N * 4, 8); + MultiplyAndAdd(tmp, to, to_stride); + } +}; + +template +void IDCT1D(float* JXL_RESTRICT from, float* JXL_RESTRICT output, + size_t output_stride) { + for (size_t i = 0; i < 8; i += Lanes(d8)) { + IDCT1DImpl()(from + i, 8, output + i, output_stride); + } +} + +void ComputeScaledIDCT(float* JXL_RESTRICT block0, float* JXL_RESTRICT block1, + float* JXL_RESTRICT output, size_t output_stride) { + Transpose8x8Block(block0, block1); + IDCT1D<8>(block1, block0, 8); + Transpose8x8Block(block0, block1); + IDCT1D<8>(block1, output, output_stride); +} + +void InverseTransformBlock8x8(const int16_t* JXL_RESTRICT qblock, + const float* JXL_RESTRICT dequant, + const float* JXL_RESTRICT biases, + float* JXL_RESTRICT scratch_space, + float* JXL_RESTRICT output, size_t output_stride, + size_t dctsize) { + float* JXL_RESTRICT block0 = scratch_space; + float* JXL_RESTRICT block1 = scratch_space + DCTSIZE2; + DequantBlock(qblock, dequant, biases, block0); + ComputeScaledIDCT(block0, block1, output, output_stride); +} + +// Computes the N-point IDCT of in[], and stores the result in out[]. The in[] +// array is at most 8 values long, values in[8:N-1] are assumed to be 0. 
+void Compute1dIDCT(float* in, float* out, size_t N) { + switch (N) { + case 3: { + static constexpr float kC3[3] = { + 1.414213562373, + 1.224744871392, + 0.707106781187, + }; + float even0 = in[0] + kC3[2] * in[2]; + float even1 = in[0] - kC3[0] * in[2]; + float odd0 = kC3[1] * in[1]; + out[0] = even0 + odd0; + out[2] = even0 - odd0; + out[1] = even1; + break; + } + case 5: { + static constexpr float kC5[5] = { + 1.414213562373, 1.344997023928, 1.144122805635, + 0.831253875555, 0.437016024449, + }; + float even0 = in[0] + kC5[2] * in[2] + kC5[4] * in[4]; + float even1 = in[0] - kC5[4] * in[2] - kC5[2] * in[4]; + float even2 = in[0] - kC5[0] * in[2] + kC5[0] * in[4]; + float odd0 = kC5[1] * in[1] + kC5[3] * in[3]; + float odd1 = kC5[3] * in[1] - kC5[1] * in[3]; + out[0] = even0 + odd0; + out[4] = even0 - odd0; + out[1] = even1 + odd1; + out[3] = even1 - odd1; + out[2] = even2; + break; + } + case 6: { + static constexpr float kC6[6] = { + 1.414213562373, 1.366025403784, 1.224744871392, + 1.000000000000, 0.707106781187, 0.366025403784, + }; + float even0 = in[0] + kC6[2] * in[2] + kC6[4] * in[4]; + float even1 = in[0] - kC6[0] * in[4]; + float even2 = in[0] - kC6[2] * in[2] + kC6[4] * in[4]; + float odd0 = kC6[1] * in[1] + kC6[3] * in[3] + kC6[5] * in[5]; + float odd1 = kC6[3] * in[1] - kC6[3] * in[3] - kC6[3] * in[5]; + float odd2 = kC6[5] * in[1] - kC6[3] * in[3] + kC6[1] * in[5]; + out[0] = even0 + odd0; + out[5] = even0 - odd0; + out[1] = even1 + odd1; + out[4] = even1 - odd1; + out[2] = even2 + odd2; + out[3] = even2 - odd2; + break; + } + case 7: { + static constexpr float kC7[7] = { + 1.414213562373, 1.378756275744, 1.274162392264, 1.105676685997, + 0.881747733790, 0.613604268353, 0.314692122713, + }; + float even0 = in[0] + kC7[2] * in[2] + kC7[4] * in[4] + kC7[6] * in[6]; + float even1 = in[0] + kC7[6] * in[2] - kC7[2] * in[4] - kC7[4] * in[6]; + float even2 = in[0] - kC7[4] * in[2] - kC7[6] * in[4] + kC7[2] * in[6]; + float even3 = in[0] - kC7[0] * in[2] 
+ kC7[0] * in[4] - kC7[0] * in[6]; + float odd0 = kC7[1] * in[1] + kC7[3] * in[3] + kC7[5] * in[5]; + float odd1 = kC7[3] * in[1] - kC7[5] * in[3] - kC7[1] * in[5]; + float odd2 = kC7[5] * in[1] - kC7[1] * in[3] + kC7[3] * in[5]; + out[0] = even0 + odd0; + out[6] = even0 - odd0; + out[1] = even1 + odd1; + out[5] = even1 - odd1; + out[2] = even2 + odd2; + out[4] = even2 - odd2; + out[3] = even3; + break; + } + case 9: { + static constexpr float kC9[9] = { + 1.414213562373, 1.392728480640, 1.328926048777, + 1.224744871392, 1.083350440839, 0.909038955344, + 0.707106781187, 0.483689525296, 0.245575607938, + }; + float even0 = in[0] + kC9[2] * in[2] + kC9[4] * in[4] + kC9[6] * in[6]; + float even1 = in[0] + kC9[6] * in[2] - kC9[6] * in[4] - kC9[0] * in[6]; + float even2 = in[0] - kC9[8] * in[2] - kC9[2] * in[4] + kC9[6] * in[6]; + float even3 = in[0] - kC9[4] * in[2] + kC9[8] * in[4] + kC9[6] * in[6]; + float even4 = in[0] - kC9[0] * in[2] + kC9[0] * in[4] - kC9[0] * in[6]; + float odd0 = + kC9[1] * in[1] + kC9[3] * in[3] + kC9[5] * in[5] + kC9[7] * in[7]; + float odd1 = kC9[3] * in[1] - kC9[3] * in[5] - kC9[3] * in[7]; + float odd2 = + kC9[5] * in[1] - kC9[3] * in[3] - kC9[7] * in[5] + kC9[1] * in[7]; + float odd3 = + kC9[7] * in[1] - kC9[3] * in[3] + kC9[1] * in[5] - kC9[5] * in[7]; + out[0] = even0 + odd0; + out[8] = even0 - odd0; + out[1] = even1 + odd1; + out[7] = even1 - odd1; + out[2] = even2 + odd2; + out[6] = even2 - odd2; + out[3] = even3 + odd3; + out[5] = even3 - odd3; + out[4] = even4; + break; + } + case 10: { + static constexpr float kC10[10] = { + 1.414213562373, 1.396802246667, 1.344997023928, 1.260073510670, + 1.144122805635, 1.000000000000, 0.831253875555, 0.642039521920, + 0.437016024449, 0.221231742082, + }; + float even0 = in[0] + kC10[2] * in[2] + kC10[4] * in[4] + kC10[6] * in[6]; + float even1 = in[0] + kC10[6] * in[2] - kC10[8] * in[4] - kC10[2] * in[6]; + float even2 = in[0] - kC10[0] * in[4]; + float even3 = in[0] - kC10[6] * in[2] - kC10[8] 
* in[4] + kC10[2] * in[6]; + float even4 = in[0] - kC10[2] * in[2] + kC10[4] * in[4] - kC10[6] * in[6]; + float odd0 = + kC10[1] * in[1] + kC10[3] * in[3] + kC10[5] * in[5] + kC10[7] * in[7]; + float odd1 = + kC10[3] * in[1] + kC10[9] * in[3] - kC10[5] * in[5] - kC10[1] * in[7]; + float odd2 = + kC10[5] * in[1] - kC10[5] * in[3] - kC10[5] * in[5] + kC10[5] * in[7]; + float odd3 = + kC10[7] * in[1] - kC10[1] * in[3] + kC10[5] * in[5] + kC10[9] * in[7]; + float odd4 = + kC10[9] * in[1] - kC10[7] * in[3] + kC10[5] * in[5] - kC10[3] * in[7]; + out[0] = even0 + odd0; + out[9] = even0 - odd0; + out[1] = even1 + odd1; + out[8] = even1 - odd1; + out[2] = even2 + odd2; + out[7] = even2 - odd2; + out[3] = even3 + odd3; + out[6] = even3 - odd3; + out[4] = even4 + odd4; + out[5] = even4 - odd4; + break; + } + case 11: { + static constexpr float kC11[11] = { + 1.414213562373, 1.399818907436, 1.356927976287, 1.286413904599, + 1.189712155524, 1.068791297809, 0.926112931411, 0.764581576418, + 0.587485545401, 0.398430002847, 0.201263574413, + }; + float even0 = in[0] + kC11[2] * in[2] + kC11[4] * in[4] + kC11[6] * in[6]; + float even1 = + in[0] + kC11[6] * in[2] - kC11[10] * in[4] - kC11[4] * in[6]; + float even2 = + in[0] + kC11[10] * in[2] - kC11[2] * in[4] - kC11[8] * in[6]; + float even3 = in[0] - kC11[8] * in[2] - kC11[6] * in[4] + kC11[2] * in[6]; + float even4 = + in[0] - kC11[4] * in[2] + kC11[8] * in[4] + kC11[10] * in[6]; + float even5 = in[0] - kC11[0] * in[2] + kC11[0] * in[4] - kC11[0] * in[6]; + float odd0 = + kC11[1] * in[1] + kC11[3] * in[3] + kC11[5] * in[5] + kC11[7] * in[7]; + float odd1 = + kC11[3] * in[1] + kC11[9] * in[3] - kC11[7] * in[5] - kC11[1] * in[7]; + float odd2 = + kC11[5] * in[1] - kC11[7] * in[3] - kC11[3] * in[5] + kC11[9] * in[7]; + float odd3 = + kC11[7] * in[1] - kC11[1] * in[3] + kC11[9] * in[5] + kC11[5] * in[7]; + float odd4 = + kC11[9] * in[1] - kC11[5] * in[3] + kC11[1] * in[5] - kC11[3] * in[7]; + out[0] = even0 + odd0; + out[10] = even0 
- odd0; + out[1] = even1 + odd1; + out[9] = even1 - odd1; + out[2] = even2 + odd2; + out[8] = even2 - odd2; + out[3] = even3 + odd3; + out[7] = even3 - odd3; + out[4] = even4 + odd4; + out[6] = even4 - odd4; + out[5] = even5; + break; + } + case 12: { + static constexpr float kC12[12] = { + 1.414213562373, 1.402114769300, 1.366025403784, 1.306562964876, + 1.224744871392, 1.121971053594, 1.000000000000, 0.860918669154, + 0.707106781187, 0.541196100146, 0.366025403784, 0.184591911283, + }; + float even0 = in[0] + kC12[2] * in[2] + kC12[4] * in[4] + kC12[6] * in[6]; + float even1 = in[0] + kC12[6] * in[2] - kC12[6] * in[6]; + float even2 = + in[0] + kC12[10] * in[2] - kC12[4] * in[4] - kC12[6] * in[6]; + float even3 = + in[0] - kC12[10] * in[2] - kC12[4] * in[4] + kC12[6] * in[6]; + float even4 = in[0] - kC12[6] * in[2] + kC12[6] * in[6]; + float even5 = in[0] - kC12[2] * in[2] + kC12[4] * in[4] - kC12[6] * in[6]; + float odd0 = + kC12[1] * in[1] + kC12[3] * in[3] + kC12[5] * in[5] + kC12[7] * in[7]; + float odd1 = + kC12[3] * in[1] + kC12[9] * in[3] - kC12[9] * in[5] - kC12[3] * in[7]; + float odd2 = kC12[5] * in[1] - kC12[9] * in[3] - kC12[1] * in[5] - + kC12[11] * in[7]; + float odd3 = kC12[7] * in[1] - kC12[3] * in[3] - kC12[11] * in[5] + + kC12[1] * in[7]; + float odd4 = + kC12[9] * in[1] - kC12[3] * in[3] + kC12[3] * in[5] - kC12[9] * in[7]; + float odd5 = kC12[11] * in[1] - kC12[9] * in[3] + kC12[7] * in[5] - + kC12[5] * in[7]; + out[0] = even0 + odd0; + out[11] = even0 - odd0; + out[1] = even1 + odd1; + out[10] = even1 - odd1; + out[2] = even2 + odd2; + out[9] = even2 - odd2; + out[3] = even3 + odd3; + out[8] = even3 - odd3; + out[4] = even4 + odd4; + out[7] = even4 - odd4; + out[5] = even5 + odd5; + out[6] = even5 - odd5; + break; + } + case 13: { + static constexpr float kC13[13] = { + 1.414213562373, 1.403902353238, 1.373119086479, 1.322312651445, + 1.252223920364, 1.163874944761, 1.058554051646, 0.937797056801, + 0.803364869133, 0.657217812653, 
0.501487040539, 0.338443458124, + 0.170464607981, + }; + float even0 = in[0] + kC13[2] * in[2] + kC13[4] * in[4] + kC13[6] * in[6]; + float even1 = + in[0] + kC13[6] * in[2] + kC13[12] * in[4] - kC13[8] * in[6]; + float even2 = + in[0] + kC13[10] * in[2] - kC13[6] * in[4] - kC13[4] * in[6]; + float even3 = + in[0] - kC13[12] * in[2] - kC13[2] * in[4] + kC13[10] * in[6]; + float even4 = + in[0] - kC13[8] * in[2] - kC13[10] * in[4] + kC13[2] * in[6]; + float even5 = + in[0] - kC13[4] * in[2] + kC13[8] * in[4] - kC13[12] * in[6]; + float even6 = in[0] - kC13[0] * in[2] + kC13[0] * in[4] - kC13[0] * in[6]; + float odd0 = + kC13[1] * in[1] + kC13[3] * in[3] + kC13[5] * in[5] + kC13[7] * in[7]; + float odd1 = kC13[3] * in[1] + kC13[9] * in[3] - kC13[11] * in[5] - + kC13[5] * in[7]; + float odd2 = kC13[5] * in[1] - kC13[11] * in[3] - kC13[1] * in[5] - + kC13[9] * in[7]; + float odd3 = + kC13[7] * in[1] - kC13[5] * in[3] - kC13[9] * in[5] + kC13[3] * in[7]; + float odd4 = kC13[9] * in[1] - kC13[1] * in[3] + kC13[7] * in[5] + + kC13[11] * in[7]; + float odd5 = kC13[11] * in[1] - kC13[7] * in[3] + kC13[3] * in[5] - + kC13[1] * in[7]; + out[0] = even0 + odd0; + out[12] = even0 - odd0; + out[1] = even1 + odd1; + out[11] = even1 - odd1; + out[2] = even2 + odd2; + out[10] = even2 - odd2; + out[3] = even3 + odd3; + out[9] = even3 - odd3; + out[4] = even4 + odd4; + out[8] = even4 - odd4; + out[5] = even5 + odd5; + out[7] = even5 - odd5; + out[6] = even6; + break; + } + case 14: { + static constexpr float kC14[14] = { + 1.414213562373, 1.405321284327, 1.378756275744, 1.334852607020, + 1.274162392264, 1.197448846138, 1.105676685997, 1.000000000000, + 0.881747733790, 0.752406978226, 0.613604268353, 0.467085128785, + 0.314692122713, 0.158341680609, + }; + float even0 = in[0] + kC14[2] * in[2] + kC14[4] * in[4] + kC14[6] * in[6]; + float even1 = + in[0] + kC14[6] * in[2] + kC14[12] * in[4] - kC14[10] * in[6]; + float even2 = + in[0] + kC14[10] * in[2] - kC14[8] * in[4] - kC14[2] * 
in[6]; + float even3 = in[0] - kC14[0] * in[4]; + float even4 = + in[0] - kC14[10] * in[2] - kC14[8] * in[4] + kC14[2] * in[6]; + float even5 = + in[0] - kC14[6] * in[2] + kC14[12] * in[4] + kC14[10] * in[6]; + float even6 = in[0] - kC14[2] * in[2] + kC14[4] * in[4] - kC14[6] * in[6]; + float odd0 = + kC14[1] * in[1] + kC14[3] * in[3] + kC14[5] * in[5] + kC14[7] * in[7]; + float odd1 = kC14[3] * in[1] + kC14[9] * in[3] - kC14[13] * in[5] - + kC14[7] * in[7]; + float odd2 = kC14[5] * in[1] - kC14[13] * in[3] - kC14[3] * in[5] - + kC14[7] * in[7]; + float odd3 = + kC14[7] * in[1] - kC14[7] * in[3] - kC14[7] * in[5] + kC14[7] * in[7]; + float odd4 = kC14[9] * in[1] - kC14[1] * in[3] + kC14[11] * in[5] + + kC14[7] * in[7]; + float odd5 = kC14[11] * in[1] - kC14[5] * in[3] + kC14[1] * in[5] - + kC14[7] * in[7]; + float odd6 = kC14[13] * in[1] - kC14[11] * in[3] + kC14[9] * in[5] - + kC14[7] * in[7]; + out[0] = even0 + odd0; + out[13] = even0 - odd0; + out[1] = even1 + odd1; + out[12] = even1 - odd1; + out[2] = even2 + odd2; + out[11] = even2 - odd2; + out[3] = even3 + odd3; + out[10] = even3 - odd3; + out[4] = even4 + odd4; + out[9] = even4 - odd4; + out[5] = even5 + odd5; + out[8] = even5 - odd5; + out[6] = even6 + odd6; + out[7] = even6 - odd6; + break; + } + case 15: { + static constexpr float kC15[15] = { + 1.414213562373, 1.406466352507, 1.383309602960, 1.344997023928, + 1.291948376043, 1.224744871392, 1.144122805635, 1.050965490998, + 0.946293578512, 0.831253875555, 0.707106781187, 0.575212476952, + 0.437016024449, 0.294031532930, 0.147825570407, + }; + float even0 = in[0] + kC15[2] * in[2] + kC15[4] * in[4] + kC15[6] * in[6]; + float even1 = + in[0] + kC15[6] * in[2] + kC15[12] * in[4] - kC15[12] * in[6]; + float even2 = + in[0] + kC15[10] * in[2] - kC15[10] * in[4] - kC15[0] * in[6]; + float even3 = + in[0] + kC15[14] * in[2] - kC15[2] * in[4] - kC15[12] * in[6]; + float even4 = + in[0] - kC15[12] * in[2] - kC15[6] * in[4] + kC15[6] * in[6]; + float even5 = + 
in[0] - kC15[8] * in[2] - kC15[14] * in[4] + kC15[6] * in[6]; + float even6 = + in[0] - kC15[4] * in[2] + kC15[8] * in[4] - kC15[12] * in[6]; + float even7 = in[0] - kC15[0] * in[2] + kC15[0] * in[4] - kC15[0] * in[6]; + float odd0 = + kC15[1] * in[1] + kC15[3] * in[3] + kC15[5] * in[5] + kC15[7] * in[7]; + float odd1 = kC15[3] * in[1] + kC15[9] * in[3] - kC15[9] * in[7]; + float odd2 = kC15[5] * in[1] - kC15[5] * in[5] - kC15[5] * in[7]; + float odd3 = kC15[7] * in[1] - kC15[9] * in[3] - kC15[5] * in[5] + + kC15[11] * in[7]; + float odd4 = kC15[9] * in[1] - kC15[3] * in[3] + kC15[3] * in[7]; + float odd5 = kC15[11] * in[1] - kC15[3] * in[3] + kC15[5] * in[5] - + kC15[13] * in[7]; + float odd6 = kC15[13] * in[1] - kC15[9] * in[3] + kC15[5] * in[5] - + kC15[1] * in[7]; + out[0] = even0 + odd0; + out[14] = even0 - odd0; + out[1] = even1 + odd1; + out[13] = even1 - odd1; + out[2] = even2 + odd2; + out[12] = even2 - odd2; + out[3] = even3 + odd3; + out[11] = even3 - odd3; + out[4] = even4 + odd4; + out[10] = even4 - odd4; + out[5] = even5 + odd5; + out[9] = even5 - odd5; + out[6] = even6 + odd6; + out[8] = even6 - odd6; + out[7] = even7; + break; + } + case 16: { + static constexpr float kC16[16] = { + 1.414213562373, 1.407403737526, 1.387039845322, 1.353318001174, + 1.306562964876, 1.247225012987, 1.175875602419, 1.093201867002, + 1.000000000000, 0.897167586343, 0.785694958387, 0.666655658478, + 0.541196100146, 0.410524527522, 0.275899379283, 0.138617169199, + }; + float even0 = in[0] + kC16[2] * in[2] + kC16[4] * in[4] + kC16[6] * in[6]; + float even1 = + in[0] + kC16[6] * in[2] + kC16[12] * in[4] - kC16[14] * in[6]; + float even2 = + in[0] + kC16[10] * in[2] - kC16[12] * in[4] - kC16[2] * in[6]; + float even3 = + in[0] + kC16[14] * in[2] - kC16[4] * in[4] - kC16[10] * in[6]; + float even4 = + in[0] - kC16[14] * in[2] - kC16[4] * in[4] + kC16[10] * in[6]; + float even5 = + in[0] - kC16[10] * in[2] - kC16[12] * in[4] + kC16[2] * in[6]; + float even6 = + in[0] - 
kC16[6] * in[2] + kC16[12] * in[4] + kC16[14] * in[6]; + float even7 = in[0] - kC16[2] * in[2] + kC16[4] * in[4] - kC16[6] * in[6]; + float odd0 = (kC16[1] * in[1] + kC16[3] * in[3] + kC16[5] * in[5] + + kC16[7] * in[7]); + float odd1 = (kC16[3] * in[1] + kC16[9] * in[3] + kC16[15] * in[5] - + kC16[11] * in[7]); + float odd2 = (kC16[5] * in[1] + kC16[15] * in[3] - kC16[7] * in[5] - + kC16[3] * in[7]); + float odd3 = (kC16[7] * in[1] - kC16[11] * in[3] - kC16[3] * in[5] + + kC16[15] * in[7]); + float odd4 = (kC16[9] * in[1] - kC16[5] * in[3] - kC16[13] * in[5] + + kC16[1] * in[7]); + float odd5 = (kC16[11] * in[1] - kC16[1] * in[3] + kC16[9] * in[5] + + kC16[13] * in[7]); + float odd6 = (kC16[13] * in[1] - kC16[7] * in[3] + kC16[1] * in[5] - + kC16[5] * in[7]); + float odd7 = (kC16[15] * in[1] - kC16[13] * in[3] + kC16[11] * in[5] - + kC16[9] * in[7]); + out[0] = even0 + odd0; + out[15] = even0 - odd0; + out[1] = even1 + odd1; + out[14] = even1 - odd1; + out[2] = even2 + odd2; + out[13] = even2 - odd2; + out[3] = even3 + odd3; + out[12] = even3 - odd3; + out[4] = even4 + odd4; + out[11] = even4 - odd4; + out[5] = even5 + odd5; + out[10] = even5 - odd5; + out[6] = even6 + odd6; + out[9] = even6 - odd6; + out[7] = even7 + odd7; + out[8] = even7 - odd7; + break; + } + } +} + +void InverseTransformBlockGeneric(const int16_t* JXL_RESTRICT qblock, + const float* JXL_RESTRICT dequant, + const float* JXL_RESTRICT biases, + float* JXL_RESTRICT scratch_space, + float* JXL_RESTRICT output, + size_t output_stride, size_t dctsize) { + float* JXL_RESTRICT block0 = scratch_space; + float* JXL_RESTRICT block1 = scratch_space + DCTSIZE2; + DequantBlock(qblock, dequant, biases, block0); + if (dctsize == 1) { + *output = *block0; + } else if (dctsize == 2 || dctsize == 4) { + float* JXL_RESTRICT block2 = scratch_space + 2 * DCTSIZE2; + ComputeScaledIDCT(block0, block1, block2, 8); + if (dctsize == 4) { + for (size_t iy = 0; iy < 4; ++iy) { + for (size_t ix = 0; ix < 4; ++ix) { + 
float* block = &block2[16 * iy + 2 * ix]; + output[iy * output_stride + ix] = + 0.25f * (block[0] + block[1] + block[8] + block[9]); + } + } + } else { + for (size_t iy = 0; iy < 2; ++iy) { + for (size_t ix = 0; ix < 2; ++ix) { + float* block = &block2[32 * iy + 4 * ix]; + output[iy * output_stride + ix] = + 0.0625f * + (block[0] + block[1] + block[2] + block[3] + block[8] + block[9] + + block[10] + block[11] + block[16] + block[17] + block[18] + + block[19] + block[24] + block[25] + block[26] + block[27]); + } + } + } + } else { + float dctin[DCTSIZE]; + float dctout[DCTSIZE * 2]; + size_t insize = std::min(dctsize, DCTSIZE); + for (size_t ix = 0; ix < insize; ++ix) { + for (size_t iy = 0; iy < insize; ++iy) { + dctin[iy] = block0[iy * DCTSIZE + ix]; + } + Compute1dIDCT(dctin, dctout, dctsize); + for (size_t iy = 0; iy < dctsize; ++iy) { + block1[iy * dctsize + ix] = dctout[iy]; + } + } + for (size_t iy = 0; iy < dctsize; ++iy) { + Compute1dIDCT(block1 + iy * dctsize, output + iy * output_stride, + dctsize); + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(InverseTransformBlock8x8); +HWY_EXPORT(InverseTransformBlockGeneric); + +void ChooseInverseTransform(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + for (int c = 0; c < cinfo->num_components; ++c) { + if (m->scaled_dct_size[c] == DCTSIZE) { + m->inverse_transform[c] = HWY_DYNAMIC_DISPATCH(InverseTransformBlock8x8); + } else { + m->inverse_transform[c] = + HWY_DYNAMIC_DISPATCH(InverseTransformBlockGeneric); + } + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/idct.h b/third-party/libjxl/libjxl/lib/jpegli/idct.h new file mode 100644 index 0000000000..c2ec6d18dc --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/idct.h @@ -0,0 +1,18 @@ +// Copyright (c) the JPEG XL Project Authors. 
All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_IDCT_H_ +#define LIB_JPEGLI_IDCT_H_ + +#include "lib/jpegli/common.h" +#include "lib/jxl/base/compiler_specific.h" + +namespace jpegli { + +void ChooseInverseTransform(j_decompress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_IDCT_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/input.cc b/third-party/libjxl/libjxl/lib/jpegli/input.cc new file mode 100644 index 0000000000..765bf98946 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/input.cc @@ -0,0 +1,414 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/input.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/input.cc" +#include +#include + +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Vec; + +using D = HWY_FULL(float); +using DU = HWY_FULL(uint32_t); +using DU8 = Rebind; +using DU16 = Rebind; + +constexpr D d; +constexpr DU du; +constexpr DU8 du8; +constexpr DU16 du16; + +static constexpr double kMul16 = 1.0 / 257.0; +static constexpr double kMulFloat = 255.0; + +template +void ReadUint8Row(const uint8_t* row_in, size_t x0, size_t len, + float* row_out[kMaxComponents]) { + for (size_t x = x0; x < len; ++x) { + for (size_t c = 0; c < C; ++c) { + row_out[c][x] = row_in[C * x + c]; + } + } +} + +template +void ReadUint16Row(const uint8_t* row_in, size_t x0, size_t len, + float* row_out[kMaxComponents]) { + const uint16_t* row16 = reinterpret_cast(row_in); + for (size_t x = x0; x < len; ++x) { 
+ for (size_t c = 0; c < C; ++c) { + uint16_t val = row16[C * x + c]; + if (swap_endianness) val = JXL_BSWAP16(val); + row_out[c][x] = val * kMul16; + } + } +} + +template +void ReadFloatRow(const uint8_t* row_in, size_t x0, size_t len, + float* row_out[kMaxComponents]) { + const float* rowf = reinterpret_cast(row_in); + for (size_t x = x0; x < len; ++x) { + for (size_t c = 0; c < C; ++c) { + float val = rowf[C * x + c]; + if (swap_endianness) val = BSwapFloat(val); + row_out[c][x] = val * kMulFloat; + } + } +} + +void ReadUint8RowSingle(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + for (size_t x = 0; x < simd_len; x += N) { + Store(ConvertTo(d, PromoteTo(du, LoadU(du8, row_in + x))), d, row0 + x); + } + ReadUint8Row<1>(row_in, simd_len, len, row_out); +} + +void ReadUint8RowInterleaved2(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + Vec out0, out1; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved2(du8, row_in + 2 * x, out0, out1); + Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x); + Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x); + } + ReadUint8Row<2>(row_in, simd_len, len, row_out); +} + +void ReadUint8RowInterleaved3(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + Vec out0, out1, out2; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved3(du8, row_in + 3 * x, out0, out1, out2); + Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x); + Store(ConvertTo(d, 
PromoteTo(du, out1)), d, row1 + x); + Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x); + } + ReadUint8Row<3>(row_in, simd_len, len, row_out); +} + +void ReadUint8RowInterleaved4(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + float* JXL_RESTRICT const row3 = row_out[3]; + Vec out0, out1, out2, out3; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved4(du8, row_in + 4 * x, out0, out1, out2, out3); + Store(ConvertTo(d, PromoteTo(du, out0)), d, row0 + x); + Store(ConvertTo(d, PromoteTo(du, out1)), d, row1 + x); + Store(ConvertTo(d, PromoteTo(du, out2)), d, row2 + x); + Store(ConvertTo(d, PromoteTo(du, out3)), d, row3 + x); + } + ReadUint8Row<4>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowSingle(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + for (size_t x = 0; x < simd_len; x += N) { + Store(Mul(mul, ConvertTo(d, PromoteTo(du, LoadU(du16, row + x)))), d, + row0 + x); + } + ReadUint16Row<1>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowInterleaved2(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + Vec out0, out1; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved2(du16, row + 2 * x, out0, out1); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, 
out0))), d, row0 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x); + } + ReadUint16Row<2>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowInterleaved3(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + Vec out0, out1, out2; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved3(du16, row + 3 * x, out0, out1, out2); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x); + } + ReadUint16Row<3>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowInterleaved4(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMul16); + const uint16_t* JXL_RESTRICT const row = + reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + float* JXL_RESTRICT const row3 = row_out[3]; + Vec out0, out1, out2, out3; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved4(du16, row + 4 * x, out0, out1, out2, out3); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out0))), d, row0 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out1))), d, row1 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out2))), d, row2 + x); + Store(Mul(mul, ConvertTo(d, PromoteTo(du, out3))), d, row3 + x); + } + ReadUint16Row<4>(row_in, simd_len, len, row_out); +} + +void ReadUint16RowSingleSwap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { 
+ ReadUint16Row<1, true>(row_in, 0, len, row_out); +} + +void ReadUint16RowInterleaved2Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadUint16Row<2, true>(row_in, 0, len, row_out); +} + +void ReadUint16RowInterleaved3Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadUint16Row<3, true>(row_in, 0, len, row_out); +} + +void ReadUint16RowInterleaved4Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadUint16Row<4, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowSingle(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + for (size_t x = 0; x < simd_len; x += N) { + Store(Mul(mul, LoadU(d, row + x)), d, row0 + x); + } + ReadFloatRow<1>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowInterleaved2(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + Vec out0, out1; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved2(d, row + 2 * x, out0, out1); + Store(Mul(mul, out0), d, row0 + x); + Store(Mul(mul, out1), d, row1 + x); + } + ReadFloatRow<2>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowInterleaved3(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 
= row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + Vec out0, out1, out2; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved3(d, row + 3 * x, out0, out1, out2); + Store(Mul(mul, out0), d, row0 + x); + Store(Mul(mul, out1), d, row1 + x); + Store(Mul(mul, out2), d, row2 + x); + } + ReadFloatRow<3>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowInterleaved4(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + const size_t N = Lanes(d); + const size_t simd_len = len & (~(N - 1)); + const auto mul = Set(d, kMulFloat); + const float* JXL_RESTRICT const row = reinterpret_cast(row_in); + float* JXL_RESTRICT const row0 = row_out[0]; + float* JXL_RESTRICT const row1 = row_out[1]; + float* JXL_RESTRICT const row2 = row_out[2]; + float* JXL_RESTRICT const row3 = row_out[3]; + Vec out0, out1, out2, out3; + for (size_t x = 0; x < simd_len; x += N) { + LoadInterleaved4(d, row + 4 * x, out0, out1, out2, out3); + Store(Mul(mul, out0), d, row0 + x); + Store(Mul(mul, out1), d, row1 + x); + Store(Mul(mul, out2), d, row2 + x); + Store(Mul(mul, out3), d, row3 + x); + } + ReadFloatRow<4>(row_in, simd_len, len, row_out); +} + +void ReadFloatRowSingleSwap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadFloatRow<1, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowInterleaved2Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadFloatRow<2, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowInterleaved3Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadFloatRow<3, true>(row_in, 0, len, row_out); +} + +void ReadFloatRowInterleaved4Swap(const uint8_t* row_in, size_t len, + float* row_out[kMaxComponents]) { + ReadFloatRow<4, true>(row_in, 0, len, row_out); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + 
+HWY_EXPORT(ReadUint8RowSingle); +HWY_EXPORT(ReadUint8RowInterleaved2); +HWY_EXPORT(ReadUint8RowInterleaved3); +HWY_EXPORT(ReadUint8RowInterleaved4); +HWY_EXPORT(ReadUint16RowSingle); +HWY_EXPORT(ReadUint16RowInterleaved2); +HWY_EXPORT(ReadUint16RowInterleaved3); +HWY_EXPORT(ReadUint16RowInterleaved4); +HWY_EXPORT(ReadUint16RowSingleSwap); +HWY_EXPORT(ReadUint16RowInterleaved2Swap); +HWY_EXPORT(ReadUint16RowInterleaved3Swap); +HWY_EXPORT(ReadUint16RowInterleaved4Swap); +HWY_EXPORT(ReadFloatRowSingle); +HWY_EXPORT(ReadFloatRowInterleaved2); +HWY_EXPORT(ReadFloatRowInterleaved3); +HWY_EXPORT(ReadFloatRowInterleaved4); +HWY_EXPORT(ReadFloatRowSingleSwap); +HWY_EXPORT(ReadFloatRowInterleaved2Swap); +HWY_EXPORT(ReadFloatRowInterleaved3Swap); +HWY_EXPORT(ReadFloatRowInterleaved4Swap); + +void ChooseInputMethod(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + bool swap_endianness = + (m->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) || + (m->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian()); + m->input_method = nullptr; + if (m->data_type == JPEGLI_TYPE_UINT8) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowSingle); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved2); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved3); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint8RowInterleaved4); + } + } else if (m->data_type == JPEGLI_TYPE_UINT16 && !swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingle); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3); + } else 
if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4); + } + } else if (m->data_type == JPEGLI_TYPE_UINT16 && swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowSingleSwap); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved2Swap); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved3Swap); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadUint16RowInterleaved4Swap); + } + } else if (m->data_type == JPEGLI_TYPE_FLOAT && !swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingle); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4); + } + } else if (m->data_type == JPEGLI_TYPE_FLOAT && swap_endianness) { + if (cinfo->raw_data_in || cinfo->input_components == 1) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowSingleSwap); + } else if (cinfo->input_components == 2) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved2Swap); + } else if (cinfo->input_components == 3) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved3Swap); + } else if (cinfo->input_components == 4) { + m->input_method = HWY_DYNAMIC_DISPATCH(ReadFloatRowInterleaved4Swap); + } + } + if (m->input_method == nullptr) { + JPEGLI_ERROR("Could not find input method."); + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/input.h b/third-party/libjxl/libjxl/lib/jpegli/input.h new 
file mode 100644 index 0000000000..f54d0bee43 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/input.h @@ -0,0 +1,17 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_INPUT_H_ +#define LIB_JPEGLI_INPUT_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void ChooseInputMethod(j_compress_ptr cinfo); + +} // namespace jpegli + +#endif // LIB_JPEGLI_INPUT_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/input_suspension_test.cc b/third-party/libjxl/libjxl/lib/jpegli/input_suspension_test.cc new file mode 100644 index 0000000000..565559bcce --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/input_suspension_test.cc @@ -0,0 +1,613 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#include +#include + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" + +namespace jpegli { +namespace { + +static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9}; + +struct SourceManager { + SourceManager(const uint8_t* data, size_t len, size_t max_chunk_size, + bool is_partial_file) + : data_(data), + len_(len), + pos_(0), + max_chunk_size_(max_chunk_size), + is_partial_file_(is_partial_file) { + pub_.init_source = init_source; + pub_.fill_input_buffer = fill_input_buffer; + pub_.next_input_byte = nullptr; + pub_.bytes_in_buffer = 0; + pub_.skip_input_data = skip_input_data; + pub_.resync_to_restart = jpegli_resync_to_restart; + pub_.term_source = term_source; + if (max_chunk_size_ == 0) max_chunk_size_ = len; + } + + ~SourceManager() { + EXPECT_EQ(0, pub_.bytes_in_buffer); + if (!is_partial_file_) { + 
EXPECT_EQ(len_, pos_); + } + } + + bool LoadNextChunk() { + if (pos_ >= len_ && !is_partial_file_) { + return false; + } + if (pub_.bytes_in_buffer > 0) { + EXPECT_LE(pub_.bytes_in_buffer, buffer_.size()); + memmove(&buffer_[0], pub_.next_input_byte, pub_.bytes_in_buffer); + } + size_t chunk_size = + pos_ < len_ ? std::min(len_ - pos_, max_chunk_size_) : 2; + buffer_.resize(pub_.bytes_in_buffer + chunk_size); + memcpy(&buffer_[pub_.bytes_in_buffer], + pos_ < len_ ? data_ + pos_ : kFakeEoiMarker, chunk_size); + pub_.next_input_byte = &buffer_[0]; + pub_.bytes_in_buffer += chunk_size; + pos_ += chunk_size; + return true; + } + + private: + jpeg_source_mgr pub_; + std::vector buffer_; + const uint8_t* data_; + size_t len_; + size_t pos_; + size_t max_chunk_size_; + bool is_partial_file_; + + static void init_source(j_decompress_ptr cinfo) { + auto src = reinterpret_cast(cinfo->src); + src->pub_.next_input_byte = nullptr; + src->pub_.bytes_in_buffer = 0; + } + + static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; } + + static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { + auto src = reinterpret_cast(cinfo->src); + if (num_bytes <= 0) { + return; + } + if (src->pub_.bytes_in_buffer >= static_cast(num_bytes)) { + src->pub_.bytes_in_buffer -= num_bytes; + src->pub_.next_input_byte += num_bytes; + } else { + src->pos_ += num_bytes - src->pub_.bytes_in_buffer; + src->pub_.bytes_in_buffer = 0; + } + } + + static void term_source(j_decompress_ptr cinfo) {} +}; + +uint8_t markers_seen[kMarkerSequenceLen]; +size_t num_markers_seen = 0; + +uint8_t get_next_byte(j_decompress_ptr cinfo) { + cinfo->src->bytes_in_buffer--; + return *cinfo->src->next_input_byte++; +} + +boolean test_marker_processor(j_decompress_ptr cinfo) { + markers_seen[num_markers_seen] = cinfo->unread_marker; + if (cinfo->src->bytes_in_buffer < 2) { + return FALSE; + } + size_t marker_len = (get_next_byte(cinfo) << 8) + get_next_byte(cinfo); + EXPECT_EQ(2 + 
((num_markers_seen + 2) % sizeof(kMarkerData)), marker_len); + if (marker_len > 2) { + (*cinfo->src->skip_input_data)(cinfo, marker_len - 2); + } + ++num_markers_seen; + return TRUE; +} + +void ReadOutputImage(const DecompressParams& dparams, j_decompress_ptr cinfo, + SourceManager* src, TestImage* output) { + output->ysize = cinfo->output_height; + output->xsize = cinfo->output_width; + output->components = cinfo->num_components; + if (cinfo->raw_data_out) { + output->color_space = cinfo->jpeg_color_space; + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + std::vector plane(ysize * xsize); + output->raw_data.emplace_back(std::move(plane)); + } + } else { + output->color_space = cinfo->out_color_space; + output->AllocatePixels(); + } + size_t total_output_lines = 0; + while (cinfo->output_scanline < cinfo->output_height) { + size_t max_lines; + size_t num_output_lines; + if (cinfo->raw_data_out) { + size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE; + EXPECT_EQ(cinfo->output_scanline, cinfo->output_iMCU_row * iMCU_height); + max_lines = iMCU_height; + std::vector> rowdata(cinfo->num_components); + std::vector data(cinfo->num_components); + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE; + rowdata[c].resize(num_lines); + size_t y0 = cinfo->output_iMCU_row * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + y0 + i < ysize ? 
&output->raw_data[c][(y0 + i) * xsize] : nullptr; + } + data[c] = &rowdata[c][0]; + } + while ((num_output_lines = + jpegli_read_raw_data(cinfo, &data[0], max_lines)) == 0) { + JXL_CHECK(src && src->LoadNextChunk()); + } + } else { + size_t max_output_lines = dparams.max_output_lines; + if (max_output_lines == 0) max_output_lines = cinfo->output_height; + size_t lines_left = cinfo->output_height - cinfo->output_scanline; + max_lines = std::min(max_output_lines, lines_left); + size_t stride = cinfo->output_width * cinfo->num_components; + std::vector scanlines(max_lines); + for (size_t i = 0; i < max_lines; ++i) { + size_t yidx = cinfo->output_scanline + i; + scanlines[i] = &output->pixels[yidx * stride]; + } + while ((num_output_lines = jpegli_read_scanlines(cinfo, &scanlines[0], + max_lines)) == 0) { + JXL_CHECK(src && src->LoadNextChunk()); + } + } + total_output_lines += num_output_lines; + EXPECT_EQ(total_output_lines, cinfo->output_scanline); + if (num_output_lines < max_lines) { + JXL_CHECK(src && src->LoadNextChunk()); + } + } +} + +struct TestConfig { + std::string fn; + std::string fn_desc; + TestImage input; + CompressParams jparams; + DecompressParams dparams; + float max_rms_dist = 1.0f; +}; + +std::vector GetTestJpegData(TestConfig& config) { + if (!config.fn.empty()) { + return ReadTestData(config.fn.c_str()); + } + GeneratePixels(&config.input); + std::vector compressed; + JXL_CHECK(EncodeWithJpegli(config.input, config.jparams, &compressed)); + return compressed; +} + +bool IsSequential(const TestConfig& config) { + if (!config.fn.empty()) { + return config.fn_desc.find("PROGR") == std::string::npos; + } + return config.jparams.progressive_mode <= 0; +} + +class InputSuspensionTestParam : public ::testing::TestWithParam {}; + +TEST_P(InputSuspensionTestParam, InputOutputLockStepNonBuffered) { + TestConfig config = GetParam(); + const DecompressParams& dparams = config.dparams; + std::vector compressed = GetTestJpegData(config); + bool is_partial = 
config.dparams.size_factor < 1.0f; + if (is_partial) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size, + is_partial); + TestImage output0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + cinfo.src = reinterpret_cast(&src); + + if (config.jparams.add_marker) { + jpegli_save_markers(&cinfo, kSpecialMarker0, 0xffff); + jpegli_save_markers(&cinfo, kSpecialMarker1, 0xffff); + num_markers_seen = 0; + jpegli_set_marker_processor(&cinfo, 0xe6, test_marker_processor); + jpegli_set_marker_processor(&cinfo, 0xe7, test_marker_processor); + jpegli_set_marker_processor(&cinfo, 0xe8, test_marker_processor); + } + while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) { + JXL_CHECK(src.LoadNextChunk()); + } + SetDecompressParams(dparams, &cinfo); + jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness); + if (config.jparams.add_marker) { + EXPECT_EQ(num_markers_seen, kMarkerSequenceLen); + EXPECT_EQ(0, memcmp(markers_seen, kMarkerSequence, num_markers_seen)); + } + VerifyHeader(config.jparams, &cinfo); + cinfo.raw_data_out = dparams.output_mode == RAW_DATA; + + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays; + while ((coef_arrays = jpegli_read_coefficients(&cinfo)) == nullptr) { + JXL_CHECK(src.LoadNextChunk()); + } + CopyCoefficients(&cinfo, coef_arrays, &output0); + } else { + while (!jpegli_start_decompress(&cinfo)) { + JXL_CHECK(src.LoadNextChunk()); + } + ReadOutputImage(dparams, &cinfo, &src, &output0); + } + + while (!jpegli_finish_decompress(&cinfo)) { + JXL_CHECK(src.LoadNextChunk()); + } + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + TestImage output1; + DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1); + VerifyOutputImage(output1, output0, 
config.max_rms_dist); +} + +TEST_P(InputSuspensionTestParam, InputOutputLockStepBuffered) { + TestConfig config = GetParam(); + if (config.jparams.add_marker) return; + const DecompressParams& dparams = config.dparams; + std::vector compressed = GetTestJpegData(config); + bool is_partial = config.dparams.size_factor < 1.0f; + if (is_partial) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size, + is_partial); + std::vector output_progression0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + + cinfo.src = reinterpret_cast(&src); + + while (jpegli_read_header(&cinfo, TRUE) == JPEG_SUSPENDED) { + JXL_CHECK(src.LoadNextChunk()); + } + SetDecompressParams(dparams, &cinfo); + jpegli_set_output_format(&cinfo, dparams.data_type, dparams.endianness); + + cinfo.buffered_image = TRUE; + cinfo.raw_data_out = dparams.output_mode == RAW_DATA; + + EXPECT_TRUE(jpegli_start_decompress(&cinfo)); + EXPECT_FALSE(jpegli_input_complete(&cinfo)); + EXPECT_EQ(0, cinfo.output_scan_number); + + int sos_marker_cnt = 1; // read_header reads the first SOS marker + while (!jpegli_input_complete(&cinfo)) { + EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt); + EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number)); + // start output sets output_scan_number, but does not change + // input_scan_number + EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number); + EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt); + TestImage output; + ReadOutputImage(dparams, &cinfo, &src, &output); + output_progression0.emplace_back(std::move(output)); + // read scanlines/read raw data does not change input/output scan number + EXPECT_EQ(cinfo.input_scan_number, sos_marker_cnt); + EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number); + while (!jpegli_finish_output(&cinfo)) { + 
JXL_CHECK(src.LoadNextChunk()); + } + ++sos_marker_cnt; // finish output reads the next SOS marker or EOI + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(&cinfo, coef_arrays, &output_progression0.back()); + } + } + + EXPECT_TRUE(jpegli_finish_decompress(&cinfo)); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + std::vector output_progression1; + DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed, + &output_progression1); + ASSERT_EQ(output_progression0.size(), output_progression1.size()); + for (size_t i = 0; i < output_progression0.size(); ++i) { + const TestImage& output = output_progression0[i]; + const TestImage& expected = output_progression1[i]; + VerifyOutputImage(expected, output, config.max_rms_dist); + } +} + +TEST_P(InputSuspensionTestParam, PreConsumeInputBuffered) { + TestConfig config = GetParam(); + if (config.jparams.add_marker) return; + const DecompressParams& dparams = config.dparams; + std::vector compressed = GetTestJpegData(config); + bool is_partial = config.dparams.size_factor < 1.0f; + if (is_partial) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + std::vector output_progression1; + DecodeAllScansWithLibjpeg(config.jparams, dparams, compressed, + &output_progression1); + SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size, + is_partial); + TestImage output0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + cinfo.src = reinterpret_cast(&src); + + int status; + while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) { + if (status == JPEG_SUSPENDED) { + JXL_CHECK(src.LoadNextChunk()); + } + } + EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo)); + cinfo.buffered_image = TRUE; + cinfo.raw_data_out = 
dparams.output_mode == RAW_DATA; + cinfo.do_block_smoothing = dparams.do_block_smoothing; + + EXPECT_TRUE(jpegli_start_decompress(&cinfo)); + EXPECT_FALSE(jpegli_input_complete(&cinfo)); + EXPECT_EQ(1, cinfo.input_scan_number); + EXPECT_EQ(0, cinfo.output_scan_number); + + while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) { + if (status == JPEG_SUSPENDED) { + JXL_CHECK(src.LoadNextChunk()); + } + } + + EXPECT_TRUE(jpegli_input_complete(&cinfo)); + EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number); + EXPECT_EQ(0, cinfo.output_scan_number); + + EXPECT_TRUE(jpegli_start_output(&cinfo, cinfo.input_scan_number)); + EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number); + EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number); + + ReadOutputImage(dparams, &cinfo, nullptr, &output0); + EXPECT_EQ(output_progression1.size(), cinfo.input_scan_number); + EXPECT_EQ(cinfo.output_scan_number, cinfo.input_scan_number); + + EXPECT_TRUE(jpegli_finish_output(&cinfo)); + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(&cinfo, coef_arrays, &output0); + } + EXPECT_TRUE(jpegli_finish_decompress(&cinfo)); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + VerifyOutputImage(output_progression1.back(), output0, config.max_rms_dist); +} + +TEST_P(InputSuspensionTestParam, PreConsumeInputNonBuffered) { + TestConfig config = GetParam(); + if (config.jparams.add_marker || IsSequential(config)) return; + const DecompressParams& dparams = config.dparams; + std::vector compressed = GetTestJpegData(config); + bool is_partial = config.dparams.size_factor < 1.0f; + if (is_partial) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + SourceManager src(compressed.data(), compressed.size(), dparams.chunk_size, + is_partial); + TestImage output0; + jpeg_decompress_struct 
cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + cinfo.src = reinterpret_cast(&src); + + int status; + while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_SOS) { + if (status == JPEG_SUSPENDED) { + JXL_CHECK(src.LoadNextChunk()); + } + } + EXPECT_EQ(JPEG_REACHED_SOS, jpegli_consume_input(&cinfo)); + cinfo.raw_data_out = dparams.output_mode == RAW_DATA; + cinfo.do_block_smoothing = dparams.do_block_smoothing; + + if (dparams.output_mode == COEFFICIENTS) { + jpegli_read_coefficients(&cinfo); + } else { + while (!jpegli_start_decompress(&cinfo)) { + JXL_CHECK(src.LoadNextChunk()); + } + } + + while ((status = jpegli_consume_input(&cinfo)) != JPEG_REACHED_EOI) { + if (status == JPEG_SUSPENDED) { + JXL_CHECK(src.LoadNextChunk()); + } + } + + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(&cinfo, coef_arrays, &output0); + } else { + ReadOutputImage(dparams, &cinfo, nullptr, &output0); + } + + EXPECT_TRUE(jpegli_finish_decompress(&cinfo)); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + TestImage output1; + DecodeWithLibjpeg(config.jparams, dparams, compressed, &output1); + VerifyOutputImage(output1, output0, config.max_rms_dist); +} + +std::vector GenerateTests() { + std::vector all_tests; + std::vector> testfiles({ + {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"}, + {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"}, + {"jxl/flower/flower.png.im_q85_420_progr.jpg", "Q85YUV420PROGR"}, + }); + for (const auto& it : testfiles) { + for (size_t chunk_size : {1, 64, 65536}) { + for (size_t max_output_lines : {0, 1, 8, 16}) { + TestConfig config; + config.fn = it.first; + config.fn_desc = it.second; + config.dparams.chunk_size = chunk_size; + config.dparams.max_output_lines = max_output_lines; + 
all_tests.push_back(config); + if (max_output_lines == 16) { + config.dparams.output_mode = RAW_DATA; + all_tests.push_back(config); + config.dparams.output_mode = COEFFICIENTS; + all_tests.push_back(config); + } + } + } + } + for (size_t r : {1, 17, 1024}) { + for (size_t chunk_size : {1, 65536}) { + TestConfig config; + config.dparams.chunk_size = chunk_size; + config.jparams.progressive_mode = 2; + config.jparams.restart_interval = r; + all_tests.push_back(config); + } + } + for (size_t chunk_size : {1, 4, 1024}) { + TestConfig config; + config.input.xsize = 256; + config.input.ysize = 256; + config.dparams.chunk_size = chunk_size; + config.jparams.add_marker = true; + all_tests.push_back(config); + } + // Tests for partial input. + for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) { + for (int progr : {0, 1, 3}) { + for (int samp : {1, 2}) { + for (JpegIOMode output_mode : {PIXELS, RAW_DATA}) { + TestConfig config; + config.input.xsize = 517; + config.input.ysize = 523; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = progr; + config.dparams.size_factor = size_factor; + config.dparams.output_mode = output_mode; + // The last partially available block can behave differently. + // TODO(szabadka) Figure out if we can make the behaviour more + // similar. + config.max_rms_dist = samp == 1 ? 1.75f : 3.0f; + all_tests.push_back(config); + } + } + } + } + // Tests for block smoothing. + for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f, 1.0f}) { + for (int samp : {1, 2}) { + TestConfig config; + config.input.xsize = 517; + config.input.ysize = 523; + config.jparams.h_sampling = {samp, 1, 1}; + config.jparams.v_sampling = {samp, 1, 1}; + config.jparams.progressive_mode = 2; + config.dparams.size_factor = size_factor; + config.dparams.do_block_smoothing = true; + // libjpeg does smoothing for incomplete scans differently at + // the border between current and previous scans. 
+ config.max_rms_dist = 8.0f; + all_tests.push_back(config); + } + } + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + if (!c.fn.empty()) { + os << c.fn_desc; + } else { + os << c.input; + } + os << c.jparams; + if (c.dparams.chunk_size == 0) { + os << "CompleteInput"; + } else { + os << "InputChunks" << c.dparams.chunk_size; + } + if (c.dparams.size_factor < 1.0f) { + os << "Partial" << static_cast(c.dparams.size_factor * 100) << "p"; + } + if (c.dparams.max_output_lines == 0) { + os << "CompleteOutput"; + } else { + os << "OutputLines" << c.dparams.max_output_lines; + } + if (c.dparams.output_mode == RAW_DATA) { + os << "RawDataOut"; + } else if (c.dparams.output_mode == COEFFICIENTS) { + os << "CoeffsOut"; + } + if (c.dparams.do_block_smoothing) { + os << "BlockSmoothing"; + } + return os; +} + +std::string TestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(InputSuspensionTest, InputSuspensionTestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.62 b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.62 new file mode 100644 index 0000000000..3a8d1f5ec5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.62 @@ -0,0 +1,11 @@ +LIBJPEG_6.2 { + global: + jpeg*; +}; + +LIBJPEGTURBO_6.2 { + global: + jpeg_mem_src*; + jpeg_mem_dest*; + tj*; +}; \ No newline at end of file diff --git a/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.8 b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.8 new file mode 100644 index 0000000000..aa891f8571 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/jpeg.version.8 @@ -0,0 +1,9 @@ +LIBJPEG_8.0 { + global: + jpeg*; +}; + +LIBJPEGTURBO_8.0 { + global: + tj*; +}; diff --git a/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.cc 
b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.cc new file mode 100644 index 0000000000..de2303756e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.cc @@ -0,0 +1,261 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/libjpeg_test_util.h" + +/* clang-format off */ +#include +#include +#include +/* clang-format on */ + +#include "lib/jxl/sanitizers.h" + +namespace jpegli { + +namespace { + +#define JPEG_API_FN(name) jpeg_##name +#include "lib/jpegli/test_utils-inl.h" +#undef JPEG_API_FN + +void ReadOutputPass(j_decompress_ptr cinfo, const DecompressParams& dparams, + TestImage* output) { + JDIMENSION xoffset = 0; + JDIMENSION yoffset = 0; + JDIMENSION xsize_cropped = cinfo->output_width; + JDIMENSION ysize_cropped = cinfo->output_height; + if (dparams.crop_output) { + xoffset = xsize_cropped = cinfo->output_width / 3; + yoffset = ysize_cropped = cinfo->output_height / 3; + jpeg_crop_scanline(cinfo, &xoffset, &xsize_cropped); + JXL_CHECK(xsize_cropped == cinfo->output_width); + } + output->xsize = xsize_cropped; + output->ysize = ysize_cropped; + output->components = cinfo->out_color_components; + if (cinfo->quantize_colors) { + jxl::msan::UnpoisonMemory(cinfo->colormap, cinfo->out_color_components * + sizeof(cinfo->colormap[0])); + for (int c = 0; c < cinfo->out_color_components; ++c) { + jxl::msan::UnpoisonMemory( + cinfo->colormap[c], + cinfo->actual_number_of_colors * sizeof(cinfo->colormap[c][0])); + } + } + if (!cinfo->raw_data_out) { + size_t stride = output->xsize * output->components; + output->pixels.resize(output->ysize * stride); + output->color_space = cinfo->out_color_space; + if (yoffset > 0) { + jpeg_skip_scanlines(cinfo, yoffset); + } + for (size_t y = 0; y < output->ysize; ++y) { + JSAMPROW rows[] = { + reinterpret_cast(&output->pixels[y * stride])}; + 
JXL_CHECK(1 == jpeg_read_scanlines(cinfo, rows, 1)); + jxl::msan::UnpoisonMemory( + rows[0], sizeof(JSAMPLE) * cinfo->output_components * output->xsize); + if (cinfo->quantize_colors) { + UnmapColors(rows[0], cinfo->output_width, cinfo->out_color_components, + cinfo->colormap, cinfo->actual_number_of_colors); + } + } + if (cinfo->output_scanline < cinfo->output_height) { + jpeg_skip_scanlines(cinfo, cinfo->output_height - cinfo->output_scanline); + } + } else { + output->color_space = cinfo->jpeg_color_space; + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + std::vector plane(ysize * xsize); + output->raw_data.emplace_back(std::move(plane)); + } + while (cinfo->output_scanline < cinfo->output_height) { + size_t iMCU_height = cinfo->max_v_samp_factor * DCTSIZE; + JXL_CHECK(cinfo->output_scanline == cinfo->output_iMCU_row * iMCU_height); + std::vector> rowdata(cinfo->num_components); + std::vector data(cinfo->num_components); + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t ysize = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = cinfo->comp_info[c].v_samp_factor * DCTSIZE; + rowdata[c].resize(num_lines); + size_t y0 = cinfo->output_iMCU_row * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + y0 + i < ysize ? 
&output->raw_data[c][(y0 + i) * xsize] : nullptr; + } + data[c] = &rowdata[c][0]; + } + JXL_CHECK(iMCU_height == + jpeg_read_raw_data(cinfo, &data[0], iMCU_height)); + } + } + JXL_CHECK(cinfo->total_iMCU_rows == + DivCeil(cinfo->image_height, cinfo->max_v_samp_factor * DCTSIZE)); +} + +void DecodeWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, j_decompress_ptr cinfo, + TestImage* output) { + if (jparams.add_marker) { + jpeg_save_markers(cinfo, kSpecialMarker0, 0xffff); + jpeg_save_markers(cinfo, kSpecialMarker1, 0xffff); + } + if (!jparams.icc.empty()) { + jpeg_save_markers(cinfo, JPEG_APP0 + 2, 0xffff); + } + JXL_CHECK(JPEG_REACHED_SOS == + jpeg_read_header(cinfo, /*require_image=*/TRUE)); + if (!jparams.icc.empty()) { + uint8_t* icc_data = nullptr; + unsigned int icc_len; + JXL_CHECK(jpeg_read_icc_profile(cinfo, &icc_data, &icc_len)); + JXL_CHECK(icc_data); + jxl::msan::UnpoisonMemory(icc_data, icc_len); + JXL_CHECK(0 == memcmp(jparams.icc.data(), icc_data, icc_len)); + free(icc_data); + } + SetDecompressParams(dparams, cinfo); + VerifyHeader(jparams, cinfo); + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(cinfo, coef_arrays, output); + } else { + JXL_CHECK(jpeg_start_decompress(cinfo)); + VerifyScanHeader(jparams, cinfo); + ReadOutputPass(cinfo, dparams, output); + } + JXL_CHECK(jpeg_finish_decompress(cinfo)); +} + +} // namespace + +// Verifies that an image encoded with libjpegli can be decoded with libjpeg, +// and checks that the jpeg coding metadata matches jparams. 
+void DecodeAllScansWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const std::vector& compressed, + std::vector* output_progression) { + jpeg_decompress_struct cinfo = {}; + const auto try_catch_block = [&]() { + jpeg_error_mgr jerr; + jmp_buf env; + cinfo.err = jpeg_std_error(&jerr); + if (setjmp(env)) { + return false; + } + cinfo.client_data = reinterpret_cast(&env); + cinfo.err->error_exit = [](j_common_ptr cinfo) { + (*cinfo->err->output_message)(cinfo); + jmp_buf* env = reinterpret_cast(cinfo->client_data); + jpeg_destroy(cinfo); + longjmp(*env, 1); + }; + jpeg_create_decompress(&cinfo); + jpeg_mem_src(&cinfo, compressed.data(), compressed.size()); + if (jparams.add_marker) { + jpeg_save_markers(&cinfo, kSpecialMarker0, 0xffff); + jpeg_save_markers(&cinfo, kSpecialMarker1, 0xffff); + } + JXL_CHECK(JPEG_REACHED_SOS == + jpeg_read_header(&cinfo, /*require_image=*/TRUE)); + cinfo.buffered_image = TRUE; + SetDecompressParams(dparams, &cinfo); + VerifyHeader(jparams, &cinfo); + JXL_CHECK(jpeg_start_decompress(&cinfo)); + // start decompress should not read the whole input in buffered image mode + JXL_CHECK(!jpeg_input_complete(&cinfo)); + JXL_CHECK(cinfo.output_scan_number == 0); + int sos_marker_cnt = 1; // read header reads the first SOS marker + while (!jpeg_input_complete(&cinfo)) { + JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt); + if (dparams.skip_scans && (cinfo.input_scan_number % 2) != 1) { + int result = JPEG_SUSPENDED; + while (result != JPEG_REACHED_SOS && result != JPEG_REACHED_EOI) { + result = jpeg_consume_input(&cinfo); + } + if (result == JPEG_REACHED_SOS) ++sos_marker_cnt; + continue; + } + SetScanDecompressParams(dparams, &cinfo, cinfo.input_scan_number); + JXL_CHECK(jpeg_start_output(&cinfo, cinfo.input_scan_number)); + // start output sets output_scan_number, but does not change + // input_scan_number + JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number); + JXL_CHECK(cinfo.input_scan_number == 
sos_marker_cnt); + VerifyScanHeader(jparams, &cinfo); + TestImage output; + ReadOutputPass(&cinfo, dparams, &output); + output_progression->emplace_back(std::move(output)); + // read scanlines/read raw data does not change input/output scan number + if (!cinfo.progressive_mode) { + JXL_CHECK(cinfo.input_scan_number == sos_marker_cnt); + JXL_CHECK(cinfo.output_scan_number == cinfo.input_scan_number); + } + JXL_CHECK(jpeg_finish_output(&cinfo)); + ++sos_marker_cnt; // finish output reads the next SOS marker or EOI + if (dparams.output_mode == COEFFICIENTS) { + jvirt_barray_ptr* coef_arrays = jpeg_read_coefficients(&cinfo); + JXL_CHECK(coef_arrays != nullptr); + CopyCoefficients(&cinfo, coef_arrays, &output_progression->back()); + } + } + JXL_CHECK(jpeg_finish_decompress(&cinfo)); + return true; + }; + JXL_CHECK(try_catch_block()); + jpeg_destroy_decompress(&cinfo); +} + +// Returns the number of bytes read from compressed. +size_t DecodeWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const uint8_t* table_stream, size_t table_stream_size, + const uint8_t* compressed, size_t len, + TestImage* output) { + jpeg_decompress_struct cinfo = {}; + size_t bytes_read; + const auto try_catch_block = [&]() { + jpeg_error_mgr jerr; + jmp_buf env; + cinfo.err = jpeg_std_error(&jerr); + if (setjmp(env)) { + return false; + } + cinfo.client_data = reinterpret_cast(&env); + cinfo.err->error_exit = [](j_common_ptr cinfo) { + (*cinfo->err->output_message)(cinfo); + jmp_buf* env = reinterpret_cast(cinfo->client_data); + jpeg_destroy(cinfo); + longjmp(*env, 1); + }; + jpeg_create_decompress(&cinfo); + if (table_stream != nullptr) { + jpeg_mem_src(&cinfo, table_stream, table_stream_size); + jpeg_read_header(&cinfo, FALSE); + } + jpeg_mem_src(&cinfo, compressed, len); + DecodeWithLibjpeg(jparams, dparams, &cinfo, output); + bytes_read = len - cinfo.src->bytes_in_buffer; + return true; + }; + JXL_CHECK(try_catch_block()); + jpeg_destroy_decompress(&cinfo); + 
return bytes_read; +} + +void DecodeWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const std::vector& compressed, + TestImage* output) { + DecodeWithLibjpeg(jparams, dparams, nullptr, 0, compressed.data(), + compressed.size(), output); +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.h b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.h new file mode 100644 index 0000000000..18cc1e57b5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_test_util.h @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_ +#define LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_ + +#include +#include + +#include + +#include "lib/jpegli/test_params.h" + +namespace jpegli { + +// Verifies that an image encoded with libjpegli can be decoded with libjpeg, +// and checks that the jpeg coding metadata matches jparams. +void DecodeAllScansWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const std::vector& compressed, + std::vector* output_progression); +// Returns the number of bytes read from compressed. 
+size_t DecodeWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const uint8_t* table_stream, size_t table_stream_size, + const uint8_t* compressed, size_t len, + TestImage* output); +void DecodeWithLibjpeg(const CompressParams& jparams, + const DecompressParams& dparams, + const std::vector& compressed, + TestImage* output); + +} // namespace jpegli + +#endif // LIB_JPEGLI_LIBJPEG_TEST_UTIL_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/libjpeg_wrapper.cc b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_wrapper.cc new file mode 100644 index 0000000000..b38d16f255 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/libjpeg_wrapper.cc @@ -0,0 +1,255 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// This file contains wrapper-functions that are used to build the libjpeg.so +// shared library that is API- and ABI-compatible with libjpeg-turbo's version +// of libjpeg.so. 
+ +#include "lib/jpegli/common.h" +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/error.h" + +struct jpeg_error_mgr *jpeg_std_error(struct jpeg_error_mgr *err) { + return jpegli_std_error(err); +} + +void jpeg_abort(j_common_ptr cinfo) { jpegli_abort(cinfo); } + +void jpeg_destroy(j_common_ptr cinfo) { jpegli_destroy(cinfo); } + +JQUANT_TBL *jpeg_alloc_quant_table(j_common_ptr cinfo) { + return jpegli_alloc_quant_table(cinfo); +} + +JHUFF_TBL *jpeg_alloc_huff_table(j_common_ptr cinfo) { + return jpegli_alloc_huff_table(cinfo); +} + +void jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, + size_t structsize) { + jpegli_CreateDecompress(cinfo, version, structsize); +} + +void jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile) { + jpegli_stdio_src(cinfo, infile); +} + +void jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer, + unsigned long insize) { + jpegli_mem_src(cinfo, inbuffer, insize); +} + +int jpeg_read_header(j_decompress_ptr cinfo, boolean require_image) { + return jpegli_read_header(cinfo, require_image); +} + +boolean jpeg_start_decompress(j_decompress_ptr cinfo) { + return jpegli_start_decompress(cinfo); +} + +JDIMENSION jpeg_read_scanlines(j_decompress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION max_lines) { + return jpegli_read_scanlines(cinfo, scanlines, max_lines); +} + +JDIMENSION jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines) { + return jpegli_skip_scanlines(cinfo, num_lines); +} + +void jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset, + JDIMENSION *width) { + jpegli_crop_scanline(cinfo, xoffset, width); +} + +boolean jpeg_finish_decompress(j_decompress_ptr cinfo) { + return jpegli_finish_decompress(cinfo); +} + +JDIMENSION jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION max_lines) { + return jpegli_read_raw_data(cinfo, data, max_lines); +} + +jvirt_barray_ptr *jpeg_read_coefficients(j_decompress_ptr cinfo) { + return 
jpegli_read_coefficients(cinfo); +} + +boolean jpeg_has_multiple_scans(j_decompress_ptr cinfo) { + return jpegli_has_multiple_scans(cinfo); +} + +boolean jpeg_start_output(j_decompress_ptr cinfo, int scan_number) { + return jpegli_start_output(cinfo, scan_number); +} + +boolean jpeg_finish_output(j_decompress_ptr cinfo) { + return jpegli_finish_output(cinfo); +} + +boolean jpeg_input_complete(j_decompress_ptr cinfo) { + return jpegli_input_complete(cinfo); +} + +int jpeg_consume_input(j_decompress_ptr cinfo) { + return jpegli_consume_input(cinfo); +} + +#if JPEG_LIB_VERSION >= 80 +void jpeg_core_output_dimensions(j_decompress_ptr cinfo) { + jpegli_core_output_dimensions(cinfo); +} +#endif +void jpeg_calc_output_dimensions(j_decompress_ptr cinfo) { + jpegli_calc_output_dimensions(cinfo); +} + +void jpeg_save_markers(j_decompress_ptr cinfo, int marker_code, + unsigned int length_limit) { + jpegli_save_markers(cinfo, marker_code, length_limit); +} + +void jpeg_set_marker_processor(j_decompress_ptr cinfo, int marker_code, + jpeg_marker_parser_method routine) { + jpegli_set_marker_processor(cinfo, marker_code, routine); +} + +boolean jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr, + unsigned int *icc_data_len) { + return jpegli_read_icc_profile(cinfo, icc_data_ptr, icc_data_len); +} + +void jpeg_abort_decompress(j_decompress_ptr cinfo) { + return jpegli_abort_decompress(cinfo); +} + +void jpeg_destroy_decompress(j_decompress_ptr cinfo) { + return jpegli_destroy_decompress(cinfo); +} + +void jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize) { + jpegli_CreateCompress(cinfo, version, structsize); +} + +void jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile) { + jpegli_stdio_dest(cinfo, outfile); +} + +void jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer, + unsigned long *outsize) { + jpegli_mem_dest(cinfo, outbuffer, outsize); +} + +void jpeg_set_defaults(j_compress_ptr cinfo) { jpegli_set_defaults(cinfo); } + 
+void jpeg_default_colorspace(j_compress_ptr cinfo) { + jpegli_default_colorspace(cinfo); +} + +void jpeg_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace) { + jpegli_set_colorspace(cinfo, colorspace); +} + +void jpeg_set_quality(j_compress_ptr cinfo, int quality, + boolean force_baseline) { + jpegli_set_quality(cinfo, quality, force_baseline); +} + +void jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor, + boolean force_baseline) { + jpegli_set_linear_quality(cinfo, scale_factor, force_baseline); +} + +#if JPEG_LIB_VERSION >= 70 +void jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline) { + jpegli_default_qtables(cinfo, force_baseline); +} +#endif + +int jpeg_quality_scaling(int quality) { + return jpegli_quality_scaling(quality); +} + +void jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl, + const unsigned int *basic_table, int scale_factor, + boolean force_baseline) { + jpegli_add_quant_table(cinfo, which_tbl, basic_table, scale_factor, + force_baseline); +} + +void jpeg_simple_progression(j_compress_ptr cinfo) { + jpegli_simple_progression(cinfo); +} + +void jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress) { + jpegli_suppress_tables(cinfo, suppress); +} + +#if JPEG_LIB_VERSION >= 70 +void jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo) { + jpegli_calc_jpeg_dimensions(cinfo); +} +#endif + +void jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, + j_compress_ptr dstinfo) { + jpegli_copy_critical_parameters(srcinfo, dstinfo); +} + +void jpeg_write_m_header(j_compress_ptr cinfo, int marker, + unsigned int datalen) { + jpegli_write_m_header(cinfo, marker, datalen); +} + +void jpeg_write_m_byte(j_compress_ptr cinfo, int val) { + jpegli_write_m_byte(cinfo, val); +} + +void jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr, + unsigned int datalen) { + jpegli_write_marker(cinfo, marker, dataptr, datalen); +} + +void jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET 
*icc_data_ptr, + unsigned int icc_data_len) { + jpegli_write_icc_profile(cinfo, icc_data_ptr, icc_data_len); +} + +void jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables) { + jpegli_start_compress(cinfo, write_all_tables); +} + +void jpeg_write_tables(j_compress_ptr cinfo) { jpegli_write_tables(cinfo); } + +JDIMENSION jpeg_write_scanlines(j_compress_ptr cinfo, JSAMPARRAY scanlines, + JDIMENSION num_lines) { + return jpegli_write_scanlines(cinfo, scanlines, num_lines); +} + +JDIMENSION jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data, + JDIMENSION num_lines) { + return jpegli_write_raw_data(cinfo, data, num_lines); +} + +void jpeg_write_coefficients(j_compress_ptr cinfo, + jvirt_barray_ptr *coef_arrays) { + jpegli_write_coefficients(cinfo, coef_arrays); +} + +void jpeg_finish_compress(j_compress_ptr cinfo) { + jpegli_finish_compress(cinfo); +} + +void jpeg_abort_compress(j_compress_ptr cinfo) { jpegli_abort_compress(cinfo); } + +void jpeg_destroy_compress(j_compress_ptr cinfo) { + jpegli_destroy_compress(cinfo); +} + +boolean jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired) { + return jpegli_resync_to_restart(cinfo, desired); +} + +void jpeg_new_colormap(j_decompress_ptr cinfo) { jpegli_new_colormap(cinfo); } diff --git a/third-party/libjxl/libjxl/lib/jpegli/memory_manager.cc b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.cc new file mode 100644 index 0000000000..3a8f230e63 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.cc @@ -0,0 +1,186 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/memory_manager.h" + +#include + +#include +#include + +#include "lib/jpegli/common_internal.h" +#include "lib/jpegli/error.h" + +struct jvirt_sarray_control { + JSAMPARRAY full_buffer; + size_t numrows; + JDIMENSION maxaccess; +}; + +struct jvirt_barray_control { + JBLOCKARRAY full_buffer; + size_t numrows; + JDIMENSION maxaccess; +}; + +namespace jpegli { + +namespace { + +struct MemoryManager { + struct jpeg_memory_mgr pub; + std::vector owned_ptrs[2 * JPOOL_NUMPOOLS]; + uint64_t pool_memory_usage[2 * JPOOL_NUMPOOLS]; + uint64_t total_memory_usage; + uint64_t peak_memory_usage; +}; + +void* Alloc(j_common_ptr cinfo, int pool_id, size_t sizeofobject) { + MemoryManager* mem = reinterpret_cast(cinfo->mem); + if (pool_id < 0 || pool_id >= 2 * JPOOL_NUMPOOLS) { + JPEGLI_ERROR("Invalid pool id %d", pool_id); + } + if (mem->pub.max_memory_to_use > 0 && + mem->total_memory_usage + static_cast(sizeofobject) > + static_cast(mem->pub.max_memory_to_use)) { + JPEGLI_ERROR("Total memory usage exceeding %ld", + mem->pub.max_memory_to_use); + } + void* p; + if (pool_id < JPOOL_NUMPOOLS) { + p = malloc(sizeofobject); + } else { + p = hwy::AllocateAlignedBytes(sizeofobject, nullptr, nullptr); + } + if (p == nullptr) { + JPEGLI_ERROR("Out of memory"); + } + mem->owned_ptrs[pool_id].push_back(p); + mem->pool_memory_usage[pool_id] += sizeofobject; + mem->total_memory_usage += sizeofobject; + mem->peak_memory_usage = + std::max(mem->peak_memory_usage, mem->total_memory_usage); + return p; +} + +constexpr size_t gcd(size_t a, size_t b) { return b == 0 ? a : gcd(b, a % b); } +constexpr size_t lcm(size_t a, size_t b) { return (a * b) / gcd(a, b); } + +template +T** Alloc2dArray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow, + JDIMENSION numrows) { + T** array = Allocate(cinfo, numrows, pool_id); + // Always use aligned allocator for large 2d arrays. 
+ if (pool_id < JPOOL_NUMPOOLS) { + pool_id += JPOOL_NUMPOOLS; + } + size_t alignment = lcm(sizeof(T), HWY_ALIGNMENT); + size_t memstride = RoundUpTo(samplesperrow * sizeof(T), alignment); + size_t stride = memstride / sizeof(T); + T* buffer = Allocate(cinfo, numrows * stride, pool_id); + for (size_t i = 0; i < numrows; ++i) { + array[i] = &buffer[i * stride]; + } + return array; +} + +template +Control* RequestVirtualArray(j_common_ptr cinfo, int pool_id, boolean pre_zero, + JDIMENSION samplesperrow, JDIMENSION numrows, + JDIMENSION maxaccess) { + if (pool_id != JPOOL_IMAGE) { + JPEGLI_ERROR("Only image lifetime virtual arrays are supported."); + } + Control* p = Allocate(cinfo, 1, pool_id); + p->full_buffer = Alloc2dArray(cinfo, pool_id, samplesperrow, numrows); + p->numrows = numrows; + p->maxaccess = maxaccess; + if (pre_zero) { + for (size_t i = 0; i < numrows; ++i) { + memset(p->full_buffer[i], 0, samplesperrow * sizeof(T)); + } + } + return p; +} + +void RealizeVirtualArrays(j_common_ptr cinfo) { + // Nothing to do, the full arrays were realized at request time already. 
+} + +template +T** AccessVirtualArray(j_common_ptr cinfo, Control* ptr, JDIMENSION start_row, + JDIMENSION num_rows, boolean writable) { + if (num_rows > ptr->maxaccess) { + JPEGLI_ERROR("Invalid virtual array access, num rows %u vs max rows %u", + num_rows, ptr->maxaccess); + } + if (start_row + num_rows > ptr->numrows) { + JPEGLI_ERROR("Invalid virtual array access, %u vs %u total rows", + start_row + num_rows, ptr->numrows); + } + if (ptr->full_buffer == nullptr) { + JPEGLI_ERROR("Invalid virtual array access, array not realized."); + } + return ptr->full_buffer + start_row; +} + +void ClearPool(j_common_ptr cinfo, int pool_id) { + MemoryManager* mem = reinterpret_cast(cinfo->mem); + mem->owned_ptrs[pool_id].clear(); + mem->total_memory_usage -= mem->pool_memory_usage[pool_id]; + mem->pool_memory_usage[pool_id] = 0; +} + +void FreePool(j_common_ptr cinfo, int pool_id) { + MemoryManager* mem = reinterpret_cast(cinfo->mem); + if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS) { + JPEGLI_ERROR("Invalid pool id %d", pool_id); + } + for (void* ptr : mem->owned_ptrs[pool_id]) { + free(ptr); + } + ClearPool(cinfo, pool_id); + for (void* ptr : mem->owned_ptrs[JPOOL_NUMPOOLS + pool_id]) { + hwy::FreeAlignedBytes(ptr, nullptr, nullptr); + } + ClearPool(cinfo, JPOOL_NUMPOOLS + pool_id); +} + +void SelfDestruct(j_common_ptr cinfo) { + MemoryManager* mem = reinterpret_cast(cinfo->mem); + for (int pool_id = 0; pool_id < JPOOL_NUMPOOLS; ++pool_id) { + FreePool(cinfo, pool_id); + } + delete mem; + cinfo->mem = nullptr; +} + +} // namespace + +void InitMemoryManager(j_common_ptr cinfo) { + MemoryManager* mem = new MemoryManager; + mem->pub.alloc_small = jpegli::Alloc; + mem->pub.alloc_large = jpegli::Alloc; + mem->pub.alloc_sarray = jpegli::Alloc2dArray; + mem->pub.alloc_barray = jpegli::Alloc2dArray; + mem->pub.request_virt_sarray = + jpegli::RequestVirtualArray; + mem->pub.request_virt_barray = + jpegli::RequestVirtualArray; + mem->pub.realize_virt_arrays = 
jpegli::RealizeVirtualArrays; + mem->pub.access_virt_sarray = + jpegli::AccessVirtualArray; + mem->pub.access_virt_barray = + jpegli::AccessVirtualArray; + mem->pub.free_pool = jpegli::FreePool; + mem->pub.self_destruct = jpegli::SelfDestruct; + mem->pub.max_memory_to_use = 0; + mem->total_memory_usage = 0; + mem->peak_memory_usage = 0; + memset(mem->pool_memory_usage, 0, sizeof(mem->pool_memory_usage)); + cinfo->mem = reinterpret_cast(mem); +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/memory_manager.h b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.h new file mode 100644 index 0000000000..3e2bdabe06 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/memory_manager.h @@ -0,0 +1,45 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_MEMORY_MANAGER_H_ +#define LIB_JPEGLI_MEMORY_MANAGER_H_ + +#include + +#include "lib/jpegli/common.h" + +#define JPOOL_PERMANENT_ALIGNED (JPOOL_NUMPOOLS + JPOOL_PERMANENT) +#define JPOOL_IMAGE_ALIGNED (JPOOL_NUMPOOLS + JPOOL_IMAGE) + +namespace jpegli { + +void InitMemoryManager(j_common_ptr cinfo); + +template +T* Allocate(j_common_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) { + void* p = (*cinfo->mem->alloc_small)(cinfo, pool_id, len * sizeof(T)); + return reinterpret_cast(p); +} + +template +T* Allocate(j_decompress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) { + return Allocate(reinterpret_cast(cinfo), len, pool_id); +} + +template +T* Allocate(j_compress_ptr cinfo, size_t len, int pool_id = JPOOL_PERMANENT) { + return Allocate(reinterpret_cast(cinfo), len, pool_id); +} + +template +JBLOCKARRAY GetBlockRow(T cinfo, int c, JDIMENSION by) { + return (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), cinfo->master->coeff_buffers[c], + by, 1, true); +} + +} // namespace jpegli + +#endif // 
LIB_JPEGLI_MEMORY_MANAGER_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/output_suspension_test.cc b/third-party/libjxl/libjxl/lib/jpegli/output_suspension_test.cc new file mode 100644 index 0000000000..73db791727 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/output_suspension_test.cc @@ -0,0 +1,219 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/encode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" + +namespace jpegli { +namespace { + +static constexpr size_t kInitialBufferSize = 1024; +static constexpr size_t kFinalBufferSize = 18; + +struct DestinationManager { + jpeg_destination_mgr pub; + std::vector buffer; + + DestinationManager() { + pub.init_destination = init_destination; + pub.empty_output_buffer = empty_output_buffer; + pub.term_destination = term_destination; + } + + void Rewind() { + pub.next_output_byte = buffer.data(); + pub.free_in_buffer = buffer.size(); + } + + void EmptyTo(std::vector* output, size_t new_size = 0) { + output->insert(output->end(), buffer.data(), pub.next_output_byte); + if (new_size > 0) { + buffer.resize(new_size); + } + Rewind(); + } + + static void init_destination(j_compress_ptr cinfo) { + auto us = reinterpret_cast(cinfo->dest); + us->buffer.resize(kInitialBufferSize); + us->Rewind(); + } + + static boolean empty_output_buffer(j_compress_ptr cinfo) { return FALSE; } + + static void term_destination(j_compress_ptr cinfo) {} +}; + +struct TestConfig { + TestImage input; + CompressParams jparams; + size_t buffer_size; + size_t lines_batch_size; +}; + +class OutputSuspensionTestParam : public ::testing::TestWithParam { +}; + +TEST_P(OutputSuspensionTestParam, PixelData) { + jpeg_compress_struct cinfo = {}; + TestConfig config = GetParam(); + TestImage& input = config.input; + GeneratePixels(&input); + DestinationManager dest; + 
std::vector compressed; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.dest = reinterpret_cast(&dest); + + cinfo.image_width = input.xsize; + cinfo.image_height = input.ysize; + cinfo.input_components = input.components; + cinfo.in_color_space = JCS_RGB; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0]; + jpegli_set_progressive_level(&cinfo, 0); + cinfo.optimize_coding = FALSE; + jpegli_start_compress(&cinfo, TRUE); + + size_t stride = cinfo.image_width * cinfo.input_components; + std::vector row_bytes(config.lines_batch_size * stride); + while (cinfo.next_scanline < cinfo.image_height) { + size_t lines_left = cinfo.image_height - cinfo.next_scanline; + size_t num_lines = std::min(config.lines_batch_size, lines_left); + memcpy(&row_bytes[0], &input.pixels[cinfo.next_scanline * stride], + num_lines * stride); + std::vector rows(num_lines); + for (size_t i = 0; i < num_lines; ++i) { + rows[i] = &row_bytes[i * stride]; + } + size_t lines_done = 0; + while (lines_done < num_lines) { + lines_done += jpegli_write_scanlines(&cinfo, &rows[lines_done], + num_lines - lines_done); + if (lines_done < num_lines) { + dest.EmptyTo(&compressed, config.buffer_size); + } + } + } + dest.EmptyTo(&compressed, kFinalBufferSize); + jpegli_finish_compress(&cinfo); + dest.EmptyTo(&compressed); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_compress(&cinfo); + TestImage output; + DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output); + VerifyOutputImage(input, output, 2.5); +} + +TEST_P(OutputSuspensionTestParam, RawData) { + jpeg_compress_struct cinfo = {}; + TestConfig config = GetParam(); + if (config.lines_batch_size != 1) return; + TestImage& input = config.input; + input.color_space = JCS_YCbCr; + GeneratePixels(&input); + GenerateRawData(config.jparams, &input); + DestinationManager dest; + std::vector compressed; + 
const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + cinfo.dest = reinterpret_cast(&dest); + cinfo.image_width = input.xsize; + cinfo.image_height = input.ysize; + cinfo.input_components = input.components; + cinfo.in_color_space = JCS_YCbCr; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0]; + jpegli_set_progressive_level(&cinfo, 0); + cinfo.optimize_coding = FALSE; + cinfo.raw_data_in = TRUE; + jpegli_start_compress(&cinfo, TRUE); + + std::vector> raw_data = input.raw_data; + size_t max_lines = config.jparams.max_v_sample() * DCTSIZE; + std::vector> rowdata(cinfo.num_components); + std::vector data(cinfo.num_components); + for (int c = 0; c < cinfo.num_components; ++c) { + rowdata[c].resize(config.jparams.v_samp(c) * DCTSIZE); + data[c] = &rowdata[c][0]; + } + while (cinfo.next_scanline < cinfo.image_height) { + for (int c = 0; c < cinfo.num_components; ++c) { + size_t cwidth = cinfo.comp_info[c].width_in_blocks * DCTSIZE; + size_t cheight = cinfo.comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = config.jparams.v_samp(c) * DCTSIZE; + size_t y0 = (cinfo.next_scanline / max_lines) * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + (y0 + i < cheight ? 
&raw_data[c][(y0 + i) * cwidth] : nullptr); + } + } + while (jpegli_write_raw_data(&cinfo, &data[0], max_lines) == 0) { + dest.EmptyTo(&compressed, config.buffer_size); + } + } + dest.EmptyTo(&compressed, kFinalBufferSize); + jpegli_finish_compress(&cinfo); + dest.EmptyTo(&compressed); + return true; + }; + try_catch_block(); + jpegli_destroy_compress(&cinfo); + DecompressParams dparams; + dparams.output_mode = RAW_DATA; + TestImage output; + DecodeWithLibjpeg(CompressParams(), dparams, compressed, &output); + VerifyOutputImage(input, output, 3.5); +} + +std::vector GenerateTests() { + std::vector all_tests; + const size_t xsize0 = 1920; + const size_t ysize0 = 1080; + for (int dysize : {0, 1, 8, 9}) { + for (int v_sampling : {1, 2}) { + for (int nlines : {1, 8, 117}) { + for (int bufsize : {1, 16, 16 << 10}) { + TestConfig config; + config.lines_batch_size = nlines; + config.buffer_size = bufsize; + config.input.xsize = xsize0; + config.input.ysize = ysize0 + dysize; + config.jparams.h_sampling = {1, 1, 1}; + config.jparams.v_sampling = {v_sampling, 1, 1}; + all_tests.push_back(config); + } + } + } + } + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + os << c.input; + os << c.jparams; + os << "Lines" << c.lines_batch_size; + os << "BufSize" << c.buffer_size; + return os; +} + +std::string TestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(OutputSuspensionTest, OutputSuspensionTestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/quant.cc b/third-party/libjxl/libjxl/lib/jpegli/quant.cc new file mode 100644 index 0000000000..36f1df4cdd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/quant.cc @@ -0,0 +1,768 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/quant.h" + +#include +#include +#include + +#include "lib/jpegli/adaptive_quantization.h" +#include "lib/jpegli/common.h" +#include "lib/jpegli/encode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { + +namespace { + +// Global scale is chosen in a way that butteraugli 3-norm matches libjpeg +// with the same quality setting. Fitted for quality 90 on jyrki31 corpus. +constexpr float kGlobalScaleXYB = 1.43951668f; +constexpr float kGlobalScaleYCbCr = 1.73966010f; + +static constexpr float kBaseQuantMatrixXYB[] = { + // c = 0 + 7.5629935265f, + 19.8247814178f, + 22.5724945068f, + 20.6706695557f, + 22.6864585876f, + 23.5696277618f, + 25.8129081726f, + 36.3307571411f, + 19.8247814178f, + 21.5503177643f, + 19.9372234344f, + 20.5424213409f, + 21.8645496368f, + 23.9041385651f, + 28.2844066620f, + 32.6609764099f, + 22.5724945068f, + 19.9372234344f, + 21.9017257690f, + 19.1223449707f, + 21.7515811920f, + 24.6724700928f, + 25.4249649048f, + 32.6653823853f, + 20.6706695557f, + 20.5424213409f, + 19.1223449707f, + 20.1610221863f, + 25.3719692230f, + 25.9668903351f, + 30.9804954529f, + 31.3406009674f, + 22.6864585876f, + 21.8645496368f, + 21.7515811920f, + 25.3719692230f, + 26.2431850433f, + 40.5992202759f, + 43.2624626160f, + 63.3010940552f, + 23.5696277618f, + 23.9041385651f, + 24.6724700928f, + 25.9668903351f, + 40.5992202759f, + 48.3026771545f, + 34.0964355469f, + 61.9852142334f, + 25.8129081726f, + 28.2844066620f, + 25.4249649048f, + 30.9804954529f, + 43.2624626160f, + 34.0964355469f, + 34.4937438965f, + 66.9702758789f, + 36.3307571411f, + 32.6609764099f, + 32.6653823853f, + 31.3406009674f, + 63.3010940552f, + 61.9852142334f, + 66.9702758789f, + 39.9652709961f, + // c = 1 + 1.6262000799f, + 3.2199242115f, + 
3.4903779030f, + 3.9148359299f, + 4.8337211609f, + 4.9108843803f, + 5.3137121201f, + 6.1676793098f, + 3.2199242115f, + 3.4547898769f, + 3.6036829948f, + 4.2652835846f, + 4.8368387222f, + 4.8226222992f, + 5.6120514870f, + 6.3431472778f, + 3.4903779030f, + 3.6036829948f, + 3.9044559002f, + 4.3374395370f, + 4.8435096741f, + 5.4057979584f, + 5.6066360474f, + 6.1075134277f, + 3.9148359299f, + 4.2652835846f, + 4.3374395370f, + 4.6064834595f, + 5.1751475334f, + 5.4013924599f, + 6.0399808884f, + 6.7825231552f, + 4.8337211609f, + 4.8368387222f, + 4.8435096741f, + 5.1751475334f, + 5.3748049736f, + 6.1410837173f, + 7.6529307365f, + 7.5235214233f, + 4.9108843803f, + 4.8226222992f, + 5.4057979584f, + 5.4013924599f, + 6.1410837173f, + 6.3431472778f, + 7.1083049774f, + 7.6008300781f, + 5.3137121201f, + 5.6120514870f, + 5.6066360474f, + 6.0399808884f, + 7.6529307365f, + 7.1083049774f, + 7.0943155289f, + 7.0478363037f, + 6.1676793098f, + 6.3431472778f, + 6.1075134277f, + 6.7825231552f, + 7.5235214233f, + 7.6008300781f, + 7.0478363037f, + 6.9186143875f, + // c = 2 + 3.3038473129f, + 10.0689258575f, + 12.2785224915f, + 14.6041173935f, + 16.2107315063f, + 19.2314529419f, + 28.0129547119f, + 55.6682891846f, + 10.0689258575f, + 11.4085016251f, + 11.3871345520f, + 15.4934167862f, + 16.5364933014f, + 14.9153423309f, + 26.3748722076f, + 40.8614425659f, + 12.2785224915f, + 11.3871345520f, + 17.0886878967f, + 13.9500350952f, + 16.0003223419f, + 28.5660629272f, + 26.2124195099f, + 30.1260128021f, + 14.6041173935f, + 15.4934167862f, + 13.9500350952f, + 21.1235027313f, + 26.1579780579f, + 25.5579223633f, + 40.6859359741f, + 33.8056335449f, + 16.2107315063f, + 16.5364933014f, + 16.0003223419f, + 26.1579780579f, + 26.8042831421f, + 26.1587715149f, + 35.7343978882f, + 43.6857032776f, + 19.2314529419f, + 14.9153423309f, + 28.5660629272f, + 25.5579223633f, + 26.1587715149f, + 34.5418128967f, + 41.3197937012f, + 48.7867660522f, + 28.0129547119f, + 26.3748722076f, + 26.2124195099f, + 40.6859359741f, + 
35.7343978882f, + 41.3197937012f, + 47.6329460144f, + 55.3498458862f, + 55.6682891846f, + 40.8614425659f, + 30.1260128021f, + 33.8056335449f, + 43.6857032776f, + 48.7867660522f, + 55.3498458862f, + 63.6065597534f, +}; + +static const float kBaseQuantMatrixYCbCr[] = { + // c = 0 + 1.2397409345866273f, // + 1.7227115097630963f, // + 2.9212167156636855f, // + 2.812737435286529f, // + 3.339819711906184f, // + 3.463603762596166f, // + 3.840915217993518f, // + 3.86956f, // + 1.7227115097630963f, // + 2.0928894413636874f, // + 2.8456760904429297f, // + 2.704506820909662f, // + 3.4407673520905337f, // + 3.166232352090534f, // + 4.025208741558432f, // + 4.035324490952577f, // + 2.9212167156636855f, // + 2.8456760904429297f, // + 2.9587403520905338f, // + 3.3862948970669273f, // + 3.619523781336757f, // + 3.9046279999999998f, // + 3.757835838431854f, // + 4.237447515714274f, // + 2.812737435286529f, // + 2.704506820909662f, // + 3.3862948970669273f, // + 3.380058821812233f, // + 4.1679867415584315f, // + 4.805510627261856f, // + 4.784259f, // + 4.605934f, // + 3.339819711906184f, // + 3.4407673520905337f, // + 3.619523781336757f, // + 4.1679867415584315f, // + 4.579851258441568f, // + 4.923237f, // + 5.574107f, // + 5.48533336146308f, // + 3.463603762596166f, // + 3.166232352090534f, // + 3.9046279999999998f, // + 4.805510627261856f, // + 4.923237f, // + 5.43936f, // + 5.093895741558431f, // + 6.0872254423617225f, // + 3.840915217993518f, // + 4.025208741558432f, // + 3.757835838431854f, // + 4.784259f, // + 5.574107f, // + 5.093895741558431f, // + 5.438461f, // + 5.4037359493250845f, // + 3.86956f, // + 4.035324490952577f, // + 4.237447515714274f, // + 4.605934f, // + 5.48533336146308f, // + 6.0872254423617225f, // + 5.4037359493250845f, // + 4.37787101190424f, + // c = 1 + 2.8236197786377537f, // + 6.495639358561486f, // + 9.310489207538302f, // + 10.64747864717083f, // + 11.07419143098738f, // + 17.146390223910462f, // + 18.463982229408998f, // + 29.087001644203088f, // + 
6.495639358561486f, // + 8.890103846667353f, // + 8.976895794294748f, // + 13.666270550318826f, // + 16.547071905624193f, // + 16.63871382827686f, // + 26.778396930893695f, // + 21.33034294694781f, // + 9.310489207538302f, // + 8.976895794294748f, // + 11.08737706005991f, // + 18.20548239870446f, // + 19.752481654011646f, // + 23.985660533114896f, // + 102.6457378402362f, // + 24.450989f, // + 10.64747864717083f, // + 13.666270550318826f, // + 18.20548239870446f, // + 18.628012327860365f, // + 16.042509519487183f, // + 25.04918273242625f, // + 25.017140189353015f, // + 35.79788782635831f, // + 11.07419143098738f, // + 16.547071905624193f, // + 19.752481654011646f, // + 16.042509519487183f, // + 19.373482748612577f, // + 14.677529999999999f, // + 19.94695960400931f, // + 51.094112f, // + 17.146390223910462f, // + 16.63871382827686f, // + 23.985660533114896f, // + 25.04918273242625f, // + 14.677529999999999f, // + 31.320412426835304f, // + 46.357234000000005f, // + 67.48111451705412f, // + 18.463982229408998f, // + 26.778396930893695f, // + 102.6457378402362f, // + 25.017140189353015f, // + 19.94695960400931f, // + 46.357234000000005f, // + 61.315764694388044f, // + 88.34665293823721f, // + 29.087001644203088f, // + 21.33034294694781f, // + 24.450989f, // + 35.79788782635831f, // + 51.094112f, // + 67.48111451705412f, // + 88.34665293823721f, // + 112.16099098350989f, + // c = 2 + 2.9217254961255255f, // + 4.497681013199305f, // + 7.356344520940414f, // + 6.583891506504051f, // + 8.535608740100237f, // + 8.799434353234647f, // + 9.188341534163023f, // + 9.482700481227672f, // + 4.497681013199305f, // + 6.309548851989123f, // + 7.024608962670982f, // + 7.156445324163424f, // + 8.049059218663244f, // + 7.0124290657218555f, // + 6.711923184393611f, // + 8.380307846134853f, // + 7.356344520940414f, // + 7.024608962670982f, // + 6.892101177327445f, // + 6.882819916277163f, // + 8.782226090078568f, // + 6.8774750000000004f, // + 7.8858175969577955f, // + 8.67909f, // + 
6.583891506504051f, // + 7.156445324163424f, // + 6.882819916277163f, // + 7.003072944847055f, // + 7.7223464701024875f, // + 7.955425720217421f, // + 7.4734110000000005f, // + 8.362933242943903f, // + 8.535608740100237f, // + 8.049059218663244f, // + 8.782226090078568f, // + 7.7223464701024875f, // + 6.778005927001542f, // + 9.484922741558432f, // + 9.043702663686046f, // + 8.053178199770173f, // + 8.799434353234647f, // + 7.0124290657218555f, // + 6.8774750000000004f, // + 7.955425720217421f, // + 9.484922741558432f, // + 8.607606527385098f, // + 9.922697394370815f, // + 64.25135180237939f, // + 9.188341534163023f, // + 6.711923184393611f, // + 7.8858175969577955f, // + 7.4734110000000005f, // + 9.043702663686046f, // + 9.922697394370815f, // + 63.184936549738225f, // + 83.35294340273799f, // + 9.482700481227672f, // + 8.380307846134853f, // + 8.67909f, // + 8.362933242943903f, // + 8.053178199770173f, // + 64.25135180237939f, // + 83.35294340273799f, // + 114.89202448569779f, // +}; + +static const float k420GlobalScale = 1.22; +static const float k420Rescale[64] = { + 0.4093, 0.3209, 0.3477, 0.3333, 0.3144, 0.2823, 0.3214, 0.3354, // + 0.3209, 0.3111, 0.3489, 0.2801, 0.3059, 0.3119, 0.4135, 0.3445, // + 0.3477, 0.3489, 0.3586, 0.3257, 0.2727, 0.3754, 0.3369, 0.3484, // + 0.3333, 0.2801, 0.3257, 0.3020, 0.3515, 0.3410, 0.3971, 0.3839, // + 0.3144, 0.3059, 0.2727, 0.3515, 0.3105, 0.3397, 0.2716, 0.3836, // + 0.2823, 0.3119, 0.3754, 0.3410, 0.3397, 0.3212, 0.3203, 0.0726, // + 0.3214, 0.4135, 0.3369, 0.3971, 0.2716, 0.3203, 0.0798, 0.0553, // + 0.3354, 0.3445, 0.3484, 0.3839, 0.3836, 0.0726, 0.0553, 0.3368, // +}; + +static const float kBaseQuantMatrixStd[] = { + // c = 0 + 16.0f, 11.0f, 10.0f, 16.0f, 24.0f, 40.0f, 51.0f, 61.0f, // + 12.0f, 12.0f, 14.0f, 19.0f, 26.0f, 58.0f, 60.0f, 55.0f, // + 14.0f, 13.0f, 16.0f, 24.0f, 40.0f, 57.0f, 69.0f, 56.0f, // + 14.0f, 17.0f, 22.0f, 29.0f, 51.0f, 87.0f, 80.0f, 62.0f, // + 18.0f, 22.0f, 37.0f, 56.0f, 68.0f, 109.0f, 103.0f, 
77.0f, // + 24.0f, 35.0f, 55.0f, 64.0f, 81.0f, 104.0f, 113.0f, 92.0f, // + 49.0f, 64.0f, 78.0f, 87.0f, 103.0f, 121.0f, 120.0f, 101.0f, // + 72.0f, 92.0f, 95.0f, 98.0f, 112.0f, 100.0f, 103.0f, 99.0f, // + // c = 1 + 17.0f, 18.0f, 24.0f, 47.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 18.0f, 21.0f, 26.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 24.0f, 26.0f, 56.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 47.0f, 66.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // + 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, 99.0f, // +}; + +static const float kZeroBiasMulYCbCrLQ[] = { + // c = 0 + 0.0000f, 0.0568f, 0.3880f, 0.6190f, 0.6190f, 0.4490f, 0.4490f, 0.6187f, // + 0.0568f, 0.5829f, 0.6189f, 0.6190f, 0.6190f, 0.7190f, 0.6190f, 0.6189f, // + 0.3880f, 0.6189f, 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.6187f, 0.6100f, // + 0.6190f, 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.3839f, 0.7160f, 0.6190f, // + 0.6190f, 0.6190f, 0.6190f, 0.5890f, 0.6190f, 0.3880f, 0.5860f, 0.4790f, // + 0.4490f, 0.7190f, 0.6190f, 0.3839f, 0.3880f, 0.6190f, 0.6190f, 0.6190f, // + 0.4490f, 0.6190f, 0.6187f, 0.7160f, 0.5860f, 0.6190f, 0.6204f, 0.6190f, // + 0.6187f, 0.6189f, 0.6100f, 0.6190f, 0.4790f, 0.6190f, 0.6190f, 0.3480f, // + // c = 1 + 0.0000f, 1.1640f, 0.9373f, 1.1319f, 0.8016f, 0.9136f, 1.1530f, 0.9430f, // + 1.1640f, 0.9188f, 0.9160f, 1.1980f, 1.1830f, 0.9758f, 0.9430f, 0.9430f, // + 0.9373f, 0.9160f, 0.8430f, 1.1720f, 0.7083f, 0.9430f, 0.9430f, 0.9430f, // + 1.1319f, 1.1980f, 1.1720f, 1.1490f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, // + 0.8016f, 1.1830f, 0.7083f, 0.8547f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, // + 0.9136f, 0.9758f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, // + 1.1530f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, // + 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9430f, 0.9480f, 
0.9430f, // + // c = 2 + 0.0000f, 1.3190f, 0.4308f, 0.4460f, 0.0661f, 0.0660f, 0.2660f, 0.2960f, // + 1.3190f, 0.3280f, 0.3093f, 0.0750f, 0.0505f, 0.1594f, 0.3060f, 0.2113f, // + 0.4308f, 0.3093f, 0.3060f, 0.1182f, 0.0500f, 0.3060f, 0.3915f, 0.2426f, // + 0.4460f, 0.0750f, 0.1182f, 0.0512f, 0.0500f, 0.2130f, 0.3930f, 0.1590f, // + 0.0661f, 0.0505f, 0.0500f, 0.0500f, 0.3055f, 0.3360f, 0.5148f, 0.5403f, // + 0.0660f, 0.1594f, 0.3060f, 0.2130f, 0.3360f, 0.5060f, 0.5874f, 0.3060f, // + 0.2660f, 0.3060f, 0.3915f, 0.3930f, 0.5148f, 0.5874f, 0.3060f, 0.3060f, // + 0.2960f, 0.2113f, 0.2426f, 0.1590f, 0.5403f, 0.3060f, 0.3060f, 0.3060f, // +}; + +static const float kZeroBiasMulYCbCrHQ[] = { + // c = 0 + 0.0000f, 0.0044f, 0.2521f, 0.6547f, 0.8161f, 0.6130f, 0.8841f, 0.8155f, // + 0.0044f, 0.6831f, 0.6553f, 0.6295f, 0.7848f, 0.7843f, 0.8474f, 0.7836f, // + 0.2521f, 0.6553f, 0.7834f, 0.7829f, 0.8161f, 0.8072f, 0.7743f, 0.9242f, // + 0.6547f, 0.6295f, 0.7829f, 0.8654f, 0.7829f, 0.6986f, 0.7818f, 0.7726f, // + 0.8161f, 0.7848f, 0.8161f, 0.7829f, 0.7471f, 0.7827f, 0.7843f, 0.7653f, // + 0.6130f, 0.7843f, 0.8072f, 0.6986f, 0.7827f, 0.7848f, 0.9508f, 0.7653f, // + 0.8841f, 0.8474f, 0.7743f, 0.7818f, 0.7843f, 0.9508f, 0.7839f, 0.8437f, // + 0.8155f, 0.7836f, 0.9242f, 0.7726f, 0.7653f, 0.7653f, 0.8437f, 0.7819f, // + // c = 1 + 0.0000f, 1.0816f, 1.0556f, 1.2876f, 1.1554f, 1.1567f, 1.8851f, 0.5488f, // + 1.0816f, 1.1537f, 1.1850f, 1.0712f, 1.1671f, 2.0719f, 1.0544f, 1.4764f, // + 1.0556f, 1.1850f, 1.2870f, 1.1981f, 1.8181f, 1.2618f, 1.0564f, 1.1191f, // + 1.2876f, 1.0712f, 1.1981f, 1.4753f, 2.0609f, 1.0564f, 1.2645f, 1.0564f, // + 1.1554f, 1.1671f, 1.8181f, 2.0609f, 0.7324f, 1.1163f, 0.8464f, 1.0564f, // + 1.1567f, 2.0719f, 1.2618f, 1.0564f, 1.1163f, 1.0040f, 1.0564f, 1.0564f, // + 1.8851f, 1.0544f, 1.0564f, 1.2645f, 0.8464f, 1.0564f, 1.0564f, 1.0564f, // + 0.5488f, 1.4764f, 1.1191f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, 1.0564f, // + // c = 2 + 0.0000f, 0.5392f, 0.6659f, 0.8968f, 
0.6829f, 0.6328f, 0.5802f, 0.4836f, // + 0.5392f, 0.6746f, 0.6760f, 0.6102f, 0.6015f, 0.6958f, 0.7327f, 0.4897f, // + 0.6659f, 0.6760f, 0.6957f, 0.6543f, 0.4396f, 0.6330f, 0.7081f, 0.2583f, // + 0.8968f, 0.6102f, 0.6543f, 0.5913f, 0.6457f, 0.5828f, 0.5139f, 0.3565f, // + 0.6829f, 0.6015f, 0.4396f, 0.6457f, 0.5633f, 0.4263f, 0.6371f, 0.5949f, // + 0.6328f, 0.6958f, 0.6330f, 0.5828f, 0.4263f, 0.2847f, 0.2909f, 0.6629f, // + 0.5802f, 0.7327f, 0.7081f, 0.5139f, 0.6371f, 0.2909f, 0.6644f, 0.6644f, // + 0.4836f, 0.4897f, 0.2583f, 0.3565f, 0.5949f, 0.6629f, 0.6644f, 0.6644f, // +}; + +static const float kZeroBiasOffsetYCbCrDC[] = {0.0f, 0.0f, 0.0f}; + +static const float kZeroBiasOffsetYCbCrAC[] = { + 0.59082f, + 0.58146f, + 0.57988f, +}; + +constexpr uint8_t kTransferFunctionPQ = 16; +constexpr uint8_t kTransferFunctionHLG = 18; + +float DistanceToLinearQuality(float distance) { + if (distance <= 0.1f) { + return 1.0f; + } else if (distance <= 4.6f) { + return (200.0f / 9.0f) * (distance - 0.1f); + } else if (distance <= 6.4f) { + return 5000.0f / (100.0f - (distance - 0.1f) / 0.09f); + } else if (distance < 25.0f) { + return 530000.0f / + (3450.0f - + 300.0f * std::sqrt((848.0f * distance - 5330.0f) / 120.0f)); + } else { + return 5000.0f; + } +} + +constexpr float kExponent[DCTSIZE2] = { + 1.00f, 0.51f, 0.67f, 0.74f, 1.00f, 1.00f, 1.00f, 1.00f, // + 0.51f, 0.66f, 0.69f, 0.87f, 1.00f, 1.00f, 1.00f, 1.00f, // + 0.67f, 0.69f, 0.84f, 0.83f, 0.96f, 1.00f, 1.00f, 1.00f, // + 0.74f, 0.87f, 0.83f, 1.00f, 1.00f, 0.91f, 0.91f, 1.00f, // + 1.00f, 1.00f, 0.96f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, // + 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, // + 1.00f, 1.00f, 1.00f, 0.91f, 1.00f, 1.00f, 1.00f, 1.00f, // + 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, 1.00f, // +}; +constexpr float kDist0 = 1.5f; // distance where non-linearity kicks in. 
+ +float DistanceToScale(float distance, int k) { + if (distance < kDist0) { + return distance; + } + const float exp = kExponent[k]; + const float mul = std::pow(kDist0, 1.0 - exp); + return std::max(0.5f * distance, mul * std::pow(distance, exp)); +} + +float ScaleToDistance(float scale, int k) { + if (scale < kDist0) { + return scale; + } + const float exp = 1.0 / kExponent[k]; + const float mul = std::pow(kDist0, 1.0 - exp); + return std::min(2.0f * scale, mul * std::pow(scale, exp)); +} + +float QuantValsToDistance(j_compress_ptr cinfo) { + jpeg_comp_master* m = cinfo->master; + float global_scale = kGlobalScaleYCbCr; + if (m->cicp_transfer_function == kTransferFunctionPQ) { + global_scale *= .4f; + } else if (m->cicp_transfer_function == kTransferFunctionHLG) { + global_scale *= .5f; + } + int quant_max = m->force_baseline ? 255 : 32767U; + static const float kDistMax = 10000.0f; + float dist_min = 0.0f; + float dist_max = kDistMax; + for (int c = 0; c < cinfo->num_components; ++c) { + int quant_idx = cinfo->comp_info[c].quant_tbl_no; + uint16_t* quantval = cinfo->quant_tbl_ptrs[quant_idx]->quantval; + const float* base_qm = &kBaseQuantMatrixYCbCr[quant_idx * DCTSIZE2]; + for (int k = 0; k < DCTSIZE2; ++k) { + float dmin = 0.0; + float dmax = kDistMax; + float invq = 1.0f / base_qm[k] / global_scale; + int qval = quantval[k]; + if (qval > 1) { + float scale_min = (qval - 0.5f) * invq; + dmin = ScaleToDistance(scale_min, k); + } + if (qval < quant_max) { + float scale_max = (qval + 0.5f) * invq; + dmax = ScaleToDistance(scale_max, k); + } + if (dmin <= dist_max) { + dist_min = std::max(dmin, dist_min); + } + if (dmax >= dist_min) { + dist_max = std::min(dist_max, dmax); + } + } + } + float distance; + if (dist_min == 0) { + distance = dist_max; + } else if (dist_max == kDistMax) { + distance = dist_min; + } else { + distance = 0.5f * (dist_min + dist_max); + } + return distance; +} + +bool IsYUV420(j_compress_ptr cinfo) { + return (cinfo->jpeg_color_space == 
JCS_YCbCr && + cinfo->comp_info[0].h_samp_factor == 2 && + cinfo->comp_info[0].v_samp_factor == 2 && + cinfo->comp_info[1].h_samp_factor == 1 && + cinfo->comp_info[1].v_samp_factor == 1 && + cinfo->comp_info[2].h_samp_factor == 1 && + cinfo->comp_info[2].v_samp_factor == 1); +} + +} // namespace + +void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS], + bool add_two_chroma_tables) { + jpeg_comp_master* m = cinfo->master; + const bool xyb = m->xyb_mode && cinfo->jpeg_color_space == JCS_RGB; + const bool is_yuv420 = IsYUV420(cinfo); + + float global_scale; + bool non_linear_scaling = true; + const float* base_quant_matrix[NUM_QUANT_TBLS]; + int num_base_tables; + + if (xyb) { + global_scale = kGlobalScaleXYB; + num_base_tables = 3; + base_quant_matrix[0] = kBaseQuantMatrixXYB; + base_quant_matrix[1] = kBaseQuantMatrixXYB + DCTSIZE2; + base_quant_matrix[2] = kBaseQuantMatrixXYB + 2 * DCTSIZE2; + } else if (cinfo->jpeg_color_space == JCS_YCbCr && !m->use_std_tables) { + global_scale = kGlobalScaleYCbCr; + if (m->cicp_transfer_function == kTransferFunctionPQ) { + global_scale *= .4f; + } else if (m->cicp_transfer_function == kTransferFunctionHLG) { + global_scale *= .5f; + } + if (is_yuv420) { + global_scale *= k420GlobalScale; + } + if (add_two_chroma_tables) { + cinfo->comp_info[2].quant_tbl_no = 2; + num_base_tables = 3; + base_quant_matrix[0] = kBaseQuantMatrixYCbCr; + base_quant_matrix[1] = kBaseQuantMatrixYCbCr + DCTSIZE2; + base_quant_matrix[2] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2; + } else { + num_base_tables = 2; + base_quant_matrix[0] = kBaseQuantMatrixYCbCr; + // Use the Cr table for both Cb and Cr. + base_quant_matrix[1] = kBaseQuantMatrixYCbCr + 2 * DCTSIZE2; + } + } else { + global_scale = 0.01f; + non_linear_scaling = false; + num_base_tables = 2; + base_quant_matrix[0] = kBaseQuantMatrixStd; + base_quant_matrix[1] = kBaseQuantMatrixStd + DCTSIZE2; + } + + int quant_max = m->force_baseline ? 
255 : 32767U; + for (int quant_idx = 0; quant_idx < num_base_tables; ++quant_idx) { + const float* base_qm = base_quant_matrix[quant_idx]; + JQUANT_TBL** qtable = &cinfo->quant_tbl_ptrs[quant_idx]; + if (*qtable == nullptr) { + *qtable = jpegli_alloc_quant_table(reinterpret_cast(cinfo)); + } + for (int k = 0; k < DCTSIZE2; ++k) { + float scale = global_scale; + if (non_linear_scaling) { + scale *= DistanceToScale(distances[quant_idx], k); + if (is_yuv420 && quant_idx > 0) { + scale *= k420Rescale[k]; + } + } else { + scale *= DistanceToLinearQuality(distances[quant_idx]); + } + int qval = std::round(scale * base_qm[k]); + (*qtable)->quantval[k] = std::max(1, std::min(qval, quant_max)); + } + (*qtable)->sent_table = FALSE; + } +} + +void InitQuantizer(j_compress_ptr cinfo, QuantPass pass) { + jpeg_comp_master* m = cinfo->master; + // Compute quantization multupliers from the quant table values. + for (int c = 0; c < cinfo->num_components; ++c) { + int quant_idx = cinfo->comp_info[c].quant_tbl_no; + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[quant_idx]; + if (!quant_table) { + JPEGLI_ERROR("Missing quantization table %d for component %d", quant_idx, + c); + } + for (size_t k = 0; k < DCTSIZE2; k++) { + int val = quant_table->quantval[k]; + if (val == 0) { + JPEGLI_ERROR("Invalid quantval 0."); + } + switch (pass) { + case QuantPass::NO_SEARCH: + m->quant_mul[c][k] = 8.0f / val; + break; + case QuantPass::SEARCH_FIRST_PASS: + m->quant_mul[c][k] = 128.0f; + break; + case QuantPass::SEARCH_SECOND_PASS: + m->quant_mul[c][kJPEGZigZagOrder[k]] = 1.0f / (16 * val); + break; + } + } + } + if (m->use_adaptive_quantization) { + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + m->zero_bias_mul[c][k] = k == 0 ? 0.0f : 0.5f; + m->zero_bias_offset[c][k] = k == 0 ? 
0.0f : 0.5f; + } + } + if (cinfo->jpeg_color_space == JCS_YCbCr) { + float distance = QuantValsToDistance(cinfo); + static const float kDistHQ = 1.0f; + static const float kDistLQ = 3.0f; + float mix0 = (distance - kDistHQ) / (kDistLQ - kDistHQ); + mix0 = std::max(0.0f, std::min(1.0f, mix0)); + float mix1 = 1.0f - mix0; + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + float mul0 = kZeroBiasMulYCbCrLQ[c * DCTSIZE2 + k]; + float mul1 = kZeroBiasMulYCbCrHQ[c * DCTSIZE2 + k]; + m->zero_bias_mul[c][k] = mix0 * mul0 + mix1 * mul1; + m->zero_bias_offset[c][k] = + k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c]; + } + } + } + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + for (int c = 0; c < cinfo->num_components; ++c) { + for (int k = 0; k < DCTSIZE2; ++k) { + m->zero_bias_offset[c][k] = + k == 0 ? kZeroBiasOffsetYCbCrDC[c] : kZeroBiasOffsetYCbCrAC[c]; + } + } + } +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/quant.h b/third-party/libjxl/libjxl/lib/jpegli/quant.h new file mode 100644 index 0000000000..cb37757ae2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/quant.h @@ -0,0 +1,26 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_QUANT_H_ +#define LIB_JPEGLI_QUANT_H_ + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void SetQuantMatrices(j_compress_ptr cinfo, float distances[NUM_QUANT_TBLS], + bool add_two_chroma_tables); + +enum QuantPass { + NO_SEARCH, + SEARCH_FIRST_PASS, + SEARCH_SECOND_PASS, +}; + +void InitQuantizer(j_compress_ptr cinfo, QuantPass pass); + +} // namespace jpegli + +#endif // LIB_JPEGLI_QUANT_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/render.cc b/third-party/libjxl/libjxl/lib/jpegli/render.cc new file mode 100644 index 0000000000..24e7e99618 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/render.cc @@ -0,0 +1,763 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/render.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "lib/jpegli/color_quantize.h" +#include "lib/jpegli/color_transform.h" +#include "lib/jpegli/decode_internal.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/idct.h" +#include "lib/jpegli/upsample.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" + +#ifdef MEMORY_SANITIZER +#define JXL_MEMORY_SANITIZER 1 +#elif defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define JXL_MEMORY_SANITIZER 1 +#else +#define JXL_MEMORY_SANITIZER 0 +#endif +#else +#define JXL_MEMORY_SANITIZER 0 +#endif + +#if JXL_MEMORY_SANITIZER +#include "sanitizer/msan_interface.h" +#endif + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/render.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Clamp; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::NearestInt; +using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::ShiftLeftSame; +using hwy::HWY_NAMESPACE::ShiftRightSame; +using hwy::HWY_NAMESPACE::Vec; +using D = HWY_FULL(float); +using DI = HWY_FULL(int32_t); +constexpr D d; +constexpr DI di; + +void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs, + const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros, + int32_t* JXL_RESTRICT sumabs) { + for (size_t i = 0; i < coeffs_size; i += Lanes(d)) { + size_t k = i % DCTSIZE2; + const Rebind di16; + const Vec coeff = PromoteTo(di, Load(di16, coeffs + i)); + const auto abs_coeff = Abs(coeff); + const auto not_0 = Gt(abs_coeff, Zero(di)); + const auto nzero = IfThenElseZero(not_0, Set(di, 1)); + Store(Add(nzero, Load(di, nonzeros + k)), di, nonzeros + k); + Store(Add(abs_coeff, Load(di, sumabs + k)), di, sumabs + k); + } +} + +void DecenterRow(float* row, size_t xsize) { + const HWY_CAPPED(float, 8) df; + const auto c128 = Set(df, 128.0f / 255); + for (size_t x = 0; x < xsize; x += Lanes(df)) { + Store(Add(Load(df, row + x), c128), df, row + x); + } +} + +void DitherRow(j_decompress_ptr cinfo, float* row, int c, size_t y, + size_t xsize) { + jpeg_decomp_master* m = cinfo->master; + if (!m->dither_[c]) return; + const float* dither_row = + &m->dither_[c][(y & m->dither_mask_) * m->dither_size_]; + for (size_t x = 0; x < xsize; ++x) { + row[x] += dither_row[x & m->dither_mask_]; + } +} + +template +void StoreUnsignedRow(float* JXL_RESTRICT input[], size_t x0, size_t len, + size_t num_channels, float multiplier, T* output) { + const HWY_CAPPED(float, 8) d; + auto zero = Zero(d); + auto mul = Set(d, multiplier); + const Rebind du; +#if JXL_MEMORY_SANITIZER + const size_t padding = hwy::RoundUpTo(len, 
Lanes(d)) - len; + for (size_t c = 0; c < num_channels; ++c) { + __msan_unpoison(input[c] + x0 + len, sizeof(input[c][0]) * padding); + } +#endif + if (num_channels == 1) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]); + } + } else if (num_channels == 2) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul); + StoreInterleaved2(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), du, &output[2 * i]); + } + } else if (num_channels == 3) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul); + auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul); + StoreInterleaved3(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), + DemoteTo(du, NearestInt(v2)), du, &output[3 * i]); + } + } else if (num_channels == 4) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Clamp(zero, Mul(LoadU(d, &input[0][x0 + i]), mul), mul); + auto v1 = Clamp(zero, Mul(LoadU(d, &input[1][x0 + i]), mul), mul); + auto v2 = Clamp(zero, Mul(LoadU(d, &input[2][x0 + i]), mul), mul); + auto v3 = Clamp(zero, Mul(LoadU(d, &input[3][x0 + i]), mul), mul); + StoreInterleaved4(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), + DemoteTo(du, NearestInt(v2)), + DemoteTo(du, NearestInt(v3)), du, &output[4 * i]); + } + } +#if JXL_MEMORY_SANITIZER + __msan_poison(output + num_channels * len, + sizeof(output[0]) * num_channels * padding); +#endif +} + +void StoreFloatRow(float* JXL_RESTRICT input[3], size_t x0, size_t len, + size_t num_channels, float* output) { + const HWY_CAPPED(float, 8) d; + if (num_channels == 1) { + memcpy(output, input[0] + x0, len * sizeof(output[0])); + } 
else if (num_channels == 2) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved2(LoadU(d, &input[0][x0 + i]), + LoadU(d, &input[1][x0 + i]), d, &output[2 * i]); + } + } else if (num_channels == 3) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved3(LoadU(d, &input[0][x0 + i]), + LoadU(d, &input[1][x0 + i]), + LoadU(d, &input[2][x0 + i]), d, &output[3 * i]); + } + } else if (num_channels == 4) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved4(LoadU(d, &input[0][x0 + i]), + LoadU(d, &input[1][x0 + i]), + LoadU(d, &input[2][x0 + i]), + LoadU(d, &input[3][x0 + i]), d, &output[4 * i]); + } + } +} + +static constexpr float kFSWeightMR = 7.0f / 16.0f; +static constexpr float kFSWeightBL = 3.0f / 16.0f; +static constexpr float kFSWeightBM = 5.0f / 16.0f; +static constexpr float kFSWeightBR = 1.0f / 16.0f; + +float LimitError(float error) { + float abserror = std::abs(error); + if (abserror > 48.0f) { + abserror = 32.0f; + } else if (abserror > 16.0f) { + abserror = 0.5f * abserror + 8.0f; + } + return error > 0.0f ? 
abserror : -abserror; +} + +void WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[], + size_t xoffset, size_t len, size_t num_channels, + uint8_t* JXL_RESTRICT output) { + jpeg_decomp_master* m = cinfo->master; + uint8_t* JXL_RESTRICT scratch_space = m->output_scratch_; + if (cinfo->quantize_colors && m->quant_pass_ == 1) { + float* error_row[kMaxComponents]; + float* next_error_row[kMaxComponents]; + if (cinfo->dither_mode == JDITHER_ORDERED) { + for (size_t c = 0; c < num_channels; ++c) { + DitherRow(cinfo, &rows[c][xoffset], c, cinfo->output_scanline, + cinfo->output_width); + } + } else if (cinfo->dither_mode == JDITHER_FS) { + for (size_t c = 0; c < num_channels; ++c) { + if (cinfo->output_scanline % 2 == 0) { + error_row[c] = m->error_row_[c]; + next_error_row[c] = m->error_row_[c + kMaxComponents]; + } else { + error_row[c] = m->error_row_[c + kMaxComponents]; + next_error_row[c] = m->error_row_[c]; + } + memset(next_error_row[c], 0.0, cinfo->output_width * sizeof(float)); + } + } + const float mul = 255.0f; + if (cinfo->dither_mode != JDITHER_FS) { + StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space); + } + for (size_t i = 0; i < len; ++i) { + uint8_t* pixel = &scratch_space[num_channels * i]; + if (cinfo->dither_mode == JDITHER_FS) { + for (size_t c = 0; c < num_channels; ++c) { + float val = rows[c][i] * mul + LimitError(error_row[c][i]); + pixel[c] = std::round(std::min(255.0f, std::max(0.0f, val))); + } + } + int index = LookupColorIndex(cinfo, pixel); + output[i] = index; + if (cinfo->dither_mode == JDITHER_FS) { + size_t prev_i = i > 0 ? i - 1 : 0; + size_t next_i = i + 1 < len ? 
i + 1 : len - 1; + for (size_t c = 0; c < num_channels; ++c) { + float error = pixel[c] - cinfo->colormap[c][index]; + error_row[c][next_i] += kFSWeightMR * error; + next_error_row[c][prev_i] += kFSWeightBL * error; + next_error_row[c][i] += kFSWeightBM * error; + next_error_row[c][next_i] += kFSWeightBR * error; + } + } + } + } else if (m->output_data_type_ == JPEGLI_TYPE_UINT8) { + const float mul = 255.0; + StoreUnsignedRow(rows, xoffset, len, num_channels, mul, scratch_space); + memcpy(output, scratch_space, len * num_channels); + } else if (m->output_data_type_ == JPEGLI_TYPE_UINT16) { + const float mul = 65535.0; + uint16_t* tmp = reinterpret_cast(scratch_space); + StoreUnsignedRow(rows, xoffset, len, num_channels, mul, tmp); + if (m->swap_endianness_) { + const HWY_CAPPED(uint16_t, 8) du; + size_t output_len = len * num_channels; + for (size_t j = 0; j < output_len; j += Lanes(du)) { + auto v = LoadU(du, tmp + j); + auto vswap = Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8)); + StoreU(vswap, du, tmp + j); + } + } + memcpy(output, tmp, len * num_channels * 2); + } else if (m->output_data_type_ == JPEGLI_TYPE_FLOAT) { + float* tmp = reinterpret_cast(scratch_space); + StoreFloatRow(rows, xoffset, len, num_channels, tmp); + if (m->swap_endianness_) { + size_t output_len = len * num_channels; + for (size_t j = 0; j < output_len; ++j) { + tmp[j] = BSwapFloat(tmp[j]); + } + } + memcpy(output, tmp, len * num_channels * 4); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace jpegli { + +HWY_EXPORT(GatherBlockStats); +HWY_EXPORT(WriteToOutput); +HWY_EXPORT(DecenterRow); + +void GatherBlockStats(const int16_t* JXL_RESTRICT coeffs, + const size_t coeffs_size, int32_t* JXL_RESTRICT nonzeros, + int32_t* JXL_RESTRICT sumabs) { + return HWY_DYNAMIC_DISPATCH(GatherBlockStats)(coeffs, coeffs_size, nonzeros, + sumabs); +} + +void 
WriteToOutput(j_decompress_ptr cinfo, float* JXL_RESTRICT rows[], + size_t xoffset, size_t len, size_t num_channels, + uint8_t* JXL_RESTRICT output) { + return HWY_DYNAMIC_DISPATCH(WriteToOutput)(cinfo, rows, xoffset, len, + num_channels, output); +} + +void DecenterRow(float* row, size_t xsize) { + return HWY_DYNAMIC_DISPATCH(DecenterRow)(row, xsize); +} + +bool ShouldApplyDequantBiases(j_decompress_ptr cinfo, int ci) { + const auto& compinfo = cinfo->comp_info[ci]; + return (compinfo.h_samp_factor == cinfo->max_h_samp_factor && + compinfo.v_samp_factor == cinfo->max_v_samp_factor); +} + +// See the following article for the details: +// J. R. Price and M. Rabbani, "Dequantization bias for JPEG decompression" +// Proceedings International Conference on Information Technology: Coding and +// Computing (Cat. No.PR00540), 2000, pp. 30-35, doi: 10.1109/ITCC.2000.844179. +void ComputeOptimalLaplacianBiases(const int num_blocks, const int* nonzeros, + const int* sumabs, float* biases) { + for (size_t k = 1; k < DCTSIZE2; ++k) { + if (nonzeros[k] == 0) { + biases[k] = 0.5f; + continue; + } + // Notation adapted from the article + float N = num_blocks; + float N1 = nonzeros[k]; + float N0 = num_blocks - N1; + float S = sumabs[k]; + // Compute gamma from N0, N1, N, S (eq. 11), with A and B being just + // temporary grouping of terms. + float A = 4.0 * S + 2.0 * N; + float B = 4.0 * S - 2.0 * N1; + float gamma = (-1.0 * N0 + std::sqrt(N0 * N0 * 1.0 + A * B)) / A; + float gamma2 = gamma * gamma; + // The bias is computed from gamma with (eq. 5), where the quantization + // multiplier Q can be factored out and thus the bias can be applied + // directly on the quantized coefficient. 
+ biases[k] = + 0.5 * (((1.0 + gamma2) / (1.0 - gamma2)) + 1.0 / std::log(gamma)); + } +} + +constexpr std::array Q_POS = {0, 1, 8, 16, 9, + 2, 3, 10, 17, 24}; + +bool is_nonzero_quantizers(const JQUANT_TBL* qtable) { + return std::all_of(Q_POS.begin(), Q_POS.end(), + [&](int pos) { return qtable->quantval[pos] != 0; }); +} + +// Determine whether smoothing should be applied during decompression +bool do_smoothing(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + bool smoothing_useful = false; + + if (!cinfo->progressive_mode || cinfo->coef_bits == nullptr) { + return false; + } + auto coef_bits_latch = m->coef_bits_latch; + auto prev_coef_bits_latch = m->prev_coef_bits_latch; + + for (int ci = 0; ci < cinfo->num_components; ci++) { + jpeg_component_info* compptr = &cinfo->comp_info[ci]; + JQUANT_TBL* qtable = compptr->quant_table; + int* coef_bits = cinfo->coef_bits[ci]; + int* prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components]; + + // Return early if conditions for smoothing are not met + if (qtable == nullptr || !is_nonzero_quantizers(qtable) || + coef_bits[0] < 0) { + return false; + } + + coef_bits_latch[ci][0] = coef_bits[0]; + + for (int coefi = 1; coefi < SAVED_COEFS; coefi++) { + prev_coef_bits_latch[ci][coefi] = + cinfo->input_scan_number > 1 ? prev_coef_bits[coefi] : -1; + if (coef_bits[coefi] != 0) { + smoothing_useful = true; + } + coef_bits_latch[ci][coefi] = coef_bits[coefi]; + } + } + + return smoothing_useful; +} + +void PredictSmooth(j_decompress_ptr cinfo, JBLOCKARRAY blocks, int component, + size_t bx, int iy) { + const size_t imcu_row = cinfo->output_iMCU_row; + int16_t* scratch = cinfo->master->smoothing_scratch_; + std::vector Q_VAL(SAVED_COEFS); + int* coef_bits; + + std::array, 5> dc_values; + auto& compinfo = cinfo->comp_info[component]; + const size_t by0 = imcu_row * compinfo.v_samp_factor; + const size_t by = by0 + iy; + + int prev_iy = by > 0 ? iy - 1 : 0; + int prev_prev_iy = by > 1 ? 
iy - 2 : prev_iy; + int next_iy = by + 1 < compinfo.height_in_blocks ? iy + 1 : iy; + int next_next_iy = by + 2 < compinfo.height_in_blocks ? iy + 2 : next_iy; + + const int16_t* cur_row = blocks[iy][bx]; + const int16_t* prev_row = blocks[prev_iy][bx]; + const int16_t* prev_prev_row = blocks[prev_prev_iy][bx]; + const int16_t* next_row = blocks[next_iy][bx]; + const int16_t* next_next_row = blocks[next_next_iy][bx]; + + int prev_block_ind = bx ? -DCTSIZE2 : 0; + int prev_prev_block_ind = bx > 1 ? -2 * DCTSIZE2 : prev_block_ind; + int next_block_ind = bx + 1 < compinfo.width_in_blocks ? DCTSIZE2 : 0; + int next_next_block_ind = + bx + 2 < compinfo.width_in_blocks ? DCTSIZE2 * 2 : next_block_ind; + + std::array row_ptrs = {prev_prev_row, prev_row, cur_row, + next_row, next_next_row}; + std::array block_inds = {prev_prev_block_ind, prev_block_ind, 0, + next_block_ind, next_next_block_ind}; + + memcpy(scratch, cur_row, DCTSIZE2 * sizeof(cur_row[0])); + + for (int r = 0; r < 5; ++r) { + for (int c = 0; c < 5; ++c) { + dc_values[r][c] = row_ptrs[r][block_inds[c]]; + } + } + // Get the correct coef_bits: In case of an incomplete scan, we use the + // prev coeficients. + if (cinfo->output_iMCU_row + 1 > cinfo->input_iMCU_row) { + coef_bits = cinfo->master->prev_coef_bits_latch[component]; + } else { + coef_bits = cinfo->master->coef_bits_latch[component]; + } + + bool change_dc = true; + for (int i = 1; i < SAVED_COEFS; i++) { + if (coef_bits[i] != -1) { + change_dc = false; + break; + } + } + + JQUANT_TBL* quanttbl = cinfo->quant_tbl_ptrs[compinfo.quant_tbl_no]; + for (size_t i = 0; i < 6; ++i) { + Q_VAL[i] = quanttbl->quantval[Q_POS[i]]; + } + if (change_dc) { + for (size_t i = 6; i < SAVED_COEFS; ++i) { + Q_VAL[i] = quanttbl->quantval[Q_POS[i]]; + } + } + auto calculate_dct_value = [&](int coef_index) { + int64_t num = 0; + int pred; + int Al; + // we use the symmetry of the smoothing matrices by transposing the 5x5 dc + // matrix in that case. 
+ bool swap_indices = coef_index == 2 || coef_index == 5 || coef_index == 8 || + coef_index == 9; + auto dc = [&](int i, int j) { + return swap_indices ? dc_values[j][i] : dc_values[i][j]; + }; + Al = coef_bits[coef_index]; + switch (coef_index) { + case 0: + // set the DC + num = (-2 * dc(0, 0) - 6 * dc(0, 1) - 8 * dc(0, 2) - 6 * dc(0, 3) - + 2 * dc(0, 4) - 6 * dc(1, 0) + 6 * dc(1, 1) + 42 * dc(1, 2) + + 6 * dc(1, 3) - 6 * dc(1, 4) - 8 * dc(2, 0) + 42 * dc(2, 1) + + 152 * dc(2, 2) + 42 * dc(2, 3) - 8 * dc(2, 4) - 6 * dc(3, 0) + + 6 * dc(3, 1) + 42 * dc(3, 2) + 6 * dc(3, 3) - 6 * dc(3, 4) - + 2 * dc(4, 0) - 6 * dc(4, 1) - 8 * dc(4, 2) - 6 * dc(4, 3) - + 2 * dc(4, 4)); + // special case: for the DC the dequantization is different + Al = 0; + break; + case 1: + case 2: + // set Q01 or Q10 + num = (change_dc ? (-dc(0, 0) - dc(0, 1) + dc(0, 3) + dc(0, 4) - + 3 * dc(1, 0) + 13 * dc(1, 1) - 13 * dc(1, 3) + + 3 * dc(1, 4) - 3 * dc(2, 0) + 38 * dc(2, 1) - + 38 * dc(2, 3) + 3 * dc(2, 4) - 3 * dc(3, 0) + + 13 * dc(3, 1) - 13 * dc(3, 3) + 3 * dc(3, 4) - + dc(4, 0) - dc(4, 1) + dc(4, 3) + dc(4, 4)) + : (-7 * dc(2, 0) + 50 * dc(2, 1) - 50 * dc(2, 3) + + 7 * dc(2, 4))); + break; + case 3: + case 5: + // set Q02 or Q20 + num = (change_dc + ? dc(0, 2) + 2 * dc(1, 1) + 7 * dc(1, 2) + 2 * dc(1, 3) - + 5 * dc(2, 1) - 14 * dc(2, 2) - 5 * dc(2, 3) + + 2 * dc(3, 1) + 7 * dc(3, 2) + 2 * dc(3, 3) + dc(4, 2) + : (-dc(0, 2) + 13 * dc(1, 2) - 24 * dc(2, 2) + + 13 * dc(3, 2) - dc(4, 2))); + break; + case 4: + // set Q11 + num = + (change_dc ? 
-dc(0, 0) + dc(0, 4) + 9 * dc(1, 1) - 9 * dc(1, 3) - + 9 * dc(3, 1) + 9 * dc(3, 3) + dc(4, 0) - dc(4, 4) + : (dc(1, 4) + dc(3, 0) - 10 * dc(3, 1) + 10 * dc(3, 3) - + dc(0, 1) - dc(3, 4) + dc(4, 1) - dc(4, 3) + dc(0, 3) - + dc(1, 0) + 10 * dc(1, 1) - 10 * dc(1, 3))); + break; + case 6: + case 9: + // set Q03 or Q30 + num = (dc(1, 1) - dc(1, 3) + 2 * dc(2, 1) - 2 * dc(2, 3) + dc(3, 1) - + dc(3, 3)); + break; + case 7: + case 8: + // set Q12 and Q21 + num = (dc(1, 1) - 3 * dc(1, 2) + dc(1, 3) - dc(3, 1) + 3 * dc(3, 2) - + dc(3, 3)); + break; + } + num = Q_VAL[0] * num; + if (num >= 0) { + pred = ((Q_VAL[coef_index] << 7) + num) / (Q_VAL[coef_index] << 8); + if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1; + } else { + pred = ((Q_VAL[coef_index] << 7) - num) / (Q_VAL[coef_index] << 8); + if (Al > 0 && pred >= (1 << Al)) pred = (1 << Al) - 1; + pred = -pred; + } + return static_cast(pred); + }; + + int loop_end = change_dc ? SAVED_COEFS : 6; + for (int i = 1; i < loop_end; ++i) { + if (coef_bits[i] != 0 && scratch[Q_POS[i]] == 0) { + scratch[Q_POS[i]] = calculate_dct_value(i); + } + } + if (change_dc) { + scratch[0] = calculate_dct_value(0); + } +} + +void PrepareForOutput(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + bool smoothing = do_smoothing(cinfo); + m->apply_smoothing = smoothing && cinfo->do_block_smoothing; + size_t coeffs_per_block = cinfo->num_components * DCTSIZE2; + memset(m->nonzeros_, 0, coeffs_per_block * sizeof(m->nonzeros_[0])); + memset(m->sumabs_, 0, coeffs_per_block * sizeof(m->sumabs_[0])); + memset(m->num_processed_blocks_, 0, sizeof(m->num_processed_blocks_)); + memset(m->biases_, 0, coeffs_per_block * sizeof(m->biases_[0])); + cinfo->output_iMCU_row = 0; + cinfo->output_scanline = 0; + const float kDequantScale = 1.0f / (8 * 255); + for (int c = 0; c < cinfo->num_components; c++) { + const auto& comp = cinfo->comp_info[c]; + JQUANT_TBL* table = comp.quant_table; + if (table == nullptr) continue; + for (size_t k = 0; 
k < DCTSIZE2; ++k) { + m->dequant_[c * DCTSIZE2 + k] = table->quantval[k] * kDequantScale; + } + } + ChooseInverseTransform(cinfo); + ChooseColorTransform(cinfo); +} + +void DecodeCurrentiMCURow(j_decompress_ptr cinfo) { + jpeg_decomp_master* m = cinfo->master; + const size_t imcu_row = cinfo->output_iMCU_row; + JBLOCKARRAY ba[kMaxComponents]; + for (int c = 0; c < cinfo->num_components; ++c) { + const jpeg_component_info* comp = &cinfo->comp_info[c]; + int by0 = imcu_row * comp->v_samp_factor; + int block_rows_left = comp->height_in_blocks - by0; + int max_block_rows = std::min(comp->v_samp_factor, block_rows_left); + int offset = m->streaming_mode_ ? 0 : by0; + ba[c] = (*cinfo->mem->access_virt_barray)( + reinterpret_cast(cinfo), m->coef_arrays[c], offset, + max_block_rows, false); + } + for (int c = 0; c < cinfo->num_components; ++c) { + size_t k0 = c * DCTSIZE2; + auto& compinfo = cinfo->comp_info[c]; + size_t block_row = imcu_row * compinfo.v_samp_factor; + if (ShouldApplyDequantBiases(cinfo, c)) { + // Update statistics for this iMCU row. + for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) { + size_t by = block_row + iy; + if (by >= compinfo.height_in_blocks) { + continue; + } + int16_t* JXL_RESTRICT coeffs = &ba[c][iy][0][0]; + size_t num = compinfo.width_in_blocks * DCTSIZE2; + GatherBlockStats(coeffs, num, &m->nonzeros_[k0], &m->sumabs_[k0]); + m->num_processed_blocks_[c] += compinfo.width_in_blocks; + } + if (imcu_row % 4 == 3) { + // Re-compute optimal biases every few iMCU-rows. 
+ ComputeOptimalLaplacianBiases(m->num_processed_blocks_[c], + &m->nonzeros_[k0], &m->sumabs_[k0], + &m->biases_[k0]); + } + } + RowBuffer* raw_out = &m->raw_output_[c]; + for (int iy = 0; iy < compinfo.v_samp_factor; ++iy) { + size_t by = block_row + iy; + if (by >= compinfo.height_in_blocks) { + continue; + } + size_t dctsize = m->scaled_dct_size[c]; + int16_t* JXL_RESTRICT row_in = &ba[c][iy][0][0]; + float* JXL_RESTRICT row_out = raw_out->Row(by * dctsize); + for (size_t bx = 0; bx < compinfo.width_in_blocks; ++bx) { + if (m->apply_smoothing) { + PredictSmooth(cinfo, ba[c], c, bx, iy); + (*m->inverse_transform[c])(m->smoothing_scratch_, &m->dequant_[k0], + &m->biases_[k0], m->idct_scratch_, + &row_out[bx * dctsize], raw_out->stride(), + dctsize); + } else { + (*m->inverse_transform[c])(&row_in[bx * DCTSIZE2], &m->dequant_[k0], + &m->biases_[k0], m->idct_scratch_, + &row_out[bx * dctsize], raw_out->stride(), + dctsize); + } + } + if (m->streaming_mode_) { + memset(row_in, 0, compinfo.width_in_blocks * sizeof(JBLOCK)); + } + } + } +} + +void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data) { + jpegli::DecodeCurrentiMCURow(cinfo); + jpeg_decomp_master* m = cinfo->master; + for (int c = 0; c < cinfo->num_components; ++c) { + const auto& compinfo = cinfo->comp_info[c]; + size_t comp_width = compinfo.width_in_blocks * DCTSIZE; + size_t comp_height = compinfo.height_in_blocks * DCTSIZE; + size_t comp_nrows = compinfo.v_samp_factor * DCTSIZE; + size_t y0 = cinfo->output_iMCU_row * compinfo.v_samp_factor * DCTSIZE; + size_t y1 = std::min(y0 + comp_nrows, comp_height); + for (size_t y = y0; y < y1; ++y) { + float* rows[1] = {m->raw_output_[c].Row(y)}; + uint8_t* output = data[c][y - y0]; + DecenterRow(rows[0], comp_width); + WriteToOutput(cinfo, rows, 0, comp_width, 1, output); + } + } + ++cinfo->output_iMCU_row; + cinfo->output_scanline += cinfo->max_v_samp_factor * DCTSIZE; + if (cinfo->output_scanline >= cinfo->output_height) { + ++m->output_passes_done_; + } 
+} + +void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows, + JSAMPARRAY scanlines, size_t max_output_rows) { + jpeg_decomp_master* m = cinfo->master; + const int vfactor = cinfo->max_v_samp_factor; + const int hfactor = cinfo->max_h_samp_factor; + const size_t context = m->need_context_rows_ ? 1 : 0; + const size_t imcu_row = cinfo->output_iMCU_row; + const size_t imcu_height = vfactor * m->min_scaled_dct_size; + const size_t imcu_width = hfactor * m->min_scaled_dct_size; + const size_t output_width = m->iMCU_cols_ * imcu_width; + if (imcu_row == cinfo->total_iMCU_rows || + (imcu_row > context && + cinfo->output_scanline < (imcu_row - context) * imcu_height)) { + // We are ready to output some scanlines. + size_t ybegin = cinfo->output_scanline; + size_t yend = (imcu_row == cinfo->total_iMCU_rows + ? cinfo->output_height + : (imcu_row - context) * imcu_height); + yend = std::min(yend, ybegin + max_output_rows - *num_output_rows); + size_t yb = (ybegin / vfactor) * vfactor; + size_t ye = DivCeil(yend, vfactor) * vfactor; + for (size_t y = yb; y < ye; y += vfactor) { + for (int c = 0; c < cinfo->num_components; ++c) { + RowBuffer* raw_out = &m->raw_output_[c]; + RowBuffer* render_out = &m->render_output_[c]; + int line_groups = vfactor / m->v_factor[c]; + int downsampled_width = output_width / m->h_factor[c]; + size_t yc = y / m->v_factor[c]; + for (int dy = 0; dy < line_groups; ++dy) { + size_t ymid = yc + dy; + const float* JXL_RESTRICT row_mid = raw_out->Row(ymid); + if (cinfo->do_fancy_upsampling && m->v_factor[c] == 2) { + const float* JXL_RESTRICT row_top = + ymid == 0 ? row_mid : raw_out->Row(ymid - 1); + const float* JXL_RESTRICT row_bot = ymid + 1 == m->raw_height_[c] + ? 
row_mid + : raw_out->Row(ymid + 1); + Upsample2Vertical(row_top, row_mid, row_bot, + render_out->Row(2 * dy), + render_out->Row(2 * dy + 1), downsampled_width); + } else { + for (int yix = 0; yix < m->v_factor[c]; ++yix) { + memcpy(render_out->Row(m->v_factor[c] * dy + yix), row_mid, + downsampled_width * sizeof(float)); + } + } + if (m->h_factor[c] > 1) { + for (int yix = 0; yix < m->v_factor[c]; ++yix) { + int row_ix = m->v_factor[c] * dy + yix; + float* JXL_RESTRICT row = render_out->Row(row_ix); + float* JXL_RESTRICT tmp = m->upsample_scratch_; + if (cinfo->do_fancy_upsampling && m->h_factor[c] == 2) { + Upsample2Horizontal(row, tmp, output_width); + } else { + // TODO(szabadka) SIMDify this. + for (size_t x = 0; x < output_width; ++x) { + tmp[x] = row[x / m->h_factor[c]]; + } + memcpy(row, tmp, output_width * sizeof(tmp[0])); + } + } + } + } + } + for (int yix = 0; yix < vfactor; ++yix) { + if (y + yix < ybegin || y + yix >= yend) continue; + float* rows[kMaxComponents]; + int num_all_components = + std::max(cinfo->out_color_components, cinfo->num_components); + for (int c = 0; c < num_all_components; ++c) { + rows[c] = m->render_output_[c].Row(yix); + } + (*m->color_transform)(rows, output_width); + for (int c = 0; c < cinfo->out_color_components; ++c) { + // Undo the centering of the sample values around zero. 
+ DecenterRow(rows[c], output_width); + } + if (scanlines) { + uint8_t* output = scanlines[*num_output_rows]; + WriteToOutput(cinfo, rows, m->xoffset_, cinfo->output_width, + cinfo->out_color_components, output); + } + JXL_ASSERT(cinfo->output_scanline == y + yix); + ++cinfo->output_scanline; + ++(*num_output_rows); + if (cinfo->output_scanline == cinfo->output_height) { + ++m->output_passes_done_; + } + } + } + } else { + DecodeCurrentiMCURow(cinfo); + ++cinfo->output_iMCU_row; + } +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/render.h b/third-party/libjxl/libjxl/lib/jpegli/render.h new file mode 100644 index 0000000000..ad69335d70 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/render.h @@ -0,0 +1,24 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_RENDER_H_ +#define LIB_JPEGLI_RENDER_H_ + +#include + +#include "lib/jpegli/common.h" + +namespace jpegli { + +void PrepareForOutput(j_decompress_ptr cinfo); + +void ProcessOutput(j_decompress_ptr cinfo, size_t* num_output_rows, + JSAMPARRAY scanlines, size_t max_output_rows); + +void ProcessRawOutput(j_decompress_ptr cinfo, JSAMPIMAGE data); + +} // namespace jpegli + +#endif // LIB_JPEGLI_RENDER_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/simd.cc b/third-party/libjxl/libjxl/lib/jpegli/simd.cc new file mode 100644 index 0000000000..5e84939342 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/simd.cc @@ -0,0 +1,38 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/simd.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/simd.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +size_t GetVectorSize() { return HWY_LANES(uint8_t); } + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { +namespace { + +HWY_EXPORT(GetVectorSize); // Local function. + +} // namespace + +size_t VectorSize() { + static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)(); + return bytes; +} + +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/simd.h b/third-party/libjxl/libjxl/lib/jpegli/simd.h new file mode 100644 index 0000000000..aec772e2d4 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/simd.h @@ -0,0 +1,18 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_SIMD_H_ +#define LIB_JPEGLI_SIMD_H_ + +#include + +namespace jpegli { + +// Returns SIMD vector size in bytes. +size_t VectorSize(); + +} // namespace jpegli + +#endif // LIB_JPEGLI_SIMD_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/source_manager.cc b/third-party/libjxl/libjxl/lib/jpegli/source_manager.cc new file mode 100644 index 0000000000..0b8e0a5c8c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/source_manager.cc @@ -0,0 +1,90 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/decode.h" +#include "lib/jpegli/error.h" +#include "lib/jpegli/memory_manager.h" + +namespace jpegli { + +void init_mem_source(j_decompress_ptr cinfo) {} +void init_stdio_source(j_decompress_ptr cinfo) {} + +void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { + if (num_bytes <= 0) return; + while (num_bytes > static_cast(cinfo->src->bytes_in_buffer)) { + num_bytes -= cinfo->src->bytes_in_buffer; + (*cinfo->src->fill_input_buffer)(cinfo); + } + cinfo->src->next_input_byte += num_bytes; + cinfo->src->bytes_in_buffer -= num_bytes; +} + +void term_source(j_decompress_ptr cinfo) {} + +boolean EmitFakeEoiMarker(j_decompress_ptr cinfo) { + static constexpr uint8_t kFakeEoiMarker[2] = {0xff, 0xd9}; + cinfo->src->next_input_byte = kFakeEoiMarker; + cinfo->src->bytes_in_buffer = 2; + return TRUE; +} + +constexpr size_t kStdioBufferSize = 64 << 10; + +struct StdioSourceManager { + jpeg_source_mgr pub; + FILE* f; + uint8_t* buffer; + + static boolean fill_input_buffer(j_decompress_ptr cinfo) { + auto src = reinterpret_cast(cinfo->src); + size_t num_bytes_read = fread(src->buffer, 1, kStdioBufferSize, src->f); + if (num_bytes_read == 0) { + return EmitFakeEoiMarker(cinfo); + } + src->pub.next_input_byte = src->buffer; + src->pub.bytes_in_buffer = num_bytes_read; + return TRUE; + } +}; + +} // namespace jpegli + +void jpegli_mem_src(j_decompress_ptr cinfo, const unsigned char* inbuffer, + unsigned long insize) { + if (cinfo->src && cinfo->src->init_source != jpegli::init_mem_source) { + JPEGLI_ERROR("jpegli_mem_src: a different source manager was already set"); + } + if (!cinfo->src) { + cinfo->src = jpegli::Allocate(cinfo, 1); + } + cinfo->src->next_input_byte = inbuffer; + cinfo->src->bytes_in_buffer = insize; + cinfo->src->init_source = jpegli::init_mem_source; + cinfo->src->fill_input_buffer = jpegli::EmitFakeEoiMarker; + cinfo->src->skip_input_data = jpegli::skip_input_data; + cinfo->src->resync_to_restart = jpegli_resync_to_restart; + 
cinfo->src->term_source = jpegli::term_source; +} + +void jpegli_stdio_src(j_decompress_ptr cinfo, FILE* infile) { + if (cinfo->src && cinfo->src->init_source != jpegli::init_stdio_source) { + JPEGLI_ERROR("jpeg_stdio_src: a different source manager was already set"); + } + if (!cinfo->src) { + cinfo->src = reinterpret_cast( + jpegli::Allocate(cinfo, 1)); + } + auto src = reinterpret_cast(cinfo->src); + src->f = infile; + src->buffer = jpegli::Allocate(cinfo, jpegli::kStdioBufferSize); + src->pub.next_input_byte = src->buffer; + src->pub.bytes_in_buffer = 0; + src->pub.init_source = jpegli::init_stdio_source; + src->pub.fill_input_buffer = jpegli::StdioSourceManager::fill_input_buffer; + src->pub.skip_input_data = jpegli::skip_input_data; + src->pub.resync_to_restart = jpegli_resync_to_restart; + src->pub.term_source = jpegli::term_source; +} diff --git a/third-party/libjxl/libjxl/lib/jpegli/source_manager_test.cc b/third-party/libjxl/libjxl/lib/jpegli/source_manager_test.cc new file mode 100644 index 0000000000..e15d18ec80 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/source_manager_test.cc @@ -0,0 +1,143 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include +#include + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { +namespace { + +void ReadOutputImage(j_decompress_ptr cinfo, TestImage* output) { + jpegli_read_header(cinfo, /*require_image=*/TRUE); + jpegli_start_decompress(cinfo); + output->ysize = cinfo->output_height; + output->xsize = cinfo->output_width; + output->components = cinfo->num_components; + output->AllocatePixels(); + size_t stride = cinfo->output_width * cinfo->num_components; + while (cinfo->output_scanline < cinfo->output_height) { + JSAMPROW scanline = &output->pixels[cinfo->output_scanline * stride]; + jpegli_read_scanlines(cinfo, &scanline, 1); + } + jpegli_finish_decompress(cinfo); +} + +struct TestConfig { + std::string fn; + std::string fn_desc; + DecompressParams dparams; +}; + +class SourceManagerTestParam : public ::testing::TestWithParam {}; + +namespace { +FILE* MemOpen(const std::vector& data) { + FILE* src = tmpfile(); + if (!src) return nullptr; + fwrite(data.data(), 1, data.size(), src); + rewind(src); + return src; +} +} // namespace + +TEST_P(SourceManagerTestParam, TestStdioSourceManager) { + TestConfig config = GetParam(); + std::vector compressed = ReadTestData(config.fn.c_str()); + if (config.dparams.size_factor < 1.0) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + FILE* src = MemOpen(compressed); + ASSERT_TRUE(src); + TestImage output0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_stdio_src(&cinfo, src); + ReadOutputImage(&cinfo, &output0); + return true; + }; + bool ok = try_catch_block(); + fclose(src); + ASSERT_TRUE(ok); + jpegli_destroy_decompress(&cinfo); + + TestImage output1; + DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1); + VerifyOutputImage(output1, output0, 1.0f); +} 
+ +TEST_P(SourceManagerTestParam, TestMemSourceManager) { + TestConfig config = GetParam(); + std::vector compressed = ReadTestData(config.fn.c_str()); + if (config.dparams.size_factor < 1.0f) { + compressed.resize(compressed.size() * config.dparams.size_factor); + } + TestImage output0; + jpeg_decompress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_decompress(&cinfo); + jpegli_mem_src(&cinfo, compressed.data(), compressed.size()); + ReadOutputImage(&cinfo, &output0); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&cinfo); + + TestImage output1; + DecodeWithLibjpeg(CompressParams(), DecompressParams(), compressed, &output1); + VerifyOutputImage(output1, output0, 1.0f); +} + +std::vector GenerateTests() { + std::vector all_tests; + { + std::vector> testfiles({ + {"jxl/flower/flower.png.im_q85_444.jpg", "Q85YUV444"}, + {"jxl/flower/flower.png.im_q85_420.jpg", "Q85YUV420"}, + {"jxl/flower/flower.png.im_q85_420_R13B.jpg", "Q85YUV420R13B"}, + }); + for (const auto& it : testfiles) { + for (float size_factor : {0.1f, 0.33f, 0.5f, 0.75f}) { + TestConfig config; + config.fn = it.first; + config.fn_desc = it.second; + config.dparams.size_factor = size_factor; + all_tests.push_back(config); + } + } + return all_tests; + } +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + os << c.fn_desc; + if (c.dparams.size_factor < 1.0f) { + os << "Partial" << static_cast(c.dparams.size_factor * 100) << "p"; + } + return os; +} + +std::string TestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(SourceManagerTest, SourceManagerTestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/streaming_test.cc b/third-party/libjxl/libjxl/lib/jpegli/streaming_test.cc new 
file mode 100644 index 0000000000..9dcc0ff4ef --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/streaming_test.cc @@ -0,0 +1,233 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" + +namespace jpegli { +namespace { + +// A simple suspending source manager with an input buffer. +struct SourceManager { + jpeg_source_mgr pub; + std::vector buffer; + + SourceManager() { + pub.next_input_byte = nullptr; + pub.bytes_in_buffer = 0; + pub.init_source = init_source; + pub.fill_input_buffer = fill_input_buffer; + pub.skip_input_data = skip_input_data; + pub.resync_to_restart = jpegli_resync_to_restart; + pub.term_source = term_source; + } + + static void init_source(j_decompress_ptr cinfo) {} + static boolean fill_input_buffer(j_decompress_ptr cinfo) { return FALSE; } + static void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {} + static void term_source(j_decompress_ptr cinfo) {} +}; + +// A destination manager that empties its output buffer into a SourceManager's +// input buffer. The buffer size is kept short because empty_output_buffer() is +// called only when the output buffer is full, and we want to update the decoder +// input frequently to demostrate that streaming works. 
+static constexpr size_t kOutputBufferSize = 1024; +struct DestinationManager { + jpeg_destination_mgr pub; + std::vector buffer; + SourceManager* dest; + + DestinationManager(SourceManager* src) + : buffer(kOutputBufferSize), dest(src) { + pub.next_output_byte = buffer.data(); + pub.free_in_buffer = buffer.size(); + pub.init_destination = init_destination; + pub.empty_output_buffer = empty_output_buffer; + pub.term_destination = term_destination; + } + + static void init_destination(j_compress_ptr cinfo) {} + + static boolean empty_output_buffer(j_compress_ptr cinfo) { + auto us = reinterpret_cast(cinfo->dest); + jpeg_destination_mgr* src = &us->pub; + jpeg_source_mgr* dst = &us->dest->pub; + std::vector& src_buf = us->buffer; + std::vector& dst_buf = us->dest->buffer; + if (dst->bytes_in_buffer > 0 && dst->bytes_in_buffer < dst_buf.size()) { + memmove(dst_buf.data(), dst->next_input_byte, dst->bytes_in_buffer); + } + size_t src_len = src_buf.size() - src->free_in_buffer; + dst_buf.resize(dst->bytes_in_buffer + src_len); + memcpy(&dst_buf[dst->bytes_in_buffer], src_buf.data(), src_len); + dst->next_input_byte = dst_buf.data(); + dst->bytes_in_buffer = dst_buf.size(); + src->next_output_byte = src_buf.data(); + src->free_in_buffer = src_buf.size(); + return true; + } + + static void term_destination(j_compress_ptr cinfo) { + empty_output_buffer(cinfo); + } +}; + +struct TestConfig { + TestImage input; + CompressParams jparams; +}; + +class StreamingTestParam : public ::testing::TestWithParam {}; + +TEST_P(StreamingTestParam, TestStreaming) { + jpeg_decompress_struct dinfo = {}; + jpeg_compress_struct cinfo = {}; + TestConfig config = GetParam(); + TestImage& input = config.input; + TestImage output; + GeneratePixels(&input); + const auto try_catch_block = [&]() { + ERROR_HANDLER_SETUP(jpegli); + dinfo.err = cinfo.err; + dinfo.client_data = cinfo.client_data; + // Create a pair of compressor and decompressor objects, where the + // compressor's output is connected 
to the decompressor's input. + jpegli_create_decompress(&dinfo); + jpegli_create_compress(&cinfo); + SourceManager src; + dinfo.src = reinterpret_cast(&src); + DestinationManager dest(&src); + cinfo.dest = reinterpret_cast(&dest); + + cinfo.image_width = input.xsize; + cinfo.image_height = input.ysize; + cinfo.input_components = input.components; + cinfo.in_color_space = (J_COLOR_SPACE)input.color_space; + jpegli_set_defaults(&cinfo); + cinfo.comp_info[0].v_samp_factor = config.jparams.v_sampling[0]; + jpegli_set_progressive_level(&cinfo, 0); + cinfo.optimize_coding = FALSE; + jpegli_start_compress(&cinfo, TRUE); + + size_t stride = cinfo.image_width * cinfo.input_components; + size_t iMCU_height = 8 * cinfo.max_v_samp_factor; + std::vector row_bytes(iMCU_height * stride); + size_t yin = 0; + size_t yout = 0; + while (yin < cinfo.image_height) { + // Feed one iMCU row at a time to the compressor. + size_t lines_in = std::min(iMCU_height, cinfo.image_height - yin); + memcpy(&row_bytes[0], &input.pixels[yin * stride], lines_in * stride); + std::vector rows_in(lines_in); + for (size_t i = 0; i < lines_in; ++i) { + rows_in[i] = &row_bytes[i * stride]; + } + EXPECT_EQ(lines_in, + jpegli_write_scanlines(&cinfo, &rows_in[0], lines_in)); + yin += lines_in; + if (yin == cinfo.image_height) { + jpegli_finish_compress(&cinfo); + } + + // After the first iMCU row, we don't yet expect any output because the + // compressor delays processing to have context rows after the iMCU row. + if (yin < std::min(2 * iMCU_height, cinfo.image_height)) { + continue; + } + + // After two iMCU rows, the compressor has started emitting compressed + // data. We check here that at least the scan header was output, because + // we expect that the compressor's output buffer was filled at least once + // while emitting the first compressed iMCU row. 
+ if (yin == std::min(2 * iMCU_height, cinfo.image_height)) { + EXPECT_EQ(JPEG_REACHED_SOS, + jpegli_read_header(&dinfo, /*require_image=*/TRUE)); + output.xsize = dinfo.image_width; + output.ysize = dinfo.image_height; + output.components = dinfo.num_components; + EXPECT_EQ(output.xsize, input.xsize); + EXPECT_EQ(output.ysize, input.ysize); + EXPECT_EQ(output.components, input.components); + EXPECT_TRUE(jpegli_start_decompress(&dinfo)); + output.pixels.resize(output.ysize * stride); + if (yin < cinfo.image_height) { + continue; + } + } + + // After six iMCU rows, the compressor has emitted five iMCU rows of + // compressed data, of which we expect four full iMCU row of compressed + // data to be in the decoder's input buffer, but since the decoder also + // needs context rows for upsampling and smoothing, we don't expect any + // output to be ready yet. + if (yin < 7 * iMCU_height && yin < cinfo.image_height) { + continue; + } + + // After five iMCU rows, we expect the decoder to have rendered the output + // with four iMCU rows of delay. + // TODO(szabadka) Reduce the processing delay in the decoder if possible. + size_t lines_out = + (yin == cinfo.image_height ? 
cinfo.image_height - yout : iMCU_height); + std::vector rows_out(lines_out); + for (size_t i = 0; i < lines_out; ++i) { + rows_out[i] = + reinterpret_cast(&output.pixels[(yout + i) * stride]); + } + EXPECT_EQ(lines_out, + jpegli_read_scanlines(&dinfo, &rows_out[0], lines_out)); + VerifyOutputImage(input, output, yout, lines_out, 3.8f); + yout += lines_out; + + if (yout == cinfo.image_height) { + EXPECT_TRUE(jpegli_finish_decompress(&dinfo)); + } + } + return true; + }; + EXPECT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&dinfo); + jpegli_destroy_compress(&cinfo); +} + +std::vector GenerateTests() { + std::vector all_tests; + const size_t xsize0 = 1920; + const size_t ysize0 = 1080; + for (int dysize : {0, 1, 8, 9}) { + for (int v_sampling : {1, 2}) { + TestConfig config; + config.input.xsize = xsize0; + config.input.ysize = ysize0 + dysize; + config.jparams.h_sampling = {1, 1, 1}; + config.jparams.v_sampling = {v_sampling, 1, 1}; + all_tests.push_back(config); + } + } + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + os << c.input; + os << c.jparams; + return os; +} + +std::string TestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(StreamingTest, StreamingTestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_params.h b/third-party/libjxl/libjxl/lib/jpegli/test_params.h new file mode 100644 index 0000000000..6ab9fa573a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/test_params.h @@ -0,0 +1,163 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_TEST_PARAMS_H_ +#define LIB_JPEGLI_TEST_PARAMS_H_ + +#include +#include + +#include +#include + +#include "lib/jpegli/types.h" + +namespace jpegli { + +// We define this here as well to make sure that the *_api_test.cc tests only +// use the public API and therefore we don't include any *_internal.h headers. +template +constexpr inline T1 DivCeil(T1 a, T2 b) { + return (a + b - 1) / b; +} + +#define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0])) + +static constexpr int kLastScan = 0xffff; + +static uint32_t kTestColorMap[] = { + 0x000000, 0xff0000, 0x00ff00, 0x0000ff, 0xffff00, 0x00ffff, + 0xff00ff, 0xffffff, 0x6251fc, 0x45d9c7, 0xa7f059, 0xd9a945, + 0xfa4e44, 0xceaffc, 0xbad7db, 0xc1f0b1, 0xdbca9a, 0xfacac5, + 0xf201ff, 0x0063db, 0x00f01c, 0xdbb204, 0xf12f0c, 0x7ba1dc}; +static constexpr int kTestColorMapNumColors = ARRAY_SIZE(kTestColorMap); + +static constexpr int kSpecialMarker0 = 0xe5; +static constexpr int kSpecialMarker1 = 0xe9; +static constexpr uint8_t kMarkerData[] = {0, 1, 255, 0, 17}; +static constexpr uint8_t kMarkerSequence[] = {0xe6, 0xe8, 0xe7, + 0xe6, 0xe7, 0xe8}; +static constexpr size_t kMarkerSequenceLen = ARRAY_SIZE(kMarkerSequence); + +enum JpegIOMode { + PIXELS, + RAW_DATA, + COEFFICIENTS, +}; + +struct CustomQuantTable { + int slot_idx = 0; + uint16_t table_type = 0; + int scale_factor = 100; + bool add_raw = false; + bool force_baseline = true; + std::vector basic_table; + std::vector quantval; + void Generate(); +}; + +struct TestImage { + size_t xsize = 2268; + size_t ysize = 1512; + int color_space = 2; // JCS_RGB + size_t components = 3; + JpegliDataType data_type = JPEGLI_TYPE_UINT8; + JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN; + std::vector pixels; + std::vector> raw_data; + std::vector> coeffs; + void AllocatePixels() { + pixels.resize(ysize * xsize * components * + jpegli_bytes_per_sample(data_type)); + } + void Clear() { + pixels.clear(); + raw_data.clear(); + coeffs.clear(); + } +}; + +struct 
CompressParams { + int quality = 90; + bool set_jpeg_colorspace = false; + int jpeg_color_space = 0; // JCS_UNKNOWN + std::vector quant_indexes; + std::vector quant_tables; + std::vector h_sampling; + std::vector v_sampling; + std::vector comp_ids; + int override_JFIF = -1; + int override_Adobe = -1; + bool add_marker = false; + bool simple_progression = false; + // -1 is library default + // 0, 1, 2 is set through jpegli_set_progressive_level() + // 2 + N is kScriptN + int progressive_mode = -1; + unsigned int restart_interval = 0; + int restart_in_rows = 0; + int smoothing_factor = 0; + int optimize_coding = -1; + bool use_flat_dc_luma_code = false; + bool omit_standard_tables = false; + bool xyb_mode = false; + bool libjpeg_mode = false; + bool use_adaptive_quantization = true; + std::vector icc; + + int h_samp(int c) const { return h_sampling.empty() ? 1 : h_sampling[c]; } + int v_samp(int c) const { return v_sampling.empty() ? 1 : v_sampling[c]; } + int max_h_sample() const { + auto it = std::max_element(h_sampling.begin(), h_sampling.end()); + return it == h_sampling.end() ? 1 : *it; + } + int max_v_sample() const { + auto it = std::max_element(v_sampling.begin(), v_sampling.end()); + return it == v_sampling.end() ? 
1 : *it; + } + int comp_width(const TestImage& input, int c) const { + return DivCeil(input.xsize * h_samp(c), max_h_sample() * 8) * 8; + } + int comp_height(const TestImage& input, int c) const { + return DivCeil(input.ysize * v_samp(c), max_v_sample() * 8) * 8; + } +}; + +enum ColorQuantMode { + CQUANT_1PASS, + CQUANT_2PASS, + CQUANT_EXTERNAL, + CQUANT_REUSE, +}; + +struct ScanDecompressParams { + int max_scan_number; + int dither_mode; + ColorQuantMode color_quant_mode; +}; + +struct DecompressParams { + float size_factor = 1.0f; + size_t chunk_size = 65536; + size_t max_output_lines = 16; + JpegIOMode output_mode = PIXELS; + JpegliDataType data_type = JPEGLI_TYPE_UINT8; + JpegliEndianness endianness = JPEGLI_NATIVE_ENDIAN; + bool set_out_color_space = false; + int out_color_space = 0; // JCS_UNKNOWN + bool crop_output = false; + bool do_block_smoothing = false; + bool do_fancy_upsampling = true; + bool skip_scans = false; + int scale_num = 1; + int scale_denom = 1; + bool quantize_colors = false; + int desired_number_of_colors = 256; + std::vector scan_params; +}; + +} // namespace jpegli + +#endif // LIB_JPEGLI_TEST_PARAMS_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_utils-inl.h b/third-party/libjxl/libjxl/lib/jpegli/test_utils-inl.h new file mode 100644 index 0000000000..a454917187 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/test_utils-inl.h @@ -0,0 +1,430 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// This template file is included in both the libjpeg_test_util.cc and the +// test_utils.cc files with different JPEG_API_FN macros and possibly different +// include paths for the jpeg headers. + +// Sequential non-interleaved. 
+static constexpr jpeg_scan_info kScript1[] = { + {1, {0}, 0, 63, 0, 0}, + {1, {1}, 0, 63, 0, 0}, + {1, {2}, 0, 63, 0, 0}, +}; +// Sequential partially interleaved, chroma first. +static constexpr jpeg_scan_info kScript2[] = { + {2, {1, 2}, 0, 63, 0, 0}, + {1, {0}, 0, 63, 0, 0}, +}; + +// Rest of the scan scripts are progressive. + +static constexpr jpeg_scan_info kScript3[] = { + // Interleaved full DC. + {3, {0, 1, 2}, 0, 0, 0, 0}, + // Full AC scans. + {1, {0}, 1, 63, 0, 0}, + {1, {1}, 1, 63, 0, 0}, + {1, {2}, 1, 63, 0, 0}, +}; +static constexpr jpeg_scan_info kScript4[] = { + // Non-interleaved full DC. + {1, {0}, 0, 0, 0, 0}, + {1, {1}, 0, 0, 0, 0}, + {1, {2}, 0, 0, 0, 0}, + // Full AC scans. + {1, {0}, 1, 63, 0, 0}, + {1, {1}, 1, 63, 0, 0}, + {1, {2}, 1, 63, 0, 0}, +}; +static constexpr jpeg_scan_info kScript5[] = { + // Partially interleaved full DC, chroma first. + {2, {1, 2}, 0, 0, 0, 0}, + {1, {0}, 0, 0, 0, 0}, + // AC shifted by 1 bit. + {1, {0}, 1, 63, 0, 1}, + {1, {1}, 1, 63, 0, 1}, + {1, {2}, 1, 63, 0, 1}, + // AC refinement scan. + {1, {0}, 1, 63, 1, 0}, + {1, {1}, 1, 63, 1, 0}, + {1, {2}, 1, 63, 1, 0}, +}; +static constexpr jpeg_scan_info kScript6[] = { + // Interleaved DC shifted by 2 bits. + {3, {0, 1, 2}, 0, 0, 0, 2}, + // Interleaved DC refinement scans. + {3, {0, 1, 2}, 0, 0, 2, 1}, + {3, {0, 1, 2}, 0, 0, 1, 0}, + // Full AC scans. + {1, {0}, 1, 63, 0, 0}, + {1, {1}, 1, 63, 0, 0}, + {1, {2}, 1, 63, 0, 0}, +}; + +static constexpr jpeg_scan_info kScript7[] = { + // Non-interleaved DC shifted by 2 bits. + {1, {0}, 0, 0, 0, 2}, + {1, {1}, 0, 0, 0, 2}, + {1, {2}, 0, 0, 0, 2}, + // Non-interleaved DC first refinement scans. + {1, {0}, 0, 0, 2, 1}, + {1, {1}, 0, 0, 2, 1}, + {1, {2}, 0, 0, 2, 1}, + // Non-interleaved DC second refinement scans. + {1, {0}, 0, 0, 1, 0}, + {1, {1}, 0, 0, 1, 0}, + {1, {2}, 0, 0, 1, 0}, + // Full AC scans. 
+ {1, {0}, 1, 63, 0, 0}, + {1, {1}, 1, 63, 0, 0}, + {1, {2}, 1, 63, 0, 0}, +}; + +static constexpr jpeg_scan_info kScript8[] = { + // Partially interleaved DC shifted by 2 bits, chroma first + {2, {1, 2}, 0, 0, 0, 2}, + {1, {0}, 0, 0, 0, 2}, + // Partially interleaved DC first refinement scans. + {2, {0, 2}, 0, 0, 2, 1}, + {1, {1}, 0, 0, 2, 1}, + // Partially interleaved DC second refinement scans, chroma first. + {2, {1, 2}, 0, 0, 1, 0}, + {1, {0}, 0, 0, 1, 0}, + // Full AC scans. + {1, {0}, 1, 63, 0, 0}, + {1, {1}, 1, 63, 0, 0}, + {1, {2}, 1, 63, 0, 0}, +}; + +static constexpr jpeg_scan_info kScript9[] = { + // Interleaved full DC. + {3, {0, 1, 2}, 0, 0, 0, 0}, + // AC scans for component 0 + // shifted by 1 bit, two spectral ranges + {1, {0}, 1, 6, 0, 1}, + {1, {0}, 7, 63, 0, 1}, + // refinement scan, full + {1, {0}, 1, 63, 1, 0}, + // AC scans for component 1 + // shifted by 1 bit, full + {1, {1}, 1, 63, 0, 1}, + // refinement scan, two spectral ranges + {1, {1}, 1, 6, 1, 0}, + {1, {1}, 7, 63, 1, 0}, + // AC scans for component 2 + // shifted by 1 bit, two spectral ranges + {1, {2}, 1, 6, 0, 1}, + {1, {2}, 7, 63, 0, 1}, + // refinement scan, two spectral ranges (but different from above) + {1, {2}, 1, 16, 1, 0}, + {1, {2}, 17, 63, 1, 0}, +}; + +static constexpr jpeg_scan_info kScript10[] = { + // Interleaved full DC. 
+ {3, {0, 1, 2}, 0, 0, 0, 0}, + // AC scans for spectral range 1..16 + // shifted by 1 + {1, {0}, 1, 16, 0, 1}, + {1, {1}, 1, 16, 0, 1}, + {1, {2}, 1, 16, 0, 1}, + // refinement scans, two sub-ranges + {1, {0}, 1, 8, 1, 0}, + {1, {0}, 9, 16, 1, 0}, + {1, {1}, 1, 8, 1, 0}, + {1, {1}, 9, 16, 1, 0}, + {1, {2}, 1, 8, 1, 0}, + {1, {2}, 9, 16, 1, 0}, + // AC scans for spectral range 17..63 + {1, {0}, 17, 63, 0, 1}, + {1, {1}, 17, 63, 0, 1}, + {1, {2}, 17, 63, 0, 1}, + // refinement scans, two sub-ranges + {1, {0}, 17, 28, 1, 0}, + {1, {0}, 29, 63, 1, 0}, + {1, {1}, 17, 28, 1, 0}, + {1, {1}, 29, 63, 1, 0}, + {1, {2}, 17, 28, 1, 0}, + {1, {2}, 29, 63, 1, 0}, +}; + +struct ScanScript { + int num_scans; + const jpeg_scan_info* scans; +}; + +static constexpr ScanScript kTestScript[] = { + {ARRAY_SIZE(kScript1), kScript1}, {ARRAY_SIZE(kScript2), kScript2}, + {ARRAY_SIZE(kScript3), kScript3}, {ARRAY_SIZE(kScript4), kScript4}, + {ARRAY_SIZE(kScript5), kScript5}, {ARRAY_SIZE(kScript6), kScript6}, + {ARRAY_SIZE(kScript7), kScript7}, {ARRAY_SIZE(kScript8), kScript8}, + {ARRAY_SIZE(kScript9), kScript9}, {ARRAY_SIZE(kScript10), kScript10}, +}; +static constexpr int kNumTestScripts = ARRAY_SIZE(kTestScript); + +void SetScanDecompressParams(const DecompressParams& dparams, + j_decompress_ptr cinfo, int scan_number) { + const ScanDecompressParams* sparams = nullptr; + for (const auto& sp : dparams.scan_params) { + if (scan_number <= sp.max_scan_number) { + sparams = &sp; + break; + } + } + if (sparams == nullptr) { + return; + } + if (dparams.quantize_colors) { + cinfo->dither_mode = (J_DITHER_MODE)sparams->dither_mode; + if (sparams->color_quant_mode == CQUANT_1PASS) { + cinfo->two_pass_quantize = FALSE; + cinfo->colormap = nullptr; + } else if (sparams->color_quant_mode == CQUANT_2PASS) { + JXL_CHECK(cinfo->out_color_space == JCS_RGB); + cinfo->two_pass_quantize = TRUE; + cinfo->colormap = nullptr; + } else if (sparams->color_quant_mode == CQUANT_EXTERNAL) { + 
JXL_CHECK(cinfo->out_color_space == JCS_RGB); + cinfo->two_pass_quantize = FALSE; + bool have_colormap = cinfo->colormap != nullptr; + cinfo->actual_number_of_colors = kTestColorMapNumColors; + cinfo->colormap = (*cinfo->mem->alloc_sarray)( + reinterpret_cast(cinfo), JPOOL_IMAGE, + cinfo->actual_number_of_colors, 3); + jxl::msan::UnpoisonMemory(cinfo->colormap, 3 * sizeof(JSAMPROW)); + for (int i = 0; i < kTestColorMapNumColors; ++i) { + cinfo->colormap[0][i] = (kTestColorMap[i] >> 16) & 0xff; + cinfo->colormap[1][i] = (kTestColorMap[i] >> 8) & 0xff; + cinfo->colormap[2][i] = (kTestColorMap[i] >> 0) & 0xff; + } + if (have_colormap) { + JPEG_API_FN(new_colormap)(cinfo); + } + } else if (sparams->color_quant_mode == CQUANT_REUSE) { + JXL_CHECK(cinfo->out_color_space == JCS_RGB); + JXL_CHECK(cinfo->colormap); + } + } +} + +void SetDecompressParams(const DecompressParams& dparams, + j_decompress_ptr cinfo) { + cinfo->do_block_smoothing = dparams.do_block_smoothing; + cinfo->do_fancy_upsampling = dparams.do_fancy_upsampling; + if (dparams.output_mode == RAW_DATA) { + cinfo->raw_data_out = TRUE; + } + if (dparams.set_out_color_space) { + cinfo->out_color_space = (J_COLOR_SPACE)dparams.out_color_space; + if (dparams.out_color_space == JCS_UNKNOWN) { + cinfo->jpeg_color_space = JCS_UNKNOWN; + } + } + cinfo->scale_num = dparams.scale_num; + cinfo->scale_denom = dparams.scale_denom; + cinfo->quantize_colors = dparams.quantize_colors; + cinfo->desired_number_of_colors = dparams.desired_number_of_colors; + if (!dparams.scan_params.empty()) { + if (cinfo->buffered_image) { + for (const auto& sparams : dparams.scan_params) { + if (sparams.color_quant_mode == CQUANT_1PASS) { + cinfo->enable_1pass_quant = TRUE; + } else if (sparams.color_quant_mode == CQUANT_2PASS) { + cinfo->enable_2pass_quant = TRUE; + } else if (sparams.color_quant_mode == CQUANT_EXTERNAL) { + cinfo->enable_external_quant = TRUE; + } + } + SetScanDecompressParams(dparams, cinfo, 1); + } else { + 
SetScanDecompressParams(dparams, cinfo, kLastScan); + } + } +} + +void CheckMarkerPresent(j_decompress_ptr cinfo, uint8_t marker_type) { + bool marker_found = false; + for (jpeg_saved_marker_ptr marker = cinfo->marker_list; marker != nullptr; + marker = marker->next) { + jxl::msan::UnpoisonMemory(marker, sizeof(*marker)); + jxl::msan::UnpoisonMemory(marker->data, marker->data_length); + if (marker->marker == marker_type && + marker->data_length == sizeof(kMarkerData) && + memcmp(marker->data, kMarkerData, sizeof(kMarkerData)) == 0) { + marker_found = true; + } + } + JXL_CHECK(marker_found); +} + +void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo) { + if (jparams.set_jpeg_colorspace) { + JXL_CHECK(cinfo->jpeg_color_space == jparams.jpeg_color_space); + } + if (jparams.override_JFIF >= 0) { + JXL_CHECK(cinfo->saw_JFIF_marker == jparams.override_JFIF); + } + if (jparams.override_Adobe >= 0) { + JXL_CHECK(cinfo->saw_Adobe_marker == jparams.override_Adobe); + } + if (jparams.add_marker) { + CheckMarkerPresent(cinfo, kSpecialMarker0); + CheckMarkerPresent(cinfo, kSpecialMarker1); + } + jxl::msan::UnpoisonMemory( + cinfo->comp_info, cinfo->num_components * sizeof(cinfo->comp_info[0])); + int max_h_samp_factor = 1; + int max_v_samp_factor = 1; + for (int i = 0; i < cinfo->num_components; ++i) { + jpeg_component_info* comp = &cinfo->comp_info[i]; + if (!jparams.comp_ids.empty()) { + JXL_CHECK(comp->component_id == jparams.comp_ids[i]); + } + if (!jparams.h_sampling.empty()) { + JXL_CHECK(comp->h_samp_factor == jparams.h_sampling[i]); + } + if (!jparams.v_sampling.empty()) { + JXL_CHECK(comp->v_samp_factor == jparams.v_sampling[i]); + } + if (!jparams.quant_indexes.empty()) { + JXL_CHECK(comp->quant_tbl_no == jparams.quant_indexes[i]); + } + max_h_samp_factor = std::max(max_h_samp_factor, comp->h_samp_factor); + max_v_samp_factor = std::max(max_v_samp_factor, comp->v_samp_factor); + } + JXL_CHECK(max_h_samp_factor == cinfo->max_h_samp_factor); + 
JXL_CHECK(max_v_samp_factor == cinfo->max_v_samp_factor); + int referenced_tables[NUM_QUANT_TBLS] = {}; + for (int i = 0; i < cinfo->num_components; ++i) { + jpeg_component_info* comp = &cinfo->comp_info[i]; + JXL_CHECK(comp->width_in_blocks == + DivCeil(cinfo->image_width * comp->h_samp_factor, + max_h_samp_factor * DCTSIZE)); + JXL_CHECK(comp->height_in_blocks == + DivCeil(cinfo->image_height * comp->v_samp_factor, + max_v_samp_factor * DCTSIZE)); + referenced_tables[comp->quant_tbl_no] = 1; + } + for (const auto& table : jparams.quant_tables) { + JQUANT_TBL* quant_table = cinfo->quant_tbl_ptrs[table.slot_idx]; + if (!referenced_tables[table.slot_idx]) { + JXL_CHECK(quant_table == nullptr); + continue; + } + JXL_CHECK(quant_table != nullptr); + jxl::msan::UnpoisonMemory(quant_table, sizeof(*quant_table)); + for (int k = 0; k < DCTSIZE2; ++k) { + JXL_CHECK(quant_table->quantval[k] == table.quantval[k]); + } + } +} + +void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo) { + JXL_CHECK(cinfo->input_scan_number > 0); + if (cinfo->progressive_mode) { + JXL_CHECK(cinfo->Ss != 0 || cinfo->Se != 63); + } else { + JXL_CHECK(cinfo->Ss == 0 && cinfo->Se == 63); + } + if (jparams.progressive_mode > 2) { + JXL_CHECK(jparams.progressive_mode < 3 + kNumTestScripts); + const ScanScript& script = kTestScript[jparams.progressive_mode - 3]; + JXL_CHECK(cinfo->input_scan_number <= script.num_scans); + const jpeg_scan_info& scan = script.scans[cinfo->input_scan_number - 1]; + JXL_CHECK(cinfo->comps_in_scan == scan.comps_in_scan); + for (int i = 0; i < cinfo->comps_in_scan; ++i) { + JXL_CHECK(cinfo->cur_comp_info[i]->component_index == + scan.component_index[i]); + } + JXL_CHECK(cinfo->Ss == scan.Ss); + JXL_CHECK(cinfo->Se == scan.Se); + JXL_CHECK(cinfo->Ah == scan.Ah); + JXL_CHECK(cinfo->Al == scan.Al); + } + if (jparams.restart_interval > 0) { + JXL_CHECK(cinfo->restart_interval == jparams.restart_interval); + } else if (jparams.restart_in_rows > 0) { + 
JXL_CHECK(cinfo->restart_interval == + jparams.restart_in_rows * cinfo->MCUs_per_row); + } + if (jparams.progressive_mode == 0 && jparams.optimize_coding == 0) { + if (cinfo->jpeg_color_space == JCS_RGB) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0); + } else if (cinfo->jpeg_color_space == JCS_YCbCr) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1); + } else if (cinfo->jpeg_color_space == JCS_CMYK) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0); + } else if (cinfo->jpeg_color_space == JCS_YCCK) { + JXL_CHECK(cinfo->comp_info[0].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].dc_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[3].dc_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[0].ac_tbl_no == 0); + JXL_CHECK(cinfo->comp_info[1].ac_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[2].ac_tbl_no == 1); + JXL_CHECK(cinfo->comp_info[3].ac_tbl_no == 0); + } + if (jparams.use_flat_dc_luma_code) { + JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0]; + jxl::msan::UnpoisonMemory(tbl, sizeof(*tbl)); + for (int i = 0; i < 15; ++i) { + JXL_CHECK(tbl->huffval[i] == i); + } + } + } +} + +void UnmapColors(uint8_t* row, size_t 
xsize, int components, + JSAMPARRAY colormap, size_t num_colors) { + JXL_CHECK(colormap != nullptr); + std::vector tmp(xsize * components); + for (size_t x = 0; x < xsize; ++x) { + JXL_CHECK(row[x] < num_colors); + for (int c = 0; c < components; ++c) { + tmp[x * components + c] = colormap[c][row[x]]; + } + } + memcpy(row, tmp.data(), tmp.size()); +} + +void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays, + TestImage* output) { + output->xsize = cinfo->image_width; + output->ysize = cinfo->image_height; + output->components = cinfo->num_components; + output->color_space = cinfo->out_color_space; + j_common_ptr comptr = reinterpret_cast(cinfo); + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + std::vector coeffs(comp->width_in_blocks * comp->height_in_blocks * + DCTSIZE2); + for (size_t by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)(comptr, coef_arrays[c], + by, 1, true); + size_t stride = comp->width_in_blocks * sizeof(JBLOCK); + size_t offset = by * comp->width_in_blocks * DCTSIZE2; + memcpy(&coeffs[offset], ba[0], stride); + } + output->coeffs.emplace_back(std::move(coeffs)); + } +} diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_utils.cc b/third-party/libjxl/libjxl/lib/jpegli/test_utils.cc new file mode 100644 index 0000000000..e4a4dc7a6a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/test_utils.cc @@ -0,0 +1,786 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jpegli/test_utils.h" + +#include +#include + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/sanitizers.h" + +#if !defined(TEST_DATA_PATH) +#include "tools/cpp/runfiles/runfiles.h" +#endif + +namespace jpegli { + +#define JPEG_API_FN(name) jpegli_##name +#include "lib/jpegli/test_utils-inl.h" +#undef JPEG_API_FN + +#if defined(TEST_DATA_PATH) +std::string GetTestDataPath(const std::string& filename) { + return std::string(TEST_DATA_PATH "/") + filename; +} +#else +using bazel::tools::cpp::runfiles::Runfiles; +const std::unique_ptr kRunfiles(Runfiles::Create("")); +std::string GetTestDataPath(const std::string& filename) { + std::string root(JPEGXL_ROOT_PACKAGE "/testdata/"); + return kRunfiles->Rlocation(root + filename); +} +#endif + +std::vector ReadTestData(const std::string& filename) { + std::string full_path = GetTestDataPath(filename); + fprintf(stderr, "ReadTestData %s\n", full_path.c_str()); + std::ifstream file(full_path, std::ios::binary); + std::vector str((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + JXL_CHECK(file.good()); + const uint8_t* raw = reinterpret_cast(str.data()); + std::vector data(raw, raw + str.size()); + printf("Test data %s is %d bytes long.\n", filename.c_str(), + static_cast(data.size())); + return data; +} + +void CustomQuantTable::Generate() { + basic_table.resize(DCTSIZE2); + quantval.resize(DCTSIZE2); + switch (table_type) { + case 0: { + for (int k = 0; k < DCTSIZE2; ++k) { + basic_table[k] = k + 1; + } + break; + } + default: + for (int k = 0; k < DCTSIZE2; ++k) { + basic_table[k] = table_type; + } + } + for (int k = 0; k < DCTSIZE2; ++k) { + quantval[k] = (basic_table[k] * scale_factor + 50U) / 100U; + quantval[k] = std::max(quantval[k], 1U); + quantval[k] = std::min(quantval[k], 65535U); + if (!add_raw) { + quantval[k] = std::min(quantval[k], 
force_baseline ? 255U : 32767U); + } + } +} + +bool PNMParser::ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth) { + if (pos_[0] != 'P' || (pos_[1] != '5' && pos_[1] != '6')) { + fprintf(stderr, "Invalid PNM header."); + return false; + } + *num_channels = (pos_[1] == '5' ? 1 : 3); + pos_ += 2; + + size_t maxval; + if (!SkipWhitespace() || !ParseUnsigned(xsize) || !SkipWhitespace() || + !ParseUnsigned(ysize) || !SkipWhitespace() || !ParseUnsigned(&maxval) || + !SkipWhitespace()) { + return false; + } + if (maxval == 0 || maxval >= 65536) { + fprintf(stderr, "Invalid maxval value.\n"); + return false; + } + bool found_bitdepth = false; + for (int bits = 1; bits <= 16; ++bits) { + if (maxval == (1u << bits) - 1) { + *bitdepth = bits; + found_bitdepth = true; + break; + } + } + if (!found_bitdepth) { + fprintf(stderr, "Invalid maxval value.\n"); + return false; + } + + *pos = pos_; + return true; +} + +bool PNMParser::ParseUnsigned(size_t* number) { + if (pos_ == end_ || *pos_ < '0' || *pos_ > '9') { + fprintf(stderr, "Expected unsigned number.\n"); + return false; + } + *number = 0; + while (pos_ < end_ && *pos_ >= '0' && *pos_ <= '9') { + *number *= 10; + *number += *pos_ - '0'; + ++pos_; + } + + return true; +} + +bool PNMParser::SkipWhitespace() { + if (pos_ == end_ || !IsWhitespace(*pos_)) { + fprintf(stderr, "Expected whitespace.\n"); + return false; + } + while (pos_ < end_ && IsWhitespace(*pos_)) { + ++pos_; + } + return true; +} + +bool ReadPNM(const std::vector& data, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth, + std::vector* pixels) { + if (data.size() < 2) { + fprintf(stderr, "PNM file too small.\n"); + return false; + } + PNMParser parser(data.data(), data.size()); + const uint8_t* pos = nullptr; + if (!parser.ParseHeader(&pos, xsize, ysize, num_channels, bitdepth)) { + return false; + } + pixels->resize(data.data() + data.size() - pos); + memcpy(&(*pixels)[0], pos, 
pixels->size()); + return true; +} + +std::string ColorSpaceName(J_COLOR_SPACE colorspace) { + switch (colorspace) { + case JCS_UNKNOWN: + return "UNKNOWN"; + case JCS_GRAYSCALE: + return "GRAYSCALE"; + case JCS_RGB: + return "RGB"; + case JCS_YCbCr: + return "YCbCr"; + case JCS_CMYK: + return "CMYK"; + case JCS_YCCK: + return "YCCK"; + default: + return ""; + } +} + +std::string IOMethodName(JpegliDataType data_type, + JpegliEndianness endianness) { + std::string retval; + if (data_type == JPEGLI_TYPE_UINT8) { + return ""; + } else if (data_type == JPEGLI_TYPE_UINT16) { + retval = "UINT16"; + } else if (data_type == JPEGLI_TYPE_FLOAT) { + retval = "FLOAT"; + } + if (endianness == JPEGLI_LITTLE_ENDIAN) { + retval += "LE"; + } else if (endianness == JPEGLI_BIG_ENDIAN) { + retval += "BE"; + } + return retval; +} + +std::string SamplingId(const CompressParams& jparams) { + std::stringstream os; + JXL_CHECK(jparams.h_sampling.size() == jparams.v_sampling.size()); + if (!jparams.h_sampling.empty()) { + size_t len = jparams.h_sampling.size(); + while (len > 1 && jparams.h_sampling[len - 1] == 1 && + jparams.v_sampling[len - 1] == 1) { + --len; + } + os << "SAMP"; + for (size_t i = 0; i < len; ++i) { + if (i > 0) os << "_"; + os << jparams.h_sampling[i] << "x" << jparams.v_sampling[i]; + } + } + return os.str(); +} + +std::ostream& operator<<(std::ostream& os, const TestImage& input) { + os << input.xsize << "x" << input.ysize; + os << IOMethodName(input.data_type, input.endianness); + if (input.color_space != JCS_RGB) { + os << "InputColor" << ColorSpaceName((J_COLOR_SPACE)input.color_space); + } + if (input.color_space == JCS_UNKNOWN) { + os << input.components; + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const CompressParams& jparams) { + os << "Q" << jparams.quality; + os << SamplingId(jparams); + if (jparams.set_jpeg_colorspace) { + os << "JpegColor" + << ColorSpaceName((J_COLOR_SPACE)jparams.jpeg_color_space); + } + if 
(!jparams.comp_ids.empty()) { + os << "CID"; + for (size_t i = 0; i < jparams.comp_ids.size(); ++i) { + os << jparams.comp_ids[i]; + } + } + if (!jparams.quant_indexes.empty()) { + os << "QIDX"; + for (size_t i = 0; i < jparams.quant_indexes.size(); ++i) { + os << jparams.quant_indexes[i]; + } + for (const auto& table : jparams.quant_tables) { + os << "TABLE" << table.slot_idx << "T" << table.table_type << "F" + << table.scale_factor + << (table.add_raw ? "R" + : table.force_baseline ? "B" + : ""); + } + } + if (jparams.progressive_mode >= 0) { + os << "P" << jparams.progressive_mode; + } else if (jparams.simple_progression) { + os << "Psimple"; + } + if (jparams.optimize_coding == 1) { + os << "OptimizedCode"; + } else if (jparams.optimize_coding == 0) { + os << "FixedCode"; + if (jparams.use_flat_dc_luma_code) { + os << "FlatDCLuma"; + } else if (jparams.omit_standard_tables) { + os << "OmitDHT"; + } + } + if (!jparams.use_adaptive_quantization) { + os << "NoAQ"; + } + if (jparams.restart_interval > 0) { + os << "R" << jparams.restart_interval; + } + if (jparams.restart_in_rows > 0) { + os << "RR" << jparams.restart_in_rows; + } + if (jparams.xyb_mode) { + os << "XYB"; + } else if (jparams.libjpeg_mode) { + os << "Libjpeg"; + } + if (jparams.override_JFIF >= 0) { + os << (jparams.override_JFIF ? "AddJFIF" : "NoJFIF"); + } + if (jparams.override_Adobe >= 0) { + os << (jparams.override_Adobe ? 
"AddAdobe" : "NoAdobe"); + } + if (jparams.add_marker) { + os << "AddMarker"; + } + if (!jparams.icc.empty()) { + os << "ICCSize" << jparams.icc.size(); + } + if (jparams.smoothing_factor != 0) { + os << "SF" << jparams.smoothing_factor; + } + return os; +} + +void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels) { + if (colorspace == JCS_GRAYSCALE) { + *channels = 1; + } else if (colorspace == JCS_RGB || colorspace == JCS_YCbCr) { + *channels = 3; + } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) { + *channels = 4; + } else if (colorspace == JCS_UNKNOWN) { + JXL_CHECK(*channels <= 4); + } else { + JXL_ABORT(); + } +} + +void RGBToYCbCr(float r, float g, float b, float* y, float* cb, float* cr) { + *y = 0.299f * r + 0.587f * g + 0.114f * b; + *cb = -0.168736f * r - 0.331264f * g + 0.5f * b + 0.5f; + *cr = 0.5f * r - 0.418688f * g - 0.081312f * b + 0.5f; +} + +void ConvertPixel(const uint8_t* input_rgb, uint8_t* out, + J_COLOR_SPACE colorspace, size_t num_channels, + JpegliDataType data_type = JPEGLI_TYPE_UINT8, + bool swap_endianness = JPEGLI_NATIVE_ENDIAN) { + const float kMul = 255.0f; + float r = input_rgb[0] / kMul; + float g = input_rgb[1] / kMul; + float b = input_rgb[2] / kMul; + uint8_t out8[MAX_COMPONENTS]; + if (colorspace == JCS_GRAYSCALE) { + const float Y = 0.299f * r + 0.587f * g + 0.114f * b; + out8[0] = static_cast(std::round(Y * kMul)); + } else if (colorspace == JCS_RGB || colorspace == JCS_UNKNOWN) { + for (size_t c = 0; c < num_channels; ++c) { + out8[c] = input_rgb[std::min(2, c)]; + } + } else if (colorspace == JCS_YCbCr) { + float Y, Cb, Cr; + RGBToYCbCr(r, g, b, &Y, &Cb, &Cr); + out8[0] = static_cast(std::round(Y * kMul)); + out8[1] = static_cast(std::round(Cb * kMul)); + out8[2] = static_cast(std::round(Cr * kMul)); + } else if (colorspace == JCS_CMYK || colorspace == JCS_YCCK) { + float K = 1.0f - std::max(r, std::max(g, b)); + float scaleK = 1.0f / (1.0f - K); + r *= scaleK; + g *= scaleK; + b *= scaleK; + if 
(colorspace == JCS_CMYK) { + out8[0] = static_cast(std::round((1.0f - r) * kMul)); + out8[1] = static_cast(std::round((1.0f - g) * kMul)); + out8[2] = static_cast(std::round((1.0f - b) * kMul)); + } else if (colorspace == JCS_YCCK) { + float Y, Cb, Cr; + RGBToYCbCr(r, g, b, &Y, &Cb, &Cr); + out8[0] = static_cast(std::round(Y * kMul)); + out8[1] = static_cast(std::round(Cb * kMul)); + out8[2] = static_cast(std::round(Cr * kMul)); + } + out8[3] = static_cast(std::round(K * kMul)); + } else { + JXL_ABORT("Colorspace %d not supported", colorspace); + } + if (data_type == JPEGLI_TYPE_UINT8) { + memcpy(out, out8, num_channels); + } else if (data_type == JPEGLI_TYPE_UINT16) { + for (size_t c = 0; c < num_channels; ++c) { + uint16_t val = (out8[c] << 8) + out8[c]; + val |= 0x40; // Make little-endian and big-endian asymmetric + if (swap_endianness) { + val = JXL_BSWAP16(val); + } + memcpy(&out[sizeof(val) * c], &val, sizeof(val)); + } + } else if (data_type == JPEGLI_TYPE_FLOAT) { + for (size_t c = 0; c < num_channels; ++c) { + float val = out8[c] / 255.0f; + if (swap_endianness) { + val = BSwapFloat(val); + } + memcpy(&out[sizeof(val) * c], &val, sizeof(val)); + } + } +} + +void ConvertToGrayscale(TestImage* img) { + if (img->color_space == JCS_GRAYSCALE) return; + JXL_CHECK(img->data_type == JPEGLI_TYPE_UINT8); + for (size_t i = 0; i < img->pixels.size(); i += 3) { + if (img->color_space == JCS_RGB) { + ConvertPixel(&img->pixels[i], &img->pixels[i / 3], JCS_GRAYSCALE, 1); + } else if (img->color_space == JCS_YCbCr) { + img->pixels[i / 3] = img->pixels[i]; + } + } + img->pixels.resize(img->pixels.size() / 3); + img->color_space = JCS_GRAYSCALE; + img->components = 1; +} + +void GeneratePixels(TestImage* img) { + const std::vector imgdata = ReadTestData("jxl/flower/flower.pnm"); + size_t xsize, ysize, channels, bitdepth; + std::vector pixels; + JXL_CHECK(ReadPNM(imgdata, &xsize, &ysize, &channels, &bitdepth, &pixels)); + if (img->xsize == 0) img->xsize = xsize; + if 
(img->ysize == 0) img->ysize = ysize; + JXL_CHECK(img->xsize <= xsize); + JXL_CHECK(img->ysize <= ysize); + JXL_CHECK(3 == channels); + JXL_CHECK(8 == bitdepth); + size_t in_bytes_per_pixel = channels; + size_t in_stride = xsize * in_bytes_per_pixel; + size_t x0 = (xsize - img->xsize) / 2; + size_t y0 = (ysize - img->ysize) / 2; + SetNumChannels((J_COLOR_SPACE)img->color_space, &img->components); + size_t out_bytes_per_pixel = + jpegli_bytes_per_sample(img->data_type) * img->components; + size_t out_stride = img->xsize * out_bytes_per_pixel; + bool swap_endianness = + (img->endianness == JPEGLI_LITTLE_ENDIAN && !IsLittleEndian()) || + (img->endianness == JPEGLI_BIG_ENDIAN && IsLittleEndian()); + img->pixels.resize(img->ysize * out_stride); + for (size_t iy = 0; iy < img->ysize; ++iy) { + size_t y = y0 + iy; + for (size_t ix = 0; ix < img->xsize; ++ix) { + size_t x = x0 + ix; + size_t idx_in = y * in_stride + x * in_bytes_per_pixel; + size_t idx_out = iy * out_stride + ix * out_bytes_per_pixel; + ConvertPixel(&pixels[idx_in], &img->pixels[idx_out], + (J_COLOR_SPACE)img->color_space, img->components, + img->data_type, swap_endianness); + } + } +} + +void GenerateRawData(const CompressParams& jparams, TestImage* img) { + for (size_t c = 0; c < img->components; ++c) { + size_t xsize = jparams.comp_width(*img, c); + size_t ysize = jparams.comp_height(*img, c); + size_t factor_y = jparams.max_v_sample() / jparams.v_samp(c); + size_t factor_x = jparams.max_h_sample() / jparams.h_samp(c); + size_t factor = factor_x * factor_y; + std::vector plane(ysize * xsize); + size_t bytes_per_pixel = img->components; + for (size_t y = 0; y < ysize; ++y) { + for (size_t x = 0; x < xsize; ++x) { + int result = 0; + for (size_t iy = 0; iy < factor_y; ++iy) { + size_t yy = std::min(y * factor_y + iy, img->ysize - 1); + for (size_t ix = 0; ix < factor_x; ++ix) { + size_t xx = std::min(x * factor_x + ix, img->xsize - 1); + size_t pixel_ix = (yy * img->xsize + xx) * bytes_per_pixel + c; + 
result += img->pixels[pixel_ix]; + } + } + result = static_cast((result + factor / 2) / factor); + plane[y * xsize + x] = result; + } + } + img->raw_data.emplace_back(std::move(plane)); + } +} + +void GenerateCoeffs(const CompressParams& jparams, TestImage* img) { + for (size_t c = 0; c < img->components; ++c) { + int xsize_blocks = jparams.comp_width(*img, c) / DCTSIZE; + int ysize_blocks = jparams.comp_height(*img, c) / DCTSIZE; + std::vector plane(ysize_blocks * xsize_blocks * DCTSIZE2); + for (int by = 0; by < ysize_blocks; ++by) { + for (int bx = 0; bx < xsize_blocks; ++bx) { + JCOEF* block = &plane[(by * xsize_blocks + bx) * DCTSIZE2]; + for (int k = 0; k < DCTSIZE2; ++k) { + block[k] = (bx - by) / (k + 1); + } + } + } + img->coeffs.emplace_back(std::move(plane)); + } +} + +void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams, + j_compress_ptr cinfo) { + cinfo->image_width = input.xsize; + cinfo->image_height = input.ysize; + cinfo->input_components = input.components; + if (jparams.xyb_mode) { + jpegli_set_xyb_mode(cinfo); + } + if (jparams.libjpeg_mode) { + jpegli_enable_adaptive_quantization(cinfo, FALSE); + jpegli_use_standard_quant_tables(cinfo); + jpegli_set_progressive_level(cinfo, 0); + } + jpegli_set_defaults(cinfo); + cinfo->in_color_space = (J_COLOR_SPACE)input.color_space; + jpegli_default_colorspace(cinfo); + if (jparams.override_JFIF >= 0) { + cinfo->write_JFIF_header = jparams.override_JFIF; + } + if (jparams.override_Adobe >= 0) { + cinfo->write_Adobe_marker = jparams.override_Adobe; + } + if (jparams.set_jpeg_colorspace) { + jpegli_set_colorspace(cinfo, (J_COLOR_SPACE)jparams.jpeg_color_space); + } + if (!jparams.comp_ids.empty()) { + for (int c = 0; c < cinfo->num_components; ++c) { + cinfo->comp_info[c].component_id = jparams.comp_ids[c]; + } + } + if (!jparams.h_sampling.empty()) { + for (int c = 0; c < cinfo->num_components; ++c) { + cinfo->comp_info[c].h_samp_factor = jparams.h_sampling[c]; + 
cinfo->comp_info[c].v_samp_factor = jparams.v_sampling[c]; + } + } + jpegli_set_quality(cinfo, jparams.quality, TRUE); + if (!jparams.quant_indexes.empty()) { + for (int c = 0; c < cinfo->num_components; ++c) { + cinfo->comp_info[c].quant_tbl_no = jparams.quant_indexes[c]; + } + for (const auto& table : jparams.quant_tables) { + if (table.add_raw) { + cinfo->quant_tbl_ptrs[table.slot_idx] = + jpegli_alloc_quant_table((j_common_ptr)cinfo); + for (int k = 0; k < DCTSIZE2; ++k) { + cinfo->quant_tbl_ptrs[table.slot_idx]->quantval[k] = + table.quantval[k]; + } + cinfo->quant_tbl_ptrs[table.slot_idx]->sent_table = FALSE; + } else { + jpegli_add_quant_table(cinfo, table.slot_idx, &table.basic_table[0], + table.scale_factor, table.force_baseline); + } + } + } + if (jparams.simple_progression) { + jpegli_simple_progression(cinfo); + JXL_CHECK(jparams.progressive_mode == -1); + } + if (jparams.progressive_mode > 2) { + const ScanScript& script = kTestScript[jparams.progressive_mode - 3]; + cinfo->scan_info = script.scans; + cinfo->num_scans = script.num_scans; + } else if (jparams.progressive_mode >= 0) { + jpegli_set_progressive_level(cinfo, jparams.progressive_mode); + } + jpegli_set_input_format(cinfo, input.data_type, input.endianness); + jpegli_enable_adaptive_quantization(cinfo, jparams.use_adaptive_quantization); + cinfo->restart_interval = jparams.restart_interval; + cinfo->restart_in_rows = jparams.restart_in_rows; + cinfo->smoothing_factor = jparams.smoothing_factor; + if (jparams.optimize_coding == 1) { + cinfo->optimize_coding = TRUE; + } else if (jparams.optimize_coding == 0) { + cinfo->optimize_coding = FALSE; + } + cinfo->raw_data_in = !input.raw_data.empty(); + if (jparams.optimize_coding == 0 && jparams.use_flat_dc_luma_code) { + JHUFF_TBL* tbl = cinfo->dc_huff_tbl_ptrs[0]; + memset(tbl, 0, sizeof(*tbl)); + tbl->bits[4] = 15; + for (int i = 0; i < 15; ++i) tbl->huffval[i] = i; + } + if (input.coeffs.empty()) { + bool write_all_tables = TRUE; + if 
(jparams.optimize_coding == 0 && !jparams.use_flat_dc_luma_code && + jparams.omit_standard_tables) { + write_all_tables = FALSE; + cinfo->dc_huff_tbl_ptrs[0]->sent_table = TRUE; + cinfo->dc_huff_tbl_ptrs[1]->sent_table = TRUE; + cinfo->ac_huff_tbl_ptrs[0]->sent_table = TRUE; + cinfo->ac_huff_tbl_ptrs[1]->sent_table = TRUE; + } + jpegli_start_compress(cinfo, write_all_tables); + if (jparams.add_marker) { + jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData, + sizeof(kMarkerData)); + jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData)); + for (size_t p = 0; p < sizeof(kMarkerData); ++p) { + jpegli_write_m_byte(cinfo, kMarkerData[p]); + } + for (size_t i = 0; i < kMarkerSequenceLen; ++i) { + jpegli_write_marker(cinfo, kMarkerSequence[i], kMarkerData, + ((i + 2) % sizeof(kMarkerData))); + } + } + if (!jparams.icc.empty()) { + jpegli_write_icc_profile(cinfo, jparams.icc.data(), jparams.icc.size()); + } + } + if (cinfo->raw_data_in) { + // Need to copy because jpeg API requires non-const pointers. + std::vector> raw_data = input.raw_data; + size_t max_lines = jparams.max_v_sample() * DCTSIZE; + std::vector> rowdata(cinfo->num_components); + std::vector data(cinfo->num_components); + for (int c = 0; c < cinfo->num_components; ++c) { + rowdata[c].resize(jparams.v_samp(c) * DCTSIZE); + data[c] = &rowdata[c][0]; + } + while (cinfo->next_scanline < cinfo->image_height) { + for (int c = 0; c < cinfo->num_components; ++c) { + size_t cwidth = cinfo->comp_info[c].width_in_blocks * DCTSIZE; + size_t cheight = cinfo->comp_info[c].height_in_blocks * DCTSIZE; + size_t num_lines = jparams.v_samp(c) * DCTSIZE; + size_t y0 = (cinfo->next_scanline / max_lines) * num_lines; + for (size_t i = 0; i < num_lines; ++i) { + rowdata[c][i] = + (y0 + i < cheight ? 
&raw_data[c][(y0 + i) * cwidth] : nullptr); + } + } + size_t num_lines = jpegli_write_raw_data(cinfo, &data[0], max_lines); + JXL_CHECK(num_lines == max_lines); + } + } else if (!input.coeffs.empty()) { + j_common_ptr comptr = reinterpret_cast(cinfo); + jvirt_barray_ptr* coef_arrays = reinterpret_cast(( + *cinfo->mem->alloc_small)( + comptr, JPOOL_IMAGE, cinfo->num_components * sizeof(jvirt_barray_ptr))); + for (int c = 0; c < cinfo->num_components; ++c) { + size_t xsize_blocks = jparams.comp_width(input, c) / DCTSIZE; + size_t ysize_blocks = jparams.comp_height(input, c) / DCTSIZE; + coef_arrays[c] = (*cinfo->mem->request_virt_barray)( + comptr, JPOOL_IMAGE, FALSE, xsize_blocks, ysize_blocks, + cinfo->comp_info[c].v_samp_factor); + } + jpegli_write_coefficients(cinfo, coef_arrays); + if (jparams.add_marker) { + jpegli_write_marker(cinfo, kSpecialMarker0, kMarkerData, + sizeof(kMarkerData)); + jpegli_write_m_header(cinfo, kSpecialMarker1, sizeof(kMarkerData)); + for (size_t p = 0; p < sizeof(kMarkerData); ++p) { + jpegli_write_m_byte(cinfo, kMarkerData[p]); + } + } + for (int c = 0; c < cinfo->num_components; ++c) { + jpeg_component_info* comp = &cinfo->comp_info[c]; + for (size_t by = 0; by < comp->height_in_blocks; ++by) { + JBLOCKARRAY ba = (*cinfo->mem->access_virt_barray)( + comptr, coef_arrays[c], by, 1, true); + size_t stride = comp->width_in_blocks * sizeof(JBLOCK); + size_t offset = by * comp->width_in_blocks * DCTSIZE2; + memcpy(ba[0], &input.coeffs[c][offset], stride); + } + } + } else { + size_t stride = cinfo->image_width * cinfo->input_components * + jpegli_bytes_per_sample(input.data_type); + std::vector row_bytes(stride); + for (size_t y = 0; y < cinfo->image_height; ++y) { + memcpy(&row_bytes[0], &input.pixels[y * stride], stride); + JSAMPROW row[] = {row_bytes.data()}; + jpegli_write_scanlines(cinfo, row, 1); + } + } + jpegli_finish_compress(cinfo); +} + +bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams, + std::vector* 
compressed) { + uint8_t* buffer = nullptr; + unsigned long buffer_size = 0; + jpeg_compress_struct cinfo; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &buffer, &buffer_size); + EncodeWithJpegli(input, jparams, &cinfo); + return true; + }; + bool success = try_catch_block(); + jpegli_destroy_compress(&cinfo); + if (success) { + compressed->resize(buffer_size); + std::copy_n(buffer, buffer_size, compressed->data()); + } + if (buffer) std::free(buffer); + return success; +} + +int NumTestScanScripts() { return kNumTestScripts; } + +void DumpImage(const TestImage& image, const std::string fn) { + JXL_CHECK(image.components == 1 || image.components == 3); + size_t bytes_per_sample = jpegli_bytes_per_sample(image.data_type); + uint32_t maxval = (1u << (8 * bytes_per_sample)) - 1; + char type = image.components == 1 ? '5' : '6'; + std::ofstream out(fn.c_str(), std::ofstream::binary); + out << "P" << type << std::endl + << image.xsize << " " << image.ysize << std::endl + << maxval << std::endl; + out.write(reinterpret_cast(image.pixels.data()), + image.pixels.size()); + out.close(); +} + +double DistanceRms(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, double* max_diff) { + size_t stride = input.xsize * input.components; + size_t start_offset = start_line * stride; + auto get_sample = [&](const TestImage& im, const std::vector& data, + size_t idx) -> double { + size_t bytes_per_sample = jpegli_bytes_per_sample(im.data_type); + bool is_little_endian = + (im.endianness == JPEGLI_LITTLE_ENDIAN || + (im.endianness == JPEGLI_NATIVE_ENDIAN && IsLittleEndian())); + size_t offset = start_offset + idx * bytes_per_sample; + JXL_CHECK(offset < data.size()); + const uint8_t* p = &data[offset]; + if (im.data_type == JPEGLI_TYPE_UINT8) { + static const double mul8 = 1.0 / 255.0; + return p[0] * mul8; + } else if (im.data_type == JPEGLI_TYPE_UINT16) { + 
static const double mul16 = 1.0 / 65535.0; + return (is_little_endian ? LoadLE16(p) : LoadBE16(p)) * mul16; + } else if (im.data_type == JPEGLI_TYPE_FLOAT) { + return (is_little_endian ? LoadLEFloat(p) : LoadBEFloat(p)); + } + return 0.0; + }; + double diff2 = 0.0; + size_t num_samples = 0; + if (max_diff) *max_diff = 0.0; + if (!input.pixels.empty() && !output.pixels.empty()) { + num_samples = num_lines * stride; + for (size_t i = 0; i < num_samples; ++i) { + double sample_orig = get_sample(input, input.pixels, i); + double sample_output = get_sample(output, output.pixels, i); + double diff = sample_orig - sample_output; + if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff)); + diff2 += diff * diff; + } + } else { + JXL_CHECK(!input.raw_data.empty()); + JXL_CHECK(!output.raw_data.empty()); + for (size_t c = 0; c < input.raw_data.size(); ++c) { + JXL_CHECK(c < output.raw_data.size()); + num_samples += input.raw_data[c].size(); + for (size_t i = 0; i < input.raw_data[c].size(); ++i) { + double sample_orig = get_sample(input, input.raw_data[c], i); + double sample_output = get_sample(output, output.raw_data[c], i); + double diff = sample_orig - sample_output; + if (max_diff) *max_diff = std::max(*max_diff, 255.0 * std::abs(diff)); + diff2 += diff * diff; + } + } + } + return std::sqrt(diff2 / num_samples) * 255.0; +} + +double DistanceRms(const TestImage& input, const TestImage& output, + double* max_diff) { + return DistanceRms(input, output, 0, output.ysize, max_diff); +} + +void VerifyOutputImage(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, double max_rms, + double max_diff) { + double max_d; + double rms = DistanceRms(input, output, start_line, num_lines, &max_d); + printf("rms: %f, max_rms: %f, max_d: %f, max_diff: %f\n", rms, max_rms, + max_d, max_diff); + JXL_CHECK(rms <= max_rms); + JXL_CHECK(max_d <= max_diff); +} + +void VerifyOutputImage(const TestImage& input, const TestImage& output, + double 
max_rms, double max_diff) { + JXL_CHECK(output.xsize == input.xsize); + JXL_CHECK(output.ysize == input.ysize); + JXL_CHECK(output.components == input.components); + JXL_CHECK(output.color_space == input.color_space); + if (!input.coeffs.empty()) { + JXL_CHECK(input.coeffs.size() == input.components); + JXL_CHECK(output.coeffs.size() == input.components); + for (size_t c = 0; c < input.components; ++c) { + JXL_CHECK(output.coeffs[c].size() == input.coeffs[c].size()); + JXL_CHECK(0 == memcmp(input.coeffs[c].data(), output.coeffs[c].data(), + input.coeffs[c].size())); + } + } else { + VerifyOutputImage(input, output, 0, output.ysize, max_rms, max_diff); + } +} + +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/test_utils.h b/third-party/libjxl/libjxl/lib/jpegli/test_utils.h new file mode 100644 index 0000000000..132cfd042a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/test_utils.h @@ -0,0 +1,130 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_TEST_UTILS_H_ +#define LIB_JPEGLI_TEST_UTILS_H_ + +#include +#include + +#include +#include +#include + +/* clang-format off */ +#include +#include +#include +/* clang-format on */ + +#include "lib/jpegli/common.h" +#include "lib/jpegli/libjpeg_test_util.h" +#include "lib/jpegli/test_params.h" + +namespace jpegli { + +#define ERROR_HANDLER_SETUP(flavor) \ + jpeg_error_mgr jerr; \ + jmp_buf env; \ + cinfo.err = flavor##_std_error(&jerr); \ + if (setjmp(env)) { \ + return false; \ + } \ + cinfo.client_data = reinterpret_cast(&env); \ + cinfo.err->error_exit = [](j_common_ptr cinfo) { \ + (*cinfo->err->output_message)(cinfo); \ + jmp_buf* env = reinterpret_cast(cinfo->client_data); \ + flavor##_destroy(cinfo); \ + longjmp(*env, 1); \ + }; + +std::string IOMethodName(JpegliDataType data_type, JpegliEndianness endianness); + +std::string ColorSpaceName(J_COLOR_SPACE colorspace); + +std::ostream& operator<<(std::ostream& os, const TestImage& input); + +std::ostream& operator<<(std::ostream& os, const CompressParams& jparams); + +int NumTestScanScripts(); + +void VerifyHeader(const CompressParams& jparams, j_decompress_ptr cinfo); +void VerifyScanHeader(const CompressParams& jparams, j_decompress_ptr cinfo); + +void SetDecompressParams(const DecompressParams& dparams, + j_decompress_ptr cinfo); + +void SetScanDecompressParams(const DecompressParams& dparams, + j_decompress_ptr cinfo, int scan_number); + +void CopyCoefficients(j_decompress_ptr cinfo, jvirt_barray_ptr* coef_arrays, + TestImage* output); + +void UnmapColors(uint8_t* row, size_t xsize, int components, + JSAMPARRAY colormap, size_t num_colors); + +std::string GetTestDataPath(const std::string& filename); +std::vector ReadTestData(const std::string& filename); + +class PNMParser { + public: + explicit PNMParser(const uint8_t* data, const size_t len) + : pos_(data), end_(data + len) {} + + // Sets "pos" to the first non-header byte/pixel on success. 
+ bool ParseHeader(const uint8_t** pos, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth); + + private: + static bool IsLineBreak(const uint8_t c) { return c == '\r' || c == '\n'; } + static bool IsWhitespace(const uint8_t c) { + return IsLineBreak(c) || c == '\t' || c == ' '; + } + + bool ParseUnsigned(size_t* number); + + bool SkipWhitespace(); + + const uint8_t* pos_; + const uint8_t* const end_; +}; + +bool ReadPNM(const std::vector& data, size_t* xsize, size_t* ysize, + size_t* num_channels, size_t* bitdepth, + std::vector* pixels); + +void SetNumChannels(J_COLOR_SPACE colorspace, size_t* channels); + +void ConvertToGrayscale(TestImage* img); + +void GeneratePixels(TestImage* img); + +void GenerateRawData(const CompressParams& jparams, TestImage* img); + +void GenerateCoeffs(const CompressParams& jparams, TestImage* img); + +void EncodeWithJpegli(const TestImage& input, const CompressParams& jparams, + j_compress_ptr cinfo); + +bool EncodeWithJpegli(const TestImage& input, const CompressParams& jparams, + std::vector* compressed); + +double DistanceRms(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, + double* max_diff = nullptr); + +double DistanceRms(const TestImage& input, const TestImage& output, + double* max_diff = nullptr); + +void VerifyOutputImage(const TestImage& input, const TestImage& output, + size_t start_line, size_t num_lines, double max_rms, + double max_diff = 255.0); + +void VerifyOutputImage(const TestImage& input, const TestImage& output, + double max_rms, double max_diff = 255.0); + +} // namespace jpegli + +#endif // LIB_JPEGLI_TEST_UTILS_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/testing.h b/third-party/libjxl/libjxl/lib/jpegli/testing.h new file mode 100644 index 0000000000..873a0171e7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/testing.h @@ -0,0 +1,35 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_TESTING_H_ +#define LIB_JPEGLI_TESTING_H_ + +// GTest/GMock specific macros / wrappers. + +// gmock unconditionally redefines those macros (to wrong values). +// Lets include it only here and mitigate the problem. +#pragma push_macro("PRIdS") +#pragma push_macro("PRIuS") +#include "gmock/gmock.h" +#pragma pop_macro("PRIuS") +#pragma pop_macro("PRIdS") + +#include "gtest/gtest.h" + +// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead +// used INSTANTIATE_TEST_CASE_P which is now deprecated. +#ifdef INSTANTIATE_TEST_SUITE_P +#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P +#else +#define JPEGLI_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P +#endif + +// Ensures that we don't make our test bounds too lax, effectively disabling the +// tests. +MATCHER_P(IsSlightlyBelow, max, "") { + return max * 0.75 <= arg && arg <= max * 1.0; +} + +#endif // LIB_JPEGLI_TESTING_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/transcode_api_test.cc b/third-party/libjxl/libjxl/lib/jpegli/transcode_api_test.cc new file mode 100644 index 0000000000..1d99ce37fa --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/transcode_api_test.cc @@ -0,0 +1,133 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include "lib/jpegli/decode.h" +#include "lib/jpegli/encode.h" +#include "lib/jpegli/test_utils.h" +#include "lib/jpegli/testing.h" +#include "lib/jxl/base/status.h" + +namespace jpegli { +namespace { + +void TranscodeWithJpegli(const std::vector& jpeg_input, + const CompressParams& jparams, + std::vector* jpeg_output) { + jpeg_decompress_struct dinfo = {}; + jpeg_compress_struct cinfo = {}; + uint8_t* transcoded_data = nullptr; + unsigned long transcoded_size; + const auto try_catch_block = [&]() -> bool { + ERROR_HANDLER_SETUP(jpegli); + dinfo.err = cinfo.err; + dinfo.client_data = cinfo.client_data; + jpegli_create_decompress(&dinfo); + jpegli_mem_src(&dinfo, jpeg_input.data(), jpeg_input.size()); + EXPECT_EQ(JPEG_REACHED_SOS, + jpegli_read_header(&dinfo, /*require_image=*/TRUE)); + jvirt_barray_ptr* coef_arrays = jpegli_read_coefficients(&dinfo); + JXL_CHECK(coef_arrays != nullptr); + jpegli_create_compress(&cinfo); + jpegli_mem_dest(&cinfo, &transcoded_data, &transcoded_size); + jpegli_copy_critical_parameters(&dinfo, &cinfo); + jpegli_set_progressive_level(&cinfo, jparams.progressive_mode); + cinfo.optimize_coding = jparams.optimize_coding; + jpegli_write_coefficients(&cinfo, coef_arrays); + jpegli_finish_compress(&cinfo); + jpegli_finish_decompress(&dinfo); + return true; + }; + ASSERT_TRUE(try_catch_block()); + jpegli_destroy_decompress(&dinfo); + jpegli_destroy_compress(&cinfo); + if (transcoded_data) { + jpeg_output->assign(transcoded_data, transcoded_data + transcoded_size); + free(transcoded_data); + } +} + +struct TestConfig { + TestImage input; + CompressParams jparams; +}; + +class TranscodeAPITestParam : public ::testing::TestWithParam {}; + +TEST_P(TranscodeAPITestParam, TestAPI) { + TestConfig config = GetParam(); + CompressParams& jparams = config.jparams; + GeneratePixels(&config.input); + + // Start with sequential non-optimized jpeg. 
+ jparams.progressive_mode = 0; + jparams.optimize_coding = 0; + std::vector compressed; + ASSERT_TRUE(EncodeWithJpegli(config.input, jparams, &compressed)); + TestImage output0; + DecodeWithLibjpeg(jparams, DecompressParams(), compressed, &output0); + + // Transcode first to a sequential optimized jpeg, and then further to + // a progressive jpeg. + for (int progr : {0, 2}) { + std::vector transcoded; + jparams.progressive_mode = progr; + jparams.optimize_coding = 1; + TranscodeWithJpegli(compressed, jparams, &transcoded); + + // We expect a size reduction of at least 2%. + EXPECT_LT(transcoded.size(), compressed.size() * 0.98f); + + // Verify that transcoding is lossless. + TestImage output1; + DecodeWithLibjpeg(jparams, DecompressParams(), transcoded, &output1); + ASSERT_EQ(output0.pixels.size(), output1.pixels.size()); + EXPECT_EQ(0, memcmp(output0.pixels.data(), output1.pixels.data(), + output0.pixels.size())); + compressed = transcoded; + } +} + +std::vector GenerateTests() { + std::vector all_tests; + const size_t xsize0 = 1024; + const size_t ysize0 = 768; + for (int dxsize : {0, 1, 8, 9}) { + for (int dysize : {0, 1, 8, 9}) { + for (int h_sampling : {1, 2}) { + for (int v_sampling : {1, 2}) { + TestConfig config; + config.input.xsize = xsize0 + dxsize; + config.input.ysize = ysize0 + dysize; + config.jparams.h_sampling = {h_sampling, 1, 1}; + config.jparams.v_sampling = {v_sampling, 1, 1}; + all_tests.push_back(config); + } + } + } + } + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const TestConfig& c) { + os << c.input; + os << c.jparams; + return os; +} + +std::string TestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JPEGLI_INSTANTIATE_TEST_SUITE_P(TranscodeAPITest, TranscodeAPITestParam, + testing::ValuesIn(GenerateTests()), + TestDescription); + +} // namespace +} // namespace jpegli diff --git a/third-party/libjxl/libjxl/lib/jpegli/transpose-inl.h 
b/third-party/libjxl/libjxl/lib/jpegli/transpose-inl.h new file mode 100644 index 0000000000..9fdd222f4e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/transpose-inl.h @@ -0,0 +1,111 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JPEGLI_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JPEGLI_TRANSPOSE_INL_H_ +#undef LIB_JPEGLI_TRANSPOSE_INL_H_ +#else +#define LIB_JPEGLI_TRANSPOSE_INL_H_ +#endif + +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { +namespace { + +#if HWY_CAP_GE256 +static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from, + float* JXL_RESTRICT to) { + const HWY_CAPPED(float, 8) d; + auto i0 = Load(d, from); + auto i1 = Load(d, from + 1 * 8); + auto i2 = Load(d, from + 2 * 8); + auto i3 = Load(d, from + 3 * 8); + auto i4 = Load(d, from + 4 * 8); + auto i5 = Load(d, from + 5 * 8); + auto i6 = Load(d, from + 6 * 8); + auto i7 = Load(d, from + 7 * 8); + + const auto q0 = InterleaveLower(d, i0, i2); + const auto q1 = InterleaveLower(d, i1, i3); + const auto q2 = InterleaveUpper(d, i0, i2); + const auto q3 = InterleaveUpper(d, i1, i3); + const auto q4 = InterleaveLower(d, i4, i6); + const auto q5 = InterleaveLower(d, i5, i7); + const auto q6 = InterleaveUpper(d, i4, i6); + const auto q7 = InterleaveUpper(d, i5, i7); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + const auto r4 = InterleaveLower(d, q4, q5); + const auto r5 = InterleaveUpper(d, q4, q5); + const auto r6 = InterleaveLower(d, q6, q7); + const auto r7 = InterleaveUpper(d, q6, q7); + + i0 = ConcatLowerLower(d, r4, r0); + i1 = ConcatLowerLower(d, r5, r1); + i2 = ConcatLowerLower(d, r6, r2); + i3 = 
ConcatLowerLower(d, r7, r3); + i4 = ConcatUpperUpper(d, r4, r0); + i5 = ConcatUpperUpper(d, r5, r1); + i6 = ConcatUpperUpper(d, r6, r2); + i7 = ConcatUpperUpper(d, r7, r3); + + Store(i0, d, to); + Store(i1, d, to + 1 * 8); + Store(i2, d, to + 2 * 8); + Store(i3, d, to + 3 * 8); + Store(i4, d, to + 4 * 8); + Store(i5, d, to + 5 * 8); + Store(i6, d, to + 6 * 8); + Store(i7, d, to + 7 * 8); +} +#elif HWY_TARGET != HWY_SCALAR +static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from, + float* JXL_RESTRICT to) { + const HWY_CAPPED(float, 4) d; + for (size_t n = 0; n < 8; n += 4) { + for (size_t m = 0; m < 8; m += 4) { + auto p0 = Load(d, from + n * 8 + m); + auto p1 = Load(d, from + (n + 1) * 8 + m); + auto p2 = Load(d, from + (n + 2) * 8 + m); + auto p3 = Load(d, from + (n + 3) * 8 + m); + const auto q0 = InterleaveLower(d, p0, p2); + const auto q1 = InterleaveLower(d, p1, p3); + const auto q2 = InterleaveUpper(d, p0, p2); + const auto q3 = InterleaveUpper(d, p1, p3); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + Store(r0, d, to + m * 8 + n); + Store(r1, d, to + (1 + m) * 8 + n); + Store(r2, d, to + (2 + m) * 8 + n); + Store(r3, d, to + (3 + m) * 8 + n); + } + } +} +#else +static JXL_INLINE void Transpose8x8Block(const float* JXL_RESTRICT from, + float* JXL_RESTRICT to) { + for (size_t n = 0; n < 8; ++n) { + for (size_t m = 0; m < 8; ++m) { + to[8 * n + m] = from[8 * m + n]; + } + } +} +#endif + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); +#endif // LIB_JPEGLI_TRANSPOSE_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/types.h b/third-party/libjxl/libjxl/lib/jpegli/types.h new file mode 100644 index 0000000000..2f446b7fff --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/types.h @@ -0,0 +1,38 @@ 
+// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JPEGLI_TYPES_H_ +#define LIB_JPEGLI_TYPES_H_ + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +// +// New API structs and functions that are not available in libjpeg +// +// NOTE: This part of the API is still experimental and will probably change in +// the future. +// + +typedef enum { + JPEGLI_TYPE_FLOAT = 0, + JPEGLI_TYPE_UINT8 = 2, + JPEGLI_TYPE_UINT16 = 3, +} JpegliDataType; + +typedef enum { + JPEGLI_NATIVE_ENDIAN = 0, + JPEGLI_LITTLE_ENDIAN = 1, + JPEGLI_BIG_ENDIAN = 2, +} JpegliEndianness; + +int jpegli_bytes_per_sample(JpegliDataType data_type); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // LIB_JPEGLI_TYPES_H_ diff --git a/third-party/libjxl/libjxl/lib/jpegli/upsample.cc b/third-party/libjxl/libjxl/lib/jpegli/upsample.cc new file mode 100644 index 0000000000..5559aa78a6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/upsample.cc @@ -0,0 +1,137 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jpegli/upsample.h" + +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jpegli/upsample.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jpegli { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Vec; + +#if HWY_CAP_GE512 +using hwy::HWY_NAMESPACE::Half; +using hwy::HWY_NAMESPACE::Vec; +template +HWY_INLINE Vec>> Quarter(const DF df, V v) { + using HF = Half; + using HHF = Half; + auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v); + return i & 1 ? 
UpperHalf(HHF(), half) : LowerHalf(HHF(), half); +} + +template +HWY_INLINE Vec Concat4(const DF df, V v0, V v1, V v2, V v3) { + using HF = Half; + return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0)); +} + +#endif + +// Stores v0[0], v1[0], v0[1], v1[1], ... to mem, in this order. Mem must be +// aligned. +template +void StoreInterleaved(const DF df, V v0, V v1, T* mem) { + static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types"); +#if HWY_TARGET == HWY_SCALAR + Store(v0, df, mem); + Store(v1, df, mem + 1); +#elif !HWY_CAP_GE256 + Store(InterleaveLower(df, v0, v1), df, mem); + Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df)); +#else + if (!HWY_CAP_GE512 || Lanes(df) == 8) { + auto t0 = InterleaveLower(df, v0, v1); + auto t1 = InterleaveUpper(df, v0, v1); + Store(ConcatLowerLower(df, t1, t0), df, mem); + Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df)); + } else { +#if HWY_CAP_GE512 + auto t0 = InterleaveLower(df, v0, v1); + auto t1 = InterleaveUpper(df, v0, v1); + Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1), + Quarter<1>(df, t0), Quarter<1>(df, t1)), + df, mem); + Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1), + Quarter<3>(df, t0), Quarter<3>(df, t1)), + df, mem + Lanes(df)); +#endif + } +#endif +} + +void Upsample2Horizontal(float* JXL_RESTRICT row, + float* JXL_RESTRICT scratch_space, size_t len_out) { + HWY_FULL(float) df; + auto threefour = Set(df, 0.75f); + auto onefour = Set(df, 0.25f); + const size_t len_in = (len_out + 1) >> 1; + memcpy(scratch_space, row, len_in * sizeof(row[0])); + scratch_space[-1] = scratch_space[0]; + scratch_space[len_in] = scratch_space[len_in - 1]; + for (size_t x = 0; x < len_in; x += Lanes(df)) { + auto current = Mul(Load(df, scratch_space + x), threefour); + auto prev = LoadU(df, scratch_space + x - 1); + auto next = LoadU(df, scratch_space + x + 1); + auto left = MulAdd(onefour, prev, current); + auto right = MulAdd(onefour, next, current); + 
StoreInterleaved(df, left, right, row + x * 2); + } +} + +void Upsample2Vertical(const float* JXL_RESTRICT row_top, + const float* JXL_RESTRICT row_mid, + const float* JXL_RESTRICT row_bot, + float* JXL_RESTRICT row_out0, + float* JXL_RESTRICT row_out1, size_t len) { + HWY_FULL(float) df; + auto threefour = Set(df, 0.75f); + auto onefour = Set(df, 0.25f); + for (size_t x = 0; x < len; x += Lanes(df)) { + auto it = Load(df, row_top + x); + auto im = Load(df, row_mid + x); + auto ib = Load(df, row_bot + x); + auto im_scaled = Mul(im, threefour); + Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x); + Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jpegli +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jpegli { + +HWY_EXPORT(Upsample2Horizontal); +HWY_EXPORT(Upsample2Vertical); + +void Upsample2Horizontal(float* JXL_RESTRICT row, + float* JXL_RESTRICT scratch_space, size_t len_out) { + return HWY_DYNAMIC_DISPATCH(Upsample2Horizontal)(row, scratch_space, len_out); +} + +void Upsample2Vertical(const float* JXL_RESTRICT row_top, + const float* JXL_RESTRICT row_mid, + const float* JXL_RESTRICT row_bot, + float* JXL_RESTRICT row_out0, + float* JXL_RESTRICT row_out1, size_t len) { + return HWY_DYNAMIC_DISPATCH(Upsample2Vertical)(row_top, row_mid, row_bot, + row_out0, row_out1, len); +} +} // namespace jpegli +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jpegli/upsample.h b/third-party/libjxl/libjxl/lib/jpegli/upsample.h new file mode 100644 index 0000000000..1a057208dc --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jpegli/upsample.h @@ -0,0 +1,26 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JPEGLI_UPSAMPLE_H_ +#define LIB_JPEGLI_UPSAMPLE_H_ + +#include + +#include "lib/jxl/base/compiler_specific.h" + +namespace jpegli { + +void Upsample2Horizontal(float* JXL_RESTRICT row, + float* JXL_RESTRICT scratch_space, size_t len_out); + +void Upsample2Vertical(const float* JXL_RESTRICT row_top, + const float* JXL_RESTRICT row_mid, + const float* JXL_RESTRICT row_bot, + float* JXL_RESTRICT row_out0, + float* JXL_RESTRICT row_out1, size_t len); + +} // namespace jpegli + +#endif // LIB_JPEGLI_UPSAMPLE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl.cmake b/third-party/libjxl/libjxl/lib/jxl.cmake new file mode 100644 index 0000000000..2464383288 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl.cmake @@ -0,0 +1,325 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include(compatibility.cmake) +include(jxl_lists.cmake) + +if (JPEGXL_ENABLE_TOOLS OR JPEGXL_ENABLE_DEVTOOLS OR JPEGXL_ENABLE_BOXES) +list(APPEND JPEGXL_INTERNAL_DEC_SOURCES ${JPEGXL_INTERNAL_DEC_BOX_SOURCES}) +endif() + +if (JPEGXL_ENABLE_TRANSCODE_JPEG OR JPEGXL_ENABLE_TOOLS OR JPEGXL_ENABLE_DEVTOOLS) +list(APPEND JPEGXL_INTERNAL_DEC_SOURCES ${JPEGXL_INTERNAL_DEC_JPEG_SOURCES}) +endif() + +set_source_files_properties(jxl/enc_fast_lossless.cc PROPERTIES COMPILE_FLAGS -O3) + +set(JPEGXL_DEC_INTERNAL_LIBS + hwy + Threads::Threads + ${ATOMICS_LIBRARIES} +) + +if (JPEGXL_ENABLE_TRANSCODE_JPEG OR JPEGXL_ENABLE_BOXES) +list(APPEND JPEGXL_DEC_INTERNAL_LIBS brotlidec brotlicommon) +endif() + +set(JPEGXL_INTERNAL_LIBS + ${JPEGXL_DEC_INTERNAL_LIBS} + brotlienc +) + +if (JPEGXL_ENABLE_SKCMS) + list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_SKCMS=1) + if (JPEGXL_BUNDLE_SKCMS) + list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_BUNDLE_SKCMS=1) + # skcms objects are later added to JPEGXL_INTERNAL_OBJECTS + else () + list(APPEND JPEGXL_INTERNAL_LIBS skcms) + endif () +else 
() + list(APPEND JPEGXL_INTERNAL_LIBS lcms2) +endif () + +if (JPEGXL_ENABLE_TRANSCODE_JPEG) + list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_TRANSCODE_JPEG=1) +else() + list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_TRANSCODE_JPEG=0) +endif () + +if (JPEGXL_ENABLE_BOXES) + list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_BOXES=1) +else() + list(APPEND JPEGXL_INTERNAL_FLAGS -DJPEGXL_ENABLE_BOXES=0) +endif () + +set(OBJ_COMPILE_DEFINITIONS + JPEGXL_MAJOR_VERSION=${JPEGXL_MAJOR_VERSION} + JPEGXL_MINOR_VERSION=${JPEGXL_MINOR_VERSION} + JPEGXL_PATCH_VERSION=${JPEGXL_PATCH_VERSION} + # Used to determine if we are building the library when defined or just + # including the library when not defined. This is public so libjxl shared + # library gets this define too. + JXL_INTERNAL_LIBRARY_BUILD +) + +# Generate version.h +configure_file("jxl/version.h.in" "include/jxl/version.h") + +# Headers for exporting/importing public headers +include(GenerateExportHeader) + +# CMake does not allow generate_export_header for INTERFACE library, so we +# add this stub library just for file generation. +add_library(jxl_export OBJECT ${JPEGXL_INTERNAL_PUBLIC_HEADERS}) +set_target_properties(jxl_export PROPERTIES + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1 + DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD + LINKER_LANGUAGE CXX +) +generate_export_header(jxl_export + BASE_NAME JXL + EXPORT_FILE_NAME include/jxl/jxl_export.h) +# Place all public headers in a single directory. +foreach(path ${JPEGXL_INTERNAL_PUBLIC_HEADERS}) + configure_file( + ${path} + ${path} + COPYONLY + ) +endforeach() + +add_library(jxl_includes INTERFACE) +target_include_directories(jxl_includes SYSTEM INTERFACE + "$" +) +add_dependencies(jxl_includes jxl_export) + +# Base headers / utilities. 
+add_library(jxl_base-obj OBJECT ${JPEGXL_INTERNAL_BASE_SOURCES}) +target_compile_options(jxl_base-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS}) +target_compile_options(jxl_base-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS}) +set_property(TARGET jxl_base-obj PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(jxl_base-obj PUBLIC + ${PROJECT_SOURCE_DIR} + ${JXL_HWY_INCLUDE_DIRS} +) + +jxl_link_libraries(jxl_base-obj jxl_includes) + +# Decoder-only object library +add_library(jxl_dec-obj OBJECT ${JPEGXL_INTERNAL_DEC_SOURCES}) +target_compile_options(jxl_dec-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS}) +target_compile_options(jxl_dec-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS}) +set_property(TARGET jxl_dec-obj PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(jxl_dec-obj PUBLIC + "$" + "${JXL_HWY_INCLUDE_DIRS}" + "$>" +) +target_compile_definitions(jxl_dec-obj PUBLIC + ${OBJ_COMPILE_DEFINITIONS} +) +jxl_link_libraries(jxl_dec-obj jxl_base-obj) + +# Object library. This is used to hold the set of objects and properties. 
+add_library(jxl_enc-obj OBJECT ${JPEGXL_INTERNAL_ENC_SOURCES}) +target_compile_options(jxl_enc-obj PRIVATE ${JPEGXL_INTERNAL_FLAGS}) +target_compile_options(jxl_enc-obj PUBLIC ${JPEGXL_COVERAGE_FLAGS}) +set_property(TARGET jxl_enc-obj PROPERTY POSITION_INDEPENDENT_CODE ON) +target_include_directories(jxl_enc-obj PUBLIC + ${PROJECT_SOURCE_DIR} + ${JXL_HWY_INCLUDE_DIRS} + $ +) +target_compile_definitions(jxl_enc-obj PUBLIC + ${OBJ_COMPILE_DEFINITIONS} +) +jxl_link_libraries(jxl_enc-obj jxl_base-obj) + +#TODO(lode): don't depend on CMS for the core library +if (JPEGXL_ENABLE_SKCMS) + target_include_directories(jxl_enc-obj PRIVATE + $ + ) +else () + target_include_directories(jxl_enc-obj PRIVATE + $ + ) +endif () + +set_target_properties(jxl_dec-obj PROPERTIES + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1 + DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD +) + +set_target_properties(jxl_enc-obj PROPERTIES + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1 + DEFINE_SYMBOL JXL_INTERNAL_LIBRARY_BUILD +) + +# Private static library. This exposes all the internal functions and is used +# for tests. +add_library(jxl_dec-static STATIC + $ + $ +) +target_link_libraries(jxl_dec-static + PUBLIC ${JPEGXL_COVERAGE_FLAGS} ${JPEGXL_DEC_INTERNAL_LIBS} jxl_includes) + +# The list of objects in the static and shared libraries. +set(JPEGXL_INTERNAL_OBJECTS + $ + $ + $ +) +if (JPEGXL_ENABLE_SKCMS AND JPEGXL_BUNDLE_SKCMS) + list(APPEND JPEGXL_INTERNAL_OBJECTS $) +endif() + +# Private static library. This exposes all the internal functions and is used +# for tests. 
+# TODO(lode): once the source files are correctly split so that it is possible +# to do, remove $ here and depend on jxl_dec-static +add_library(jxl-static STATIC ${JPEGXL_INTERNAL_OBJECTS}) +target_link_libraries(jxl-static + PUBLIC ${JPEGXL_COVERAGE_FLAGS} ${JPEGXL_INTERNAL_LIBS} jxl_includes) +target_include_directories(jxl-static PUBLIC + "$") + +# JXL_EXPORT is defined to "__declspec(dllimport)" automatically by CMake +# in Windows builds when including headers from the C API and compiling from +# outside the jxl library. This is required when using the shared library, +# however in windows this causes the function to not be found when linking +# against the static library. This define JXL_EXPORT= here forces it to not +# use dllimport in tests and other tools that require the static library. +target_compile_definitions(jxl-static INTERFACE -DJXL_EXPORT=) +target_compile_definitions(jxl_dec-static INTERFACE -DJXL_EXPORT=) + +# TODO(deymo): Move TCMalloc linkage to the tools/ directory since the library +# shouldn't do any allocs anyway. +if(JPEGXL_ENABLE_TCMALLOC) + pkg_check_modules(TCMallocMinimal REQUIRED IMPORTED_TARGET + libtcmalloc_minimal) + # tcmalloc 2.8 has concurrency issues that makes it sometimes return nullptr + # for large allocs. See https://github.com/gperftools/gperftools/issues/1204 + # for details. + if(TCMallocMinimal_VERSION VERSION_EQUAL 2.8) + message(FATAL_ERROR + "tcmalloc version 2.8 has a concurrency bug. You have installed " + "version ${TCMallocMinimal_VERSION}, please either downgrade tcmalloc " + "to version 2.7, upgrade to 2.8.1 or newer or pass " + "-DJPEGXL_ENABLE_TCMALLOC=OFF to jpeg-xl cmake line. See the following " + "bug for details:\n" + " https://github.com/gperftools/gperftools/issues/1204\n") + endif() + target_link_libraries(jxl-static PUBLIC PkgConfig::TCMallocMinimal) +endif() # JPEGXL_ENABLE_TCMALLOC + +# Install the static library too, but as jxl.a file without the -static except +# in Windows. 
+if (NOT WIN32 OR MINGW) + set_target_properties(jxl-static PROPERTIES OUTPUT_NAME "jxl") + set_target_properties(jxl_dec-static PROPERTIES OUTPUT_NAME "jxl_dec") +endif() +install(TARGETS jxl-static DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(TARGETS jxl_dec-static DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +if (BUILD_SHARED_LIBS) + +# Public shared library. +add_library(jxl SHARED ${JPEGXL_INTERNAL_OBJECTS}) +strip_static(JPEGXL_INTERNAL_SHARED_LIBS JPEGXL_INTERNAL_LIBS) +target_link_libraries(jxl PUBLIC ${JPEGXL_COVERAGE_FLAGS} jxl_includes) +target_link_libraries(jxl PRIVATE ${JPEGXL_INTERNAL_SHARED_LIBS}) +# Shared library include path contains only the "include/" paths. +set_target_properties(jxl PROPERTIES + VERSION ${JPEGXL_LIBRARY_VERSION} + SOVERSION ${JPEGXL_LIBRARY_SOVERSION} + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") + +# Public shared decoder library. +add_library(jxl_dec SHARED $ $) +strip_static(JPEGXL_DEC_INTERNAL_SHARED_LIBS JPEGXL_DEC_INTERNAL_LIBS) +target_link_libraries(jxl_dec PUBLIC ${JPEGXL_COVERAGE_FLAGS} jxl_includes) +target_link_libraries(jxl_dec PRIVATE ${JPEGXL_DEC_INTERNAL_SHARED_LIBS}) +# Shared library include path contains only the "include/" paths. +set_target_properties(jxl_dec PROPERTIES + VERSION ${JPEGXL_LIBRARY_VERSION} + SOVERSION ${JPEGXL_LIBRARY_SOVERSION} + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") + +# Check whether the linker support excluding libs +set(LINKER_EXCLUDE_LIBS_FLAG "-Wl,--exclude-libs=ALL") +include(CheckCSourceCompiles) +list(APPEND CMAKE_EXE_LINKER_FLAGS ${LINKER_EXCLUDE_LIBS_FLAG}) +check_c_source_compiles("int main(){return 0;}" LINKER_SUPPORT_EXCLUDE_LIBS) +list(REMOVE_ITEM CMAKE_EXE_LINKER_FLAGS ${LINKER_EXCLUDE_LIBS_FLAG}) + +# Add a jxl.version file as a version script to tag symbols with the +# appropriate version number. 
This script is also used to limit what's exposed +# in the shared library from the static dependencies bundled here. +foreach(target IN ITEMS jxl jxl_dec) + set_target_properties(${target} PROPERTIES + LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version) + if(APPLE) + set_property(TARGET ${target} APPEND_STRING PROPERTY + LINK_FLAGS "-Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl_osx.syms") + elseif(WIN32) + # Nothing needed here, we use __declspec(dllexport) (jxl_export.h) + else() + set_property(TARGET ${target} APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version") + endif() # APPLE + # This hides the default visibility symbols from static libraries bundled into + # the shared library. In particular this prevents exposing symbols from hwy + # and skcms in the shared library. + if(LINKER_SUPPORT_EXCLUDE_LIBS) + set_property(TARGET ${target} APPEND_STRING PROPERTY + LINK_FLAGS " ${LINKER_EXCLUDE_LIBS_FLAG}") + endif() +endforeach() + +# Only install libjxl shared library. The libjxl_dec is not installed since it +# contains symbols also in libjxl which would conflict if programs try to use +# both. +install(TARGETS jxl + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +else() +add_library(jxl ALIAS jxl-static) +add_library(jxl_dec ALIAS jxl_dec-static) +endif() # BUILD_SHARED_LIBS + +# Add a pkg-config file for libjxl. +set(JPEGXL_LIBRARY_REQUIRES + "libhwy libbrotlienc libbrotlidec") +if(NOT JPEGXL_ENABLE_SKCMS) + set(JPEGXL_LIBRARY_REQUIRES "${JPEGXL_LIBRARY_REQUIRES} lcms2") +endif() + +# Allow adding prefix if CMAKE_INSTALL_INCLUDEDIR not absolute. 
+if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}") + set(PKGCONFIG_TARGET_INCLUDES "${CMAKE_INSTALL_INCLUDEDIR}") +else() + set(PKGCONFIG_TARGET_INCLUDES "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}") +endif() +# Allow adding prefix if CMAKE_INSTALL_LIBDIR not absolute. +if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}") + set(PKGCONFIG_TARGET_LIBS "${CMAKE_INSTALL_LIBDIR}") +else() + set(PKGCONFIG_TARGET_LIBS "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}") +endif() + +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/jxl/libjxl.pc.in" + "libjxl.pc" @ONLY) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libjxl.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_context.h b/third-party/libjxl/libjxl/lib/jxl/ac_context.h new file mode 100644 index 0000000000..a2b9e046d1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/ac_context.h @@ -0,0 +1,149 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_AC_CONTEXT_H_ +#define LIB_JXL_AC_CONTEXT_H_ + +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order_fwd.h" + +namespace jxl { + +// Block context used for scanning order, number of non-zeros, AC coefficients. +// Equal to the channel. +constexpr uint32_t kDCTOrderContextStart = 0; + +// The number of predicted nonzeros goes from 0 to 1008. NonZeroContext() +// clamps it to 64 and maps values below 8 to themselves and larger values to +// 4 + n / 2, so the bucket index goes from 0 to 36, inclusive. 
+constexpr uint32_t kNonZeroBuckets = 37; + +static const uint16_t kCoeffFreqContext[64] = { + 0xBAD, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, + 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, +}; + +static const uint16_t kCoeffNumNonzeroContext[64] = { + 0xBAD, 0, 31, 62, 62, 93, 93, 93, 93, 123, 123, 123, 123, + 152, 152, 152, 152, 152, 152, 152, 152, 180, 180, 180, 180, 180, + 180, 180, 180, 180, 180, 180, 180, 206, 206, 206, 206, 206, 206, + 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, + 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, 206, +}; + +// Supremum of ZeroDensityContext(x, y) + 1, when x + y < 64. +constexpr int kZeroDensityContextCount = 458; +// Supremum of ZeroDensityContext(x, y) + 1. +constexpr int kZeroDensityContextLimit = 474; + +/* This function is used for entropy-sources pre-clustering. + * + * Ideally, each combination of |nonzeros_left| and |k| should go to its own + * bucket; but it implies (64 * 63 / 2) == 2016 buckets. If there is other + * dimension (e.g. block context), then number of primary clusters becomes too + * big. + * + * To solve this problem, |nonzeros_left| and |k| values are clustered. It is + * known that their sum is at most 64, consequently, the total number buckets + * is at most A(64) * B(64). + */ +// TODO(user): investigate, why disabling pre-clustering makes entropy code +// less dense. Perhaps we would need to add HQ clustering algorithm that would +// be able to squeeze better by spending more CPU cycles. 
+static JXL_INLINE size_t ZeroDensityContext(size_t nonzeros_left, size_t k, + size_t covered_blocks, + size_t log2_covered_blocks, + size_t prev) { + JXL_DASSERT((1u << log2_covered_blocks) == covered_blocks); + nonzeros_left = (nonzeros_left + covered_blocks - 1) >> log2_covered_blocks; + k >>= log2_covered_blocks; + JXL_DASSERT(k > 0); + JXL_DASSERT(k < 64); + JXL_DASSERT(nonzeros_left > 0); + // Asserting nonzeros_left + k < 65 here causes crashes in debug mode with + // invalid input, since the (hot) decoding loop does not check this condition. + // As no out-of-bound memory reads are issued even if that condition is + // broken, we check this simpler condition which holds anyway. The decoder + // will still mark a file in which that condition happens as not valid at the + // end of the decoding loop, as `nzeros` will not be `0`. + JXL_DASSERT(nonzeros_left < 64); + return (kCoeffNumNonzeroContext[nonzeros_left] + kCoeffFreqContext[k]) * 2 + + prev; +} + +struct BlockCtxMap { + std::vector dc_thresholds[3]; + std::vector qf_thresholds; + std::vector ctx_map; + size_t num_ctxs, num_dc_ctxs; + + static constexpr uint8_t kDefaultCtxMap[] = { + // Default ctx map clusters all the large transforms together. + 0, 1, 2, 2, 3, 3, 4, 5, 6, 6, 6, 6, 6, // + 7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, // + 7, 8, 9, 9, 10, 11, 12, 13, 14, 14, 14, 14, 14, // + }; + static_assert(3 * kNumOrders == + sizeof(kDefaultCtxMap) / sizeof *kDefaultCtxMap, + "Update default context map"); + + size_t Context(int dc_idx, uint32_t qf, size_t ord, size_t c) const { + size_t qf_idx = 0; + for (uint32_t t : qf_thresholds) { + if (qf > t) qf_idx++; + } + size_t idx = c < 2 ? c ^ 1 : 2; + idx = idx * kNumOrders + ord; + idx = idx * (qf_thresholds.size() + 1) + qf_idx; + idx = idx * num_dc_ctxs + dc_idx; + return ctx_map[idx]; + } + // Non-zero context is based on number of non-zeros and block context. + // For better clustering, contexts with same number of non-zeros are grouped. 
+ constexpr uint32_t ZeroDensityContextsOffset(uint32_t block_ctx) const { + return num_ctxs * kNonZeroBuckets + kZeroDensityContextCount * block_ctx; + } + + // Context map for AC coefficients consists of 2 blocks: + // |num_ctxs x : context for number of non-zeros in the block + // kNonZeroBuckets| computed from block context and predicted + // value (based top and left values) + // |num_ctxs x : context for AC coefficient symbols, + // kZeroDensityContextCount| computed from block context, + // number of non-zeros left and + // index in scan order + constexpr uint32_t NumACContexts() const { + return num_ctxs * (kNonZeroBuckets + kZeroDensityContextCount); + } + + // Non-zero context is based on number of non-zeros and block context. + // For better clustering, contexts with same number of non-zeros are grouped. + inline uint32_t NonZeroContext(uint32_t non_zeros, uint32_t block_ctx) const { + uint32_t ctx; + if (non_zeros >= 64) non_zeros = 64; + if (non_zeros < 8) { + ctx = non_zeros; + } else { + ctx = 4 + non_zeros / 2; + } + return ctx * num_ctxs + block_ctx; + } + + BlockCtxMap() { + ctx_map.assign(std::begin(kDefaultCtxMap), std::end(kDefaultCtxMap)); + num_ctxs = *std::max_element(ctx_map.begin(), ctx_map.end()) + 1; + num_dc_ctxs = 1; + } +}; + +} // namespace jxl + +#endif // LIB_JXL_AC_CONTEXT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_strategy.cc b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.cc new file mode 100644 index 0000000000..3dda5df7a6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.cc @@ -0,0 +1,107 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/ac_strategy.h" + +#include + +#include +#include // iota +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { + +// Tries to generalize zig-zag order to non-square blocks. Surprisingly, in +// square block frequency along the (i + j == const) diagonals is roughly the +// same. For historical reasons, consecutive diagonals are traversed +// in alternating directions - so called "zig-zag" (or "snake") order. +template +static void CoeffOrderAndLut(AcStrategy acs, coeff_order_t* out) { + size_t cx = acs.covered_blocks_x(); + size_t cy = acs.covered_blocks_y(); + CoefficientLayout(&cy, &cx); + + // CoefficientLayout ensures cx >= cy. + // We compute the zigzag order for a cx x cx block, then discard all the + // lines that are not multiple of the ratio between cx and cy. + size_t xs = cx / cy; + size_t xsm = xs - 1; + size_t xss = CeilLog2Nonzero(xs); + // First half of the block + size_t cur = cx * cy; + for (size_t i = 0; i < cx * kBlockDim; i++) { + for (size_t j = 0; j <= i; j++) { + size_t x = j; + size_t y = i - j; + if (i % 2) std::swap(x, y); + if ((y & xsm) != 0) continue; + y >>= xss; + size_t val = 0; + if (x < cx && y < cy) { + val = y * cx + x; + } else { + val = cur++; + } + if (is_lut) { + out[y * cx * kBlockDim + x] = val; + } else { + out[val] = y * cx * kBlockDim + x; + } + } + } + // Second half + for (size_t ip = cx * kBlockDim - 1; ip > 0; ip--) { + size_t i = ip - 1; + for (size_t j = 0; j <= i; j++) { + size_t x = cx * kBlockDim - 1 - (i - j); + size_t y = cx * kBlockDim - 1 - j; + if (i % 2) std::swap(x, y); + if ((y & xsm) != 0) continue; + y >>= xss; + size_t val = cur++; + if (is_lut) { + out[y * cx * kBlockDim + x] = val; + } else { + out[val] = y * cx * kBlockDim + x; + } + } + } +} + +void AcStrategy::ComputeNaturalCoeffOrder(coeff_order_t* order) const { + CoeffOrderAndLut(*this, order); +} +void 
AcStrategy::ComputeNaturalCoeffOrderLut(coeff_order_t* lut) const { + CoeffOrderAndLut(*this, lut); +} + +// These definitions are needed before C++17. +constexpr size_t AcStrategy::kMaxCoeffBlocks; +constexpr size_t AcStrategy::kMaxBlockDim; +constexpr size_t AcStrategy::kMaxCoeffArea; + +AcStrategyImage::AcStrategyImage(size_t xsize, size_t ysize) + : layers_(xsize, ysize) { + row_ = layers_.Row(0); + stride_ = layers_.PixelsPerRow(); +} + +size_t AcStrategyImage::CountBlocks(AcStrategy::Type type) const { + size_t ret = 0; + for (size_t y = 0; y < layers_.ysize(); y++) { + const uint8_t* JXL_RESTRICT row = layers_.ConstRow(y); + for (size_t x = 0; x < layers_.xsize(); x++) { + if (row[x] == ((static_cast(type) << 1) | 1)) ret++; + } + } + return ret; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_strategy.h b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.h new file mode 100644 index 0000000000..7d21167e6e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/ac_strategy.h @@ -0,0 +1,261 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_AC_STRATEGY_H_ +#define LIB_JXL_AC_STRATEGY_H_ + +#include +#include + +#include // kMaxVectorSize + +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image_ops.h" + +// Defines the different kinds of transforms, and heuristics to choose between +// them. +// `AcStrategy` represents what transform should be used, and which sub-block of +// that transform we are currently in. Note that DCT4x4 is applied on all four +// 4x4 sub-blocks of an 8x8 block. +// `AcStrategyImage` defines which strategy should be used for each 8x8 block +// of the image. Each entry stores the strategy type shifted left by one bit; +// bit 0 is set on the first (top-left) block covered by that strategy. 
+ +namespace jxl { + +class AcStrategy { + public: + // Extremal values for the number of blocks/coefficients of a single strategy. + static constexpr size_t kMaxCoeffBlocks = 32; + static constexpr size_t kMaxBlockDim = kBlockDim * kMaxCoeffBlocks; + // Maximum number of coefficients in a block. Guaranteed to be a multiple of + // the vector size. + static constexpr size_t kMaxCoeffArea = kMaxBlockDim * kMaxBlockDim; + static_assert((kMaxCoeffArea * sizeof(float)) % hwy::kMaxVectorSize == 0, + "Coefficient area is not a multiple of vector size"); + + // Raw strategy types. + enum Type : uint32_t { + // Regular block size DCT + DCT = 0, + // Encode pixels without transforming + IDENTITY = 1, + // Use 2-by-2 DCT + DCT2X2 = 2, + // Use 4-by-4 DCT + DCT4X4 = 3, + // Use 16-by-16 DCT + DCT16X16 = 4, + // Use 32-by-32 DCT + DCT32X32 = 5, + // Use 16-by-8 DCT + DCT16X8 = 6, + // Use 8-by-16 DCT + DCT8X16 = 7, + // Use 32-by-8 DCT + DCT32X8 = 8, + // Use 8-by-32 DCT + DCT8X32 = 9, + // Use 32-by-16 DCT + DCT32X16 = 10, + // Use 16-by-32 DCT + DCT16X32 = 11, + // 4x8 and 8x4 DCT + DCT4X8 = 12, + DCT8X4 = 13, + // Corner-DCT. + AFV0 = 14, + AFV1 = 15, + AFV2 = 16, + AFV3 = 17, + // Larger DCTs + DCT64X64 = 18, + DCT64X32 = 19, + DCT32X64 = 20, + DCT128X128 = 21, + DCT128X64 = 22, + DCT64X128 = 23, + DCT256X256 = 24, + DCT256X128 = 25, + DCT128X256 = 26, + // Marker for num of valid strategies. + kNumValidStrategies + }; + + static constexpr uint32_t TypeBit(const Type type) { + return 1u << static_cast(type); + } + + // Returns true if this block is the first 8x8 block (i.e. top-left) of a + // possibly multi-block strategy. 
+ JXL_INLINE bool IsFirstBlock() const { return is_first_; } + + JXL_INLINE bool IsMultiblock() const { + constexpr uint32_t bits = + TypeBit(Type::DCT16X16) | TypeBit(Type::DCT32X32) | + TypeBit(Type::DCT16X8) | TypeBit(Type::DCT8X16) | + TypeBit(Type::DCT32X8) | TypeBit(Type::DCT8X32) | + TypeBit(Type::DCT16X32) | TypeBit(Type::DCT32X16) | + TypeBit(Type::DCT32X64) | TypeBit(Type::DCT64X32) | + TypeBit(Type::DCT64X64) | TypeBit(DCT64X128) | TypeBit(DCT128X64) | + TypeBit(DCT128X128) | TypeBit(DCT128X256) | TypeBit(DCT256X128) | + TypeBit(DCT256X256); + JXL_DASSERT(Strategy() < kNumValidStrategies); + return ((1u << static_cast(Strategy())) & bits) != 0; + } + + // Returns the raw strategy value. Should only be used for tokenization. + JXL_INLINE uint8_t RawStrategy() const { + return static_cast(strategy_); + } + + JXL_INLINE Type Strategy() const { return strategy_; } + + // Inverse check + static JXL_INLINE constexpr bool IsRawStrategyValid(int raw_strategy) { + return raw_strategy < static_cast(kNumValidStrategies) && + raw_strategy >= 0; + } + static JXL_INLINE AcStrategy FromRawStrategy(uint8_t raw_strategy) { + return FromRawStrategy(static_cast(raw_strategy)); + } + static JXL_INLINE AcStrategy FromRawStrategy(Type raw_strategy) { + JXL_DASSERT(IsRawStrategyValid(static_cast(raw_strategy))); + return AcStrategy(raw_strategy, /*is_first=*/true); + } + + // "Natural order" means the order of increasing of "anisotropic" frequency of + // continuous version of DCT basis. + // Round-trip, for any given strategy s: + // X = NaturalCoeffOrder(s)[NaturalCoeffOrderLutN(s)[X]] + // X = NaturalCoeffOrderLut(s)[NaturalCoeffOrderN(s)[X]] + void ComputeNaturalCoeffOrder(coeff_order_t* order) const; + void ComputeNaturalCoeffOrderLut(coeff_order_t* lut) const; + + // Number of 8x8 blocks that this strategy covers along x. The value depends + // only on the strategy type, not on IsFirstBlock(). 
+ JXL_INLINE size_t covered_blocks_x() const { + static constexpr uint8_t kLut[] = {1, 1, 1, 1, 2, 4, 1, 2, 1, + 4, 2, 4, 1, 1, 1, 1, 1, 1, + 8, 4, 8, 16, 8, 16, 32, 16, 32}; + static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies, + "Update LUT"); + return kLut[size_t(strategy_)]; + } + + JXL_INLINE size_t covered_blocks_y() const { + static constexpr uint8_t kLut[] = {1, 1, 1, 1, 2, 4, 2, 1, 4, + 1, 4, 2, 1, 1, 1, 1, 1, 1, + 8, 8, 4, 16, 16, 8, 32, 32, 16}; + static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies, + "Update LUT"); + return kLut[size_t(strategy_)]; + } + + JXL_INLINE size_t log2_covered_blocks() const { + static constexpr uint8_t kLut[] = {0, 0, 0, 0, 2, 4, 1, 1, 2, + 2, 3, 3, 0, 0, 0, 0, 0, 0, + 6, 5, 5, 8, 7, 7, 10, 9, 9}; + static_assert(sizeof(kLut) / sizeof(*kLut) == kNumValidStrategies, + "Update LUT"); + return kLut[size_t(strategy_)]; + } + + private: + friend class AcStrategyRow; + JXL_INLINE AcStrategy(Type strategy, bool is_first) + : strategy_(strategy), is_first_(is_first) { + JXL_DASSERT(IsMultiblock() || is_first == true); + } + + Type strategy_; + bool is_first_; +}; + +// Class to use a certain row of the AC strategy. 
+class AcStrategyRow {
+ public:
+  explicit AcStrategyRow(const uint8_t* row) : row_(row) {}
+  AcStrategy operator[](size_t x) const {
+    return AcStrategy(static_cast(row_[x] >> 1), row_[x] & 1);  // bit 0: is_first, upper bits: type
+  }
+
+ private:
+  const uint8_t* JXL_RESTRICT row_;
+};
+
+class AcStrategyImage {
+ public:
+  AcStrategyImage() = default;
+  AcStrategyImage(size_t xsize, size_t ysize);
+  AcStrategyImage(AcStrategyImage&&) = default;
+  AcStrategyImage& operator=(AcStrategyImage&&) = default;
+
+  void FillDCT8(const Rect& rect) {  // presumably fills `rect` with (DCT << 1) | 1, i.e. first-block 8x8 DCT
+    FillPlane((static_cast(AcStrategy::Type::DCT) << 1) | 1,
+              &layers_, rect);
+  }
+  void FillDCT8() { FillDCT8(Rect(layers_)); }
+
+  void FillInvalid() { FillImage(INVALID, &layers_); }
+
+  void Set(size_t x, size_t y, AcStrategy::Type type) {
+#if JXL_ENABLE_ASSERT
+    AcStrategy acs = AcStrategy::FromRawStrategy(type);
+#endif  // JXL_ENABLE_ASSERT
+    JXL_ASSERT(y + acs.covered_blocks_y() <= layers_.ysize());
+    JXL_ASSERT(x + acs.covered_blocks_x() <= layers_.xsize());
+    JXL_CHECK(SetNoBoundsCheck(x, y, type, /*check=*/false));  // overlap check off: existing entries are overwritten
+  }
+
+  Status SetNoBoundsCheck(size_t x, size_t y, AcStrategy::Type type,
+                          bool check = true) {
+    AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+      for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+        size_t pos = (y + iy) * stride_ + x + ix;  // stride_ entries per block row
+        if (check && row_[pos] != INVALID) {
+          return JXL_FAILURE("Invalid AC strategy: block overlap");
+        }
+        row_[pos] =
+            (static_cast(type) << 1) | ((iy | ix) == 0 ? 1 : 0);  // bit 0 set only for the top-left block
+      }
+    }
+    return true;
+  }
+
+  bool IsValid(size_t x, size_t y) { return row_[y * stride_ + x] != INVALID; }
+
+  AcStrategyRow ConstRow(size_t y, size_t x_prefix = 0) const {
+    return AcStrategyRow(layers_.ConstRow(y) + x_prefix);
+  }
+
+  AcStrategyRow ConstRow(const Rect& rect, size_t y) const {
+    return ConstRow(rect.y0() + y, rect.x0());
+  }
+
+  size_t PixelsPerRow() const { return layers_.PixelsPerRow(); }
+
+  size_t xsize() const { return layers_.xsize(); }
+  size_t ysize() const { return layers_.ysize(); }
+
+  // Count the number of blocks of a given type.
+  size_t CountBlocks(AcStrategy::Type type) const;
+
+ private:
+  ImageB layers_;
+  uint8_t* JXL_RESTRICT row_;  // NOTE(review): presumably the first row of layers_; assigned outside this header - confirm
+  size_t stride_;  // NOTE(review): presumably layers_.PixelsPerRow(); assigned outside this header - confirm
+
+  // A value that does not represent a valid combined AC strategy
+  // value. Used as a sentinel.
+  static constexpr uint8_t INVALID = 0xFF;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_AC_STRATEGY_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/ac_strategy_test.cc b/third-party/libjxl/libjxl/lib/jxl/ac_strategy_test.cc
new file mode 100644
index 0000000000..5b46c697f3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ac_strategy_test.cc
@@ -0,0 +1,239 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ac_strategy.h"
+
+#include
+
+#include
+#include
+#include  // HWY_ALIGN_MAX
+#include
+#include
+
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/dct_scales.h"
+#include "lib/jxl/dec_transforms_testonly.h"
+#include "lib/jxl/enc_transforms.h"
+
+namespace jxl {
+namespace {
+
+// Test that DCT -> IDCT is a noop.
+class AcStrategyRoundtrip : public ::hwy::TestWithParamTargetAndT {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+
+    auto mem = hwy::AllocateAligned(4 * AcStrategy::kMaxCoeffArea);  // 4 regions: scratch, coeffs, idct, input/dc
+    float* scratch_space = mem.get();
+    float* coeffs = scratch_space + AcStrategy::kMaxCoeffArea;
+    float* idct = coeffs + AcStrategy::kMaxCoeffArea;
+    Rng rng(type * 65537 + 13);
+
+    for (size_t j = 0; j < 64; j++) {
+      size_t i = (acs.log2_covered_blocks()
+                      ? rng.UniformU(0, 64u << acs.log2_covered_blocks())
+                      : j);  // single 8x8 block: visit every pixel; otherwise presumably a random pixel
+      float* input = idct + AcStrategy::kMaxCoeffArea;
+      std::fill_n(input, AcStrategy::kMaxCoeffArea, 0);
+      input[i] = 0.2f;
+      TransformFromPixels(type, input, acs.covered_blocks_x() * 8, coeffs,
+                          scratch_space);
+      ASSERT_NEAR(coeffs[0], 0.2 / (64 << acs.log2_covered_blocks()), 1e-6)
+          << " i = " << i;  // coeffs[0] must equal the mean of the input pixels
+      TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+                        scratch_space);
+      for (size_t j = 0; j < 64u << acs.log2_covered_blocks(); j++) {  // shadows the outer j
+        ASSERT_NEAR(idct[j], j == i ? 0.2f : 0, 2e-6)
+            << "j = " << j << " i = " << i << " acs " << type;
+      }
+    }
+    // Test DC.
+    std::fill_n(idct, AcStrategy::kMaxCoeffArea, 0);
+    for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+      for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+        float* dc = idct + AcStrategy::kMaxCoeffArea;
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2;
+        LowestFrequenciesFromDC(type, dc, acs.covered_blocks_x() * 8, coeffs,
+                                scratch_space);
+        DCFromLowestFrequencies(type, coeffs, idct, acs.covered_blocks_x() * 8);
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);  // rebuild the expected DC image before comparing
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2;
+        for (size_t j = 0; j < 64u << acs.log2_covered_blocks(); j++) {
+          ASSERT_NEAR(idct[j], dc[j], 1e-6)
+              << "j = " << j << " x = " << x << " y = " << y << " acs " << type;
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyRoundtrip,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtrip, Test) { Run(); }
+
+// Test that DC(2x2) -> DCT coefficients -> IDCT -> downsampled IDCT is a noop.
+class AcStrategyRoundtripDownsample
+    : public ::hwy::TestWithParamTargetAndT {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+
+    auto mem = hwy::AllocateAligned(4 * AcStrategy::kMaxCoeffArea);  // 4 regions: scratch, coeffs, idct, dc
+    float* scratch_space = mem.get();
+    float* coeffs = scratch_space + AcStrategy::kMaxCoeffArea;
+    std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0.0f);
+    float* idct = coeffs + AcStrategy::kMaxCoeffArea;
+    Rng rng(type * 65537 + 13);
+
+    for (size_t y = 0; y < acs.covered_blocks_y(); y++) {
+      for (size_t x = 0; x < acs.covered_blocks_x(); x++) {
+        if (x > 4 || y > 4) {
+          if (rng.Bernoulli(0.9f)) continue;  // presumably skips ~90% of the positions past index 4
+        }
+        float* dc = idct + AcStrategy::kMaxCoeffArea;
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2f;
+        LowestFrequenciesFromDC(type, dc, acs.covered_blocks_x() * 8, coeffs,
+                                scratch_space);
+        TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+                          scratch_space);
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0.0f);  // reset for the next (x, y) iteration
+        std::fill_n(dc, AcStrategy::kMaxCoeffArea, 0);
+        dc[y * acs.covered_blocks_x() * 8 + x] = 0.2f;
+        // Downsample
+        for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+          for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {
+            float sum = 0;
+            for (size_t iy = 0; iy < 8; iy++) {
+              for (size_t ix = 0; ix < 8; ix++) {
+                sum += idct[(dy * 8 + iy) * 8 * acs.covered_blocks_x() +
+                            dx * 8 + ix];
+              }
+            }
+            sum /= 64.0f;  // mean of the 8x8 pixel block (dx, dy)
+            ASSERT_NEAR(sum, dc[dy * 8 * acs.covered_blocks_x() + dx], 1e-6)
+                << "acs " << type;
+          }
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyRoundtripDownsample,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyRoundtripDownsample, Test) { Run(); }
+
+// Test that IDCT(block with zeros in the non-topleft corner) -> downsampled
+// IDCT is the same as IDCT -> DC(2x2) of the same block.
+class AcStrategyDownsample : public ::hwy::TestWithParamTargetAndT {
+ protected:
+  void Run() {
+    const AcStrategy::Type type = static_cast(GetParam());
+    const AcStrategy acs = AcStrategy::FromRawStrategy(type);
+    size_t cx = acs.covered_blocks_y();
+    size_t cy = acs.covered_blocks_x();
+    CoefficientLayout(&cy, &cx);
+
+    auto mem = hwy::AllocateAligned(4 * AcStrategy::kMaxCoeffArea);
+    float* scratch_space = mem.get();
+    float* idct = scratch_space + AcStrategy::kMaxCoeffArea;
+    float* idct_acs_downsampled = idct + AcStrategy::kMaxCoeffArea;
+    Rng rng(type * 65537 + 13);
+
+    for (size_t y = 0; y < cy; y++) {
+      for (size_t x = 0; x < cx; x++) {
+        if (x > 4 || y > 4) {
+          if (rng.Bernoulli(0.9f)) continue;
+        }
+        float* coeffs = idct + AcStrategy::kMaxCoeffArea;  // NOTE(review): same address as idct_acs_downsampled although 4 regions are allocated - confirm the aliasing is intended
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+        coeffs[y * cx * 8 + x] = 0.2f;
+        TransformToPixels(type, coeffs, idct, acs.covered_blocks_x() * 8,
+                          scratch_space);
+        std::fill_n(coeffs, AcStrategy::kMaxCoeffArea, 0);
+        coeffs[y * cx * 8 + x] = 0.2f;
+        DCFromLowestFrequencies(type, coeffs, idct_acs_downsampled,
+                                acs.covered_blocks_x() * 8);
+        // Downsample
+        for (size_t dy = 0; dy < acs.covered_blocks_y(); dy++) {
+          for (size_t dx = 0; dx < acs.covered_blocks_x(); dx++) {
+            float sum = 0;
+            for (size_t iy = 0; iy < 8; iy++) {
+              for (size_t ix = 0; ix < 8; ix++) {
+                sum += idct[(dy * 8 + iy) * 8 * acs.covered_blocks_x() +
+                            dx * 8 + ix];
+              }
+            }
+            sum /= 64;
+            ASSERT_NEAR(
+                sum, idct_acs_downsampled[dy * 8 * acs.covered_blocks_x() + dx],
+                1e-6)
+                << " acs " << type;
+          }
+        }
+      }
+    }
+  }
+};
+
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(
+    AcStrategyDownsample,
+    ::testing::Range(0, int(AcStrategy::Type::kNumValidStrategies)));
+
+TEST_P(AcStrategyDownsample, Test) { Run(); }
+
+class AcStrategyTargetTest : public ::hwy::TestWithParamTarget {};
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(AcStrategyTargetTest);
+
+TEST_P(AcStrategyTargetTest, RoundtripAFVDCT) {
+  HWY_ALIGN_MAX float idct[16];
+  for (size_t i = 0; i < 16; i++) {
+    HWY_ALIGN_MAX float pixels[16] = {};
+    pixels[i] = 1;
+    HWY_ALIGN_MAX float coeffs[16] = {};
+
+    AFVDCT4x4(pixels, coeffs);
+    AFVIDCT4x4(coeffs, idct);
+    for (size_t j = 0; j < 16; j++) {
+      EXPECT_NEAR(idct[j], pixels[j], 1e-6);
+    }
+  }
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFV) {
+  const AcStrategy::Type type = AcStrategy::Type::AFV0;
+  HWY_ALIGN_MAX float pixels[64] = {1};
+  HWY_ALIGN_MAX float coeffs[64] = {};
+  HWY_ALIGN_MAX float scratch_space[64] = {};
+  for (size_t i = 0; i < 1 << 14; i++) {
+    TransformToPixels(type, coeffs, pixels, 8, scratch_space);
+    TransformFromPixels(type, pixels, 8, coeffs, scratch_space);
+  }
+  EXPECT_NEAR(pixels[0], 0.0, 1E-6);
+}
+
+TEST_P(AcStrategyTargetTest, BenchmarkAFVDCT) {
+  HWY_ALIGN_MAX float pixels[64] = {1};
+  HWY_ALIGN_MAX float coeffs[64] = {};
+  for (size_t i = 0; i < 1 << 14; i++) {
+    AFVDCT4x4(pixels, coeffs);
+    AFVIDCT4x4(coeffs, pixels);
+  }
+  EXPECT_NEAR(pixels[0], 1.0, 1E-6);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/alpha.cc b/third-party/libjxl/libjxl/lib/jxl/alpha.cc
new file mode 100644
index 0000000000..48d7e7ee92
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/alpha.cc
@@ -0,0 +1,133 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include
+
+#include
+
+namespace jxl {
+
+// Clamps `x` to the closed interval [0, 1].
+static float Clamp(float x) { return std::max(std::min(1.0f, x), 0.0f); }
+
+// Blends the RGBA layer `fg` over `bg` and writes `num_pixels` samples per
+// channel to `out`. If `clamp` is true, the foreground alpha is clamped to
+// [0, 1] before it is used. With premultiplied alpha, colors are combined as
+// fg + bg * (1 - fga); otherwise they are weighted by their alphas and divided
+// by the resulting alpha (a non-positive resulting alpha yields zero colors).
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+                          const AlphaBlendingInputLayer& fg,
+                          const AlphaBlendingOutput& out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp) {
+  if (alpha_is_premultiplied) {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      float fga = clamp ? Clamp(fg.a[x]) : fg.a[x];
+      out.r[x] = (fg.r[x] + bg.r[x] * (1.f - fga));
+      out.g[x] = (fg.g[x] + bg.g[x] * (1.f - fga));
+      out.b[x] = (fg.b[x] + bg.b[x] * (1.f - fga));
+      out.a[x] = (1.f - (1.f - fga) * (1.f - bg.a[x]));
+    }
+  } else {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      float fga = clamp ? Clamp(fg.a[x]) : fg.a[x];
+      const float new_a = 1.f - (1.f - fga) * (1.f - bg.a[x]);
+      const float rnew_a = (new_a > 0 ? 1.f / new_a : 0.f);
+      out.r[x] = (fg.r[x] * fga + bg.r[x] * bg.a[x] * (1.f - fga)) * rnew_a;
+      out.g[x] = (fg.g[x] * fga + bg.g[x] * bg.a[x] * (1.f - fga)) * rnew_a;
+      out.b[x] = (fg.b[x] * fga + bg.b[x] * bg.a[x] * (1.f - fga)) * rnew_a;
+      out.a[x] = new_a;
+    }
+  }
+}
+
+// Single-plane variant: blends the plane `fg` (alpha `fga`) over `bg` (alpha
+// `bga`) into `out`. When the planes are the alpha planes themselves
+// (bg == bga && fg == fga), the combined alpha is written instead. If `clamp`
+// is true, the foreground alpha is clamped to [0, 1] before it is used, as in
+// the RGBA overload.
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+                          const float* fga, float* out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp) {
+  if (bg == bga && fg == fga) {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      float fa = clamp ? Clamp(fga[x]) : fga[x];
+      out[x] = (1.f - (1.f - fa) * (1.f - bga[x]));
+    }
+  } else {
+    if (alpha_is_premultiplied) {
+      for (size_t x = 0; x < num_pixels; ++x) {
+        float fa = clamp ? Clamp(fga[x]) : fga[x];
+        out[x] = (fg[x] + bg[x] * (1.f - fa));
+      }
+    } else {
+      for (size_t x = 0; x < num_pixels; ++x) {
+        float fa = clamp ? Clamp(fga[x]) : fga[x];
+        const float new_a = 1.f - (1.f - fa) * (1.f - bga[x]);
+        const float rnew_a = (new_a > 0 ? 1.f / new_a : 0.f);
+        out[x] = (fg[x] * fa + bg[x] * bga[x] * (1.f - fa)) * rnew_a;
+      }
+    }
+  }
+}
+
+// Computes out = bg + fg * fga, clamping `fga` to [0, 1] if `clamp` is true.
+// When `fg` is the alpha plane itself (fg == fga), `bg` is copied to `out`.
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+                             float* out, size_t num_pixels, bool clamp) {
+  if (fg == fga) {
+    memcpy(out, bg, num_pixels * sizeof(*out));
+  } else if (clamp) {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      out[x] = bg[x] + fg[x] * Clamp(fga[x]);
+    }
+  } else {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      out[x] = bg[x] + fg[x] * fga[x];
+    }
+  }
+}
+
+// Computes out = bg * fg, clamping `fg` to [0, 1] if `clamp` is true.
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+                        size_t num_pixels, bool clamp) {
+  if (clamp) {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      out[x] = bg[x] * Clamp(fg[x]);
+    }
+  } else {
+    for (size_t x = 0; x < num_pixels; ++x) {
+      out[x] = bg[x] * fg[x];
+    }
+  }
+}
+
+// Scales r, g and b in place by max(kSmallAlpha, a).
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                      float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                      size_t num_pixels) {
+  for (size_t x = 0; x < num_pixels; ++x) {
+    const float multiplier = std::max(kSmallAlpha, a[x]);
+    r[x] *= multiplier;
+    g[x] *= multiplier;
+    b[x] *= multiplier;
+  }
+}
+
+// Scales r, g and b in place by 1 / max(kSmallAlpha, a); the lower bound keeps
+// the division finite when a is zero.
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                        float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                        size_t num_pixels) {
+  for (size_t x = 0; x < num_pixels; ++x) {
+    const float multiplier = 1.f / std::max(kSmallAlpha, a[x]);
+    r[x] *= multiplier;
+    g[x] *= multiplier;
+    b[x] *= multiplier;
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/alpha.h b/third-party/libjxl/libjxl/lib/jxl/alpha.h
new file mode 100644
index 0000000000..efb76c800f
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/alpha.h
@@ -0,0 +1,66 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ALPHA_H_
+#define LIB_JXL_ALPHA_H_
+
+#include
+#include
+
+#include
+
+#include "lib/jxl/base/compiler_specific.h"
+
+namespace jxl {
+
+// A very small value to avoid divisions by zero when converting to
+// unpremultiplied alpha. Page 21 of the technical introduction to OpenEXR
+// (https://www.openexr.com/documentation/TechnicalIntroduction.pdf) recommends
+// "a power of two" that is "less than half of the smallest positive 16-bit
+// floating-point value". That smallest value happens to be the denormal number
+// 2^-24, so 2^-26 should be a good choice.
+static constexpr float kSmallAlpha = 1.f / (1u << 26u);
+
+struct AlphaBlendingInputLayer {
+  const float* r;
+  const float* g;
+  const float* b;
+  const float* a;  // alpha plane
+};
+
+struct AlphaBlendingOutput {
+  float* r;
+  float* g;
+  float* b;
+  float* a;  // alpha plane
+};
+
+// Note: The pointers in `out` are allowed to alias those in `bg` or `fg`.
+// No pointer shall be null.
+void PerformAlphaBlending(const AlphaBlendingInputLayer& bg,
+                          const AlphaBlendingInputLayer& fg,
+                          const AlphaBlendingOutput& out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp);
+// Single-plane alpha blending of `fg` (alpha `fga`) over `bg` (alpha `bga`).
+void PerformAlphaBlending(const float* bg, const float* bga, const float* fg,
+                          const float* fga, float* out, size_t num_pixels,
+                          bool alpha_is_premultiplied, bool clamp);
+
+void PerformAlphaWeightedAdd(const float* bg, const float* fg, const float* fga,
+                             float* out, size_t num_pixels, bool clamp);  // out = bg + fg * fga
+
+void PerformMulBlending(const float* bg, const float* fg, float* out,
+                        size_t num_pixels, bool clamp);  // out = bg * fg
+
+void PremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                      float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                      size_t num_pixels);  // in place: rgb *= max(kSmallAlpha, a)
+void UnpremultiplyAlpha(float* JXL_RESTRICT r, float* JXL_RESTRICT g,
+                        float* JXL_RESTRICT b, const float* JXL_RESTRICT a,
+                        size_t num_pixels);  // in place: rgb /= max(kSmallAlpha, a)
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_ALPHA_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/alpha_test.cc b/third-party/libjxl/libjxl/lib/jxl/alpha_test.cc
new file mode 100644
index 0000000000..ddafd829ec
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/alpha_test.cc
@@ -0,0 +1,134 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/alpha.h"
+
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+using ::testing::_;
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+TEST(AlphaTest, BlendingWithNonPremultiplied) {
+  const float bg_rgb[3] = {100, 110, 120};
+  const float bg_a = 180.f / 255;
+  const float fg_rgb[3] = {25, 21, 23};
+  const float fg_a = 15420.f / 65535;
+  const float fg_a2 = 2.0f;  // out-of-range alpha, used with clamp=true below
+  float out_rgb[3];
+  float out_a;
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/false, /*clamp=*/false);
+  EXPECT_THAT(out_rgb,
+              ElementsAre(FloatNear(77.2f, .05f), FloatNear(83.0f, .05f),
+                          FloatNear(90.6f, .05f)));
+  EXPECT_NEAR(out_a, 3174.f / 4095, 1e-5);
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a2},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/false, /*clamp=*/true);
+  EXPECT_THAT(out_rgb, ElementsAre(FloatNear(fg_rgb[0], .05f),
+                                   FloatNear(fg_rgb[1], .05f),
+                                   FloatNear(fg_rgb[2], .05f)));
+  EXPECT_NEAR(out_a, 1.0f, 1e-5);  // alpha 2.0 clamps to 1.0, so fg fully covers bg
+}
+
+TEST(AlphaTest, BlendingWithPremultiplied) {
+  const float bg_rgb[3] = {100, 110, 120};
+  const float bg_a = 180.f / 255;
+  const float fg_rgb[3] = {25, 21, 23};
+  const float fg_a = 15420.f / 65535;
+  const float fg_a2 = 2.0f;  // out-of-range alpha, used with clamp=true below
+  float out_rgb[3];
+  float out_a;
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/true, /*clamp=*/false);
+  EXPECT_THAT(out_rgb,
+              ElementsAre(FloatNear(101.5f, .05f), FloatNear(105.1f, .05f),
+                          FloatNear(114.8f, .05f)));
+  EXPECT_NEAR(out_a, 3174.f / 4095, 1e-5);
+  PerformAlphaBlending(
+      /*bg=*/{&bg_rgb[0], &bg_rgb[1], &bg_rgb[2], &bg_a},
+      /*fg=*/{&fg_rgb[0], &fg_rgb[1], &fg_rgb[2], &fg_a2},
+      /*out=*/{&out_rgb[0], &out_rgb[1], &out_rgb[2], &out_a}, 1,
+      /*alpha_is_premultiplied=*/true, /*clamp=*/true);
+  EXPECT_THAT(out_rgb, ElementsAre(FloatNear(fg_rgb[0], .05f),
+                                   FloatNear(fg_rgb[1], .05f),
+                                   FloatNear(fg_rgb[2], .05f)));
+  EXPECT_NEAR(out_a, 1.0f, 1e-5);  // alpha 2.0 clamps to 1.0, so fg fully covers bg
+}
+
+TEST(AlphaTest, Mul) {
+  const float bg = 100;
+  const float fg = 25;
+  float out;
+  PerformMulBlending(&bg, &fg, &out, 1, /*clamp=*/false);
+  EXPECT_THAT(out, FloatNear(fg * bg, .05f));
+  PerformMulBlending(&bg, &fg, &out, 1, /*clamp=*/true);
+  EXPECT_THAT(out, FloatNear(bg, .05f));  // fg = 25 clamps to 1, leaving bg unchanged
+}
+
+TEST(AlphaTest, PremultiplyAndUnpremultiply) {
+  const float alpha[] = {0.f, 63.f / 255, 127.f / 255, 1.f};
+  float r[] = {120, 130, 140, 150};
+  float g[] = {124, 134, 144, 154};
+  float b[] = {127, 137, 147, 157};
+
+  PremultiplyAlpha(r, g, b, alpha, 4);
+  EXPECT_THAT(
+      r, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(130 * 63.f / 255, 1e-5f),
+                     FloatNear(140 * 127.f / 255, 1e-5f), 150));
+  EXPECT_THAT(
+      g, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(134 * 63.f / 255, 1e-5f),
+                     FloatNear(144 * 127.f / 255, 1e-5f), 154));
+  EXPECT_THAT(
+      b, ElementsAre(FloatNear(0.f, 1e-5f), FloatNear(137 * 63.f / 255, 1e-5f),
+                     FloatNear(147 * 127.f / 255, 1e-5f), 157));
+
+  UnpremultiplyAlpha(r, g, b, alpha, 4);
+  EXPECT_THAT(r, ElementsAre(FloatNear(120, 1e-4f), FloatNear(130, 1e-4f),
+                             FloatNear(140, 1e-4f), FloatNear(150, 1e-4f)));
+  EXPECT_THAT(g, ElementsAre(FloatNear(124, 1e-4f), FloatNear(134, 1e-4f),
+                             FloatNear(144, 1e-4f), FloatNear(154, 1e-4f)));
+  EXPECT_THAT(b, ElementsAre(FloatNear(127, 1e-4f), FloatNear(137, 1e-4f),
+                             FloatNear(147, 1e-4f), FloatNear(157, 1e-4f)));
+}
+
+TEST(AlphaTest, UnpremultiplyAndPremultiply) {
+  const float alpha[] = {0.f, 63.f / 255, 127.f / 255, 1.f};
+  float r[] = {50, 60, 70, 80};
+  float g[] = {54, 64, 74, 84};
+  float b[] = {57, 67, 77, 87};
+
+  UnpremultiplyAlpha(r, g, b, alpha, 4);
+  EXPECT_THAT(r, ElementsAre(_, FloatNear(60 * 255.f / 63, 1e-4f),  // `_`: the alpha == 0 entry is not checked
+                             FloatNear(70 * 255.f / 127, 1e-4f), 80));
+  EXPECT_THAT(g, ElementsAre(_, FloatNear(64 * 255.f / 63, 1e-4f),
+                             FloatNear(74 * 255.f / 127, 1e-4f), 84));
+  EXPECT_THAT(b, ElementsAre(_, FloatNear(67 * 255.f / 63, 1e-4f),
+                             FloatNear(77 * 255.f / 127, 1e-4f), 87));
+
+  PremultiplyAlpha(r, g, b, alpha, 4);
+  EXPECT_THAT(r, ElementsAre(FloatNear(50, 1e-4f), FloatNear(60, 1e-4f),
+                             FloatNear(70, 1e-4f), FloatNear(80, 1e-4f)));
+  EXPECT_THAT(g, ElementsAre(FloatNear(54, 1e-4f), FloatNear(64, 1e-4f),
+                             FloatNear(74, 1e-4f), FloatNear(84, 1e-4f)));
+  EXPECT_THAT(b, ElementsAre(FloatNear(57, 1e-4f), FloatNear(67, 1e-4f),
+                             FloatNear(77, 1e-4f), FloatNear(87, 1e-4f)));
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_common.cc b/third-party/libjxl/libjxl/lib/jxl/ans_common.cc
new file mode 100644
index 0000000000..d2cf897ec4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ans_common.cc
@@ -0,0 +1,148 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/ans_common.h"
+
+#include
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+std::vector CreateFlatHistogram(int length, int total_count) {
+  JXL_ASSERT(length > 0);
+  JXL_ASSERT(length <= total_count);
+  const int count = total_count / length;
+  std::vector result(length, count);
+  const int rem_counts = total_count % length;
+  for (int i = 0; i < rem_counts; ++i) {
+    ++result[i];  // the first rem_counts entries absorb the remainder
+  }
+  return result;
+}
+
+// First, all trailing non-occurring symbols are removed from the distribution;
+// if this leaves the distribution empty, a dummy symbol with max weight is
+// added. This ensures that the resulting distribution sums to total table size.
+// Then, `entry_size` is chosen to be the largest power of two so that
+// `table_size` = ANS_TAB_SIZE/`entry_size` is at least as big as the
+// distribution size.
+// Note that each entry will only ever contain two different symbols, and
+// consecutive ranges of offsets, which allows us to use a compact
+// representation.
+// Each entry is initialized with only the (symbol=i, offset) pairs; then
+// positions for which the entry overflows (i.e. distribution[i] > entry_size)
+// or is not full are computed, and put into a stack in increasing order.
+// Missing symbols in the distribution are padded with 0 (because `table_size`
+// >= number of symbols). The `cutoff` value for each entry is initialized to
+// the number of occupied slots in that entry (i.e. `distribution[i]`). While
+// the overflowing-symbol stack is not empty (which implies that the
+// underflowing-symbol stack also is not), the top overfull and underfull
+// positions are popped from the stack; the empty slots in the underfull entry
+// are then filled with as many slots as needed from the overfull entry; such
+// slots are placed after the slots in the overfull entry, and `offsets1` is
+// computed accordingly. The formerly underfull entry is thus now neither
+// underfull nor overfull, and represents exactly two symbols. The overfull
+// entry might be either overfull or underfull, and is pushed into the
+// corresponding stack.
+void InitAliasTable(std::vector distribution, uint32_t range,
+                    size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a) {
+  while (!distribution.empty() && distribution.back() == 0) {
+    distribution.pop_back();
+  }
+  // Ensure that a valid table is always returned, even for an empty
+  // alphabet. Otherwise, a specially-crafted stream might crash the
+  // decoder.
+  if (distribution.empty()) {
+    distribution.emplace_back(range);
+  }
+  const size_t table_size = 1 << log_alpha_size;
+#if JXL_ENABLE_ASSERT
+  int sum = std::accumulate(distribution.begin(), distribution.end(), 0);
+#endif  // JXL_ENABLE_ASSERT
+  JXL_ASSERT(static_cast(sum) == range);
+  // range must be a power of two
+  JXL_ASSERT((range & (range - 1)) == 0);
+  JXL_ASSERT(distribution.size() <= table_size);
+  JXL_ASSERT(table_size <= range);
+  const uint32_t entry_size = range >> log_alpha_size;  // this is exact
+  // Special case for single-symbol distributions, that ensures that the state
+  // does not change when decoding from such a distribution. Note that, since we
+  // hardcode offset0 == 0, it is not straightforward (if at all possible) to
+  // fix the general case to produce this result.
+  for (size_t sym = 0; sym < distribution.size(); sym++) {
+    if (distribution[sym] == ANS_TAB_SIZE) {  // NOTE(review): compares against ANS_TAB_SIZE, not `range` - confirm callers always pass range == ANS_TAB_SIZE
+      for (size_t i = 0; i < table_size; i++) {
+        a[i].right_value = sym;
+        a[i].cutoff = 0;
+        a[i].offsets1 = entry_size * i;
+        a[i].freq0 = 0;
+        a[i].freq1_xor_freq0 = ANS_TAB_SIZE;
+      }
+      return;
+    }
+  }
+  std::vector underfull_posn;
+  std::vector overfull_posn;
+  std::vector cutoffs(1 << log_alpha_size);
+  // Initialize entries.
+  for (size_t i = 0; i < distribution.size(); i++) {
+    cutoffs[i] = distribution[i];
+    if (cutoffs[i] > entry_size) {
+      overfull_posn.push_back(i);
+    } else if (cutoffs[i] < entry_size) {
+      underfull_posn.push_back(i);
+    }
+  }
+  for (uint32_t i = distribution.size(); i < table_size; i++) {
+    cutoffs[i] = 0;
+    underfull_posn.push_back(i);
+  }
+  // Reassign overflow/underflow values.
+  while (!overfull_posn.empty()) {
+    uint32_t overfull_i = overfull_posn.back();
+    overfull_posn.pop_back();
+    JXL_ASSERT(!underfull_posn.empty());
+    uint32_t underfull_i = underfull_posn.back();
+    underfull_posn.pop_back();
+    uint32_t underfull_by = entry_size - cutoffs[underfull_i];  // free slots in the underfull entry
+    cutoffs[overfull_i] -= underfull_by;
+    // overfull positions have their original symbols
+    a[underfull_i].right_value = overfull_i;
+    a[underfull_i].offsets1 = cutoffs[overfull_i];
+    // Slots in the right part of entry underfull_i were taken from the end
+    // of the symbols in entry overfull_i.
+    if (cutoffs[overfull_i] < entry_size) {
+      underfull_posn.push_back(overfull_i);
+    } else if (cutoffs[overfull_i] > entry_size) {
+      overfull_posn.push_back(overfull_i);
+    }
+  }
+  for (uint32_t i = 0; i < table_size; i++) {
+    // cutoffs[i] is properly initialized but the clang-analyzer doesn't infer
+    // it since it is partially initialized across two for-loops.
+    // NOLINTNEXTLINE(clang-analyzer-core.UndefinedBinaryOperatorResult)
+    if (cutoffs[i] == entry_size) {
+      a[i].right_value = i;
+      a[i].offsets1 = 0;
+      a[i].cutoff = 0;
+    } else {
+      // Note that, if cutoff is not equal to entry_size,
+      // a[i].offsets1 was initialized with (overfull cutoff) -
+      // (entry_size - a[i].cutoff). Thus, subtracting
+      // a[i].cutoff cannot make it negative.
+      a[i].offsets1 -= cutoffs[i];
+      a[i].cutoff = cutoffs[i];
+    }
+    const size_t freq0 = i < distribution.size() ? distribution[i] : 0;
+    const size_t i1 = a[i].right_value;
+    const size_t freq1 = i1 < distribution.size() ? distribution[i1] : 0;
+    a[i].freq0 = static_cast(freq0);
+    a[i].freq1_xor_freq0 = static_cast(freq1 ^ freq0);  // Lookup XORs this with freq0 to recover freq1
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_common.h b/third-party/libjxl/libjxl/lib/jxl/ans_common.h
new file mode 100644
index 0000000000..fb5058e310
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/ans_common.h
@@ -0,0 +1,143 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_ANS_COMMON_H_
+#define LIB_JXL_ANS_COMMON_H_
+
+#include
+
+#include
+#include  // Prefetch
+#include
+
+#include "lib/jxl/ans_params.h"
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Returns the precision (number of bits) that should be used to store
+// a histogram count such that Log2Floor(count) == logcount.
+static JXL_INLINE uint32_t GetPopulationCountPrecision(uint32_t logcount,
+                                                       uint32_t shift) {
+  int32_t r = std::min(
+      logcount, int(shift) - int((ANS_LOG_TAB_SIZE - logcount) >> 1));
+  if (r < 0) return 0;
+  return r;
+}
+
+// Returns a histogram where the counts are positive, differ by at most 1,
+// and add up to total_count. The bigger counts (if any) are at the beginning
+// of the histogram.
+std::vector CreateFlatHistogram(int length, int total_count);
+
+// An alias table implements a mapping from the [0, ANS_TAB_SIZE) range into
+// the [0, ANS_MAX_ALPHABET_SIZE) range, satisfying the following conditions:
+// - each symbol occurs as many times as specified by any valid distribution
+// of frequencies of the symbols. A valid distribution here is an array of
+// ANS_MAX_ALPHABET_SIZE that contains numbers in the range [0, ANS_TAB_SIZE],
+// and whose sum is ANS_TAB_SIZE.
+// - lookups can be done in constant time, and also return how many smaller +// input values map into the same symbol, according to some well-defined order +// of input values. +// - the space used by the alias table is given by a small constant times the +// index of the largest symbol with nonzero probability in the distribution. +// Each of the entries in the table covers a range of `entry_size` values in the +// [0, ANS_TAB_SIZE) range; consecutive entries represent consecutive +// sub-ranges. In the range covered by entry `i`, the first `cutoff` values map +// to symbol `i`, while the others map to symbol `right_value`. +// +// TODO(veluca): consider making the order used for computing offsets easier to +// define - it is currently defined by the algorithm to compute the alias table. +// Beware of breaking the implicit assumption that symbols that come after the +// cutoff value should have an offset at least as big as the cutoff. + +struct AliasTable { + struct Symbol { + size_t value; + size_t offset; + size_t freq; + }; + +// Working set size matters here (~64 tables x 256 entries). +// offsets0 is always zero (beginning of [0] side among the same symbol). +// offsets1 is an offset of (pos >= cutoff) side decremented by cutoff. +#pragma pack(push, 1) + struct Entry { + uint8_t cutoff; // < kEntrySizeMinus1 when used by ANS. + uint8_t right_value; // < alphabet size. + uint16_t freq0; + + // Only used if `greater` (see Lookup) + uint16_t offsets1; // <= ANS_TAB_SIZE + uint16_t freq1_xor_freq0; // for branchless ternary in Lookup + }; +#pragma pack(pop) + + // Dividing `value` by `entry_size` determines `i`, the entry which is + // responsible for the input. If the remainder is below `cutoff`, then the + // mapped symbol is `i`; since `offsets[0]` stores the number of occurrences + // of `i` "before" the start of this entry, the offset of the input will be + // `offsets[0] + remainder`. 
If the remainder is above cutoff, the mapped + // symbol is `right_value`; since `offsets[1]` stores the number of + // occurrences of `right_value` "before" this entry, minus the `cutoff` value, + // the input offset is then `remainder + offsets[1]`. + static JXL_INLINE Symbol Lookup(const Entry* JXL_RESTRICT table, size_t value, + size_t log_entry_size, + size_t entry_size_minus_1) { + const size_t i = value >> log_entry_size; + const size_t pos = value & entry_size_minus_1; + +#if JXL_BYTE_ORDER_LITTLE + uint64_t entry; + memcpy(&entry, &table[i].cutoff, sizeof(entry)); + const size_t cutoff = entry & 0xFF; // = MOVZX + const size_t right_value = (entry >> 8) & 0xFF; // = MOVZX + const size_t freq0 = (entry >> 16) & 0xFFFF; +#else + // Generates multiple loads with complex addressing. + const size_t cutoff = table[i].cutoff; + const size_t right_value = table[i].right_value; + const size_t freq0 = table[i].freq0; +#endif + + const bool greater = pos >= cutoff; + +#if JXL_BYTE_ORDER_LITTLE + const uint64_t conditional = greater ? entry : 0; // = CMOV + const size_t offsets1_or_0 = (conditional >> 32) & 0xFFFF; + const size_t freq1_xor_freq0_or_0 = conditional >> 48; +#else + const size_t offsets1_or_0 = greater ? table[i].offsets1 : 0; + const size_t freq1_xor_freq0_or_0 = greater ? table[i].freq1_xor_freq0 : 0; +#endif + + // WARNING: moving this code may interfere with CMOV heuristics. + Symbol s; + s.value = greater ? right_value : i; + s.offset = offsets1_or_0 + pos; + s.freq = freq0 ^ freq1_xor_freq0_or_0; // = greater ? freq1 : freq0 + // XOR avoids implementation-defined conversion from unsigned to signed. + // Alternatives considered: BEXTR is 2 cycles on HSW, SET+shift causes + // spills, simple ternary has a long dependency chain. 
+ + return s; + } + + static HWY_INLINE void Prefetch(const Entry* JXL_RESTRICT table, size_t value, + size_t log_entry_size) { + const size_t i = value >> log_entry_size; + hwy::Prefetch(table + i); + } +}; + +// Computes an alias table for a given distribution. +void InitAliasTable(std::vector distribution, uint32_t range, + size_t log_alpha_size, AliasTable::Entry* JXL_RESTRICT a); + +} // namespace jxl + +#endif // LIB_JXL_ANS_COMMON_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_common_test.cc b/third-party/libjxl/libjxl/lib/jxl/ans_common_test.cc new file mode 100644 index 0000000000..487b6cf5bd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/ans_common_test.cc @@ -0,0 +1,43 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/ans_common.h" + +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +void VerifyAliasDistribution(const std::vector& distribution, + uint32_t range) { + constexpr size_t log_alpha_size = 8; + AliasTable::Entry table[1 << log_alpha_size]; + InitAliasTable(distribution, range, log_alpha_size, table); + std::vector> offsets(distribution.size()); + for (uint32_t i = 0; i < range; i++) { + AliasTable::Symbol s = AliasTable::Lookup( + table, i, ANS_LOG_TAB_SIZE - 8, (1 << (ANS_LOG_TAB_SIZE - 8)) - 1); + offsets[s.value].push_back(s.offset); + } + for (uint32_t i = 0; i < distribution.size(); i++) { + ASSERT_EQ(static_cast(distribution[i]), offsets[i].size()); + std::sort(offsets[i].begin(), offsets[i].end()); + for (uint32_t j = 0; j < offsets[i].size(); j++) { + ASSERT_EQ(offsets[i][j], j); + } + } +} + +TEST(ANSCommonTest, AliasDistributionSmoke) { + VerifyAliasDistribution({ANS_TAB_SIZE / 2, ANS_TAB_SIZE / 2}, ANS_TAB_SIZE); + VerifyAliasDistribution({ANS_TAB_SIZE}, ANS_TAB_SIZE); + VerifyAliasDistribution({0, 0, 0, 
ANS_TAB_SIZE, 0}, ANS_TAB_SIZE); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_params.h b/third-party/libjxl/libjxl/lib/jxl/ans_params.h new file mode 100644 index 0000000000..4bbc284c0b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/ans_params.h @@ -0,0 +1,36 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ANS_PARAMS_H_ +#define LIB_JXL_ANS_PARAMS_H_ + +// Common parameters that are needed for both the ANS entropy encoding and +// decoding methods. + +#include +#include + +namespace jxl { + +// TODO(veluca): decide if 12 is the best constant here (valid range is up to +// 16). This requires recomputing the Huffman tables in {enc,dec}_ans.cc +// 14 gives a 0.2% improvement at d1 and makes d8 slightly worse. This is +// likely not worth the increase in encoder complexity. +#define ANS_LOG_TAB_SIZE 12u +#define ANS_TAB_SIZE (1 << ANS_LOG_TAB_SIZE) +#define ANS_TAB_MASK (ANS_TAB_SIZE - 1) + +// Largest possible symbol to be encoded by either ANS or prefix coding. +#define PREFIX_MAX_ALPHABET_SIZE 4096 +#define ANS_MAX_ALPHABET_SIZE 256 + +// Max number of bits for prefix coding. +#define PREFIX_MAX_BITS 15 + +#define ANS_SIGNATURE 0x13 // Initial state, used as CRC. + +} // namespace jxl + +#endif // LIB_JXL_ANS_PARAMS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/ans_test.cc b/third-party/libjxl/libjxl/lib/jxl/ans_test.cc new file mode 100644 index 0000000000..06bc46477f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/ans_test.cc @@ -0,0 +1,278 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include +#include + +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +void RoundtripTestcase(int n_histograms, int alphabet_size, + const std::vector& input_values) { + constexpr uint16_t kMagic1 = 0x9e33; + constexpr uint16_t kMagic2 = 0x8b04; + + BitWriter writer; + // Space for magic bytes. + BitWriter::Allotment allotment_magic1(&writer, 16); + writer.Write(16, kMagic1); + allotment_magic1.ReclaimAndCharge(&writer, 0, nullptr); + + std::vector context_map; + EntropyEncodingData codes; + std::vector> input_values_vec; + input_values_vec.push_back(input_values); + + BuildAndEncodeHistograms(HistogramParams(), n_histograms, input_values_vec, + &codes, &context_map, &writer, 0, nullptr); + WriteTokens(input_values_vec[0], codes, context_map, &writer, 0, nullptr); + + // Magic bytes + padding + BitWriter::Allotment allotment_magic2(&writer, 24); + writer.Write(16, kMagic2); + writer.ZeroPadToByte(); + allotment_magic2.ReclaimAndCharge(&writer, 0, nullptr); + + // We do not truncate the output. Reading past the end reads out zeroes + // anyway. 
+ BitReader br(writer.GetSpan()); + + ASSERT_EQ(br.ReadBits(16), kMagic1); + + std::vector dec_context_map; + ANSCode decoded_codes; + ASSERT_TRUE( + DecodeHistograms(&br, n_histograms, &decoded_codes, &dec_context_map)); + ASSERT_EQ(dec_context_map, context_map); + ANSSymbolReader reader(&decoded_codes, &br); + + for (const Token& symbol : input_values) { + uint32_t read_symbol = + reader.ReadHybridUint(symbol.context, &br, dec_context_map); + ASSERT_EQ(read_symbol, symbol.value); + } + ASSERT_TRUE(reader.CheckANSFinalState()); + + ASSERT_EQ(br.ReadBits(16), kMagic2); + EXPECT_TRUE(br.Close()); +} + +TEST(ANSTest, EmptyRoundtrip) { + RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, std::vector()); +} + +TEST(ANSTest, SingleSymbolRoundtrip) { + for (uint32_t i = 0; i < ANS_MAX_ALPHABET_SIZE; i++) { + RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, {{0, i}}); + } + for (uint32_t i = 0; i < ANS_MAX_ALPHABET_SIZE; i++) { + RoundtripTestcase(2, ANS_MAX_ALPHABET_SIZE, + std::vector(1024, {0, i})); + } +} + +#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \ + defined(THREAD_SANITIZER) +constexpr size_t kReps = 3; +#else +constexpr size_t kReps = 10; +#endif + +void RoundtripRandomStream(int alphabet_size, size_t reps = kReps, + size_t num = 1 << 18) { + constexpr int kNumHistograms = 3; + Rng rng(0); + for (size_t i = 0; i < reps; i++) { + std::vector symbols; + for (size_t j = 0; j < num; j++) { + int context = rng.UniformI(0, kNumHistograms); + int value = rng.UniformU(0, alphabet_size); + symbols.emplace_back(context, value); + } + RoundtripTestcase(kNumHistograms, alphabet_size, symbols); + } +} + +void RoundtripRandomUnbalancedStream(int alphabet_size) { + constexpr int kNumHistograms = 3; + constexpr int kPrecision = 1 << 10; + Rng rng(0); + for (size_t i = 0; i < kReps; i++) { + std::vector distributions[kNumHistograms] = {}; + for (int j = 0; j < kNumHistograms; j++) { + distributions[j].resize(kPrecision); + int symbol = 0; + int remaining = 1; + for (int 
k = 0; k < kPrecision; k++) { + if (remaining == 0) { + if (symbol < alphabet_size - 1) symbol++; + // There is no meaning behind this distribution: it's anything that + // will create a nonuniform distribution and won't have too few + // symbols usually. Also we want different distributions we get to be + // sufficiently dissimilar. + remaining = rng.UniformU(0, kPrecision - k + 1); + } + distributions[j][k] = symbol; + remaining--; + } + } + std::vector symbols; + for (int j = 0; j < 1 << 18; j++) { + int context = rng.UniformI(0, kNumHistograms); + int value = rng.UniformU(0, kPrecision); + symbols.emplace_back(context, value); + } + RoundtripTestcase(kNumHistograms + 1, alphabet_size, symbols); + } +} + +TEST(ANSTest, RandomStreamRoundtrip3Small) { RoundtripRandomStream(3, 1, 16); } + +TEST(ANSTest, RandomStreamRoundtrip3) { RoundtripRandomStream(3); } + +TEST(ANSTest, RandomStreamRoundtripBig) { + RoundtripRandomStream(ANS_MAX_ALPHABET_SIZE); +} + +TEST(ANSTest, RandomUnbalancedStreamRoundtrip3) { + RoundtripRandomUnbalancedStream(3); +} + +TEST(ANSTest, RandomUnbalancedStreamRoundtripBig) { + RoundtripRandomUnbalancedStream(ANS_MAX_ALPHABET_SIZE); +} + +TEST(ANSTest, UintConfigRoundtrip) { + for (size_t log_alpha_size = 5; log_alpha_size <= 8; log_alpha_size++) { + std::vector uint_config, uint_config_dec; + for (size_t i = 0; i < log_alpha_size; i++) { + for (size_t j = 0; j <= i; j++) { + for (size_t k = 0; k <= i - j; k++) { + uint_config.emplace_back(i, j, k); + } + } + } + uint_config.emplace_back(log_alpha_size, 0, 0); + uint_config_dec.resize(uint_config.size()); + BitWriter writer; + BitWriter::Allotment allotment(&writer, 10 * uint_config.size()); + EncodeUintConfigs(uint_config, &writer, log_alpha_size); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + writer.ZeroPadToByte(); + BitReader br(writer.GetSpan()); + EXPECT_TRUE(DecodeUintConfigs(log_alpha_size, &uint_config_dec, &br)); + EXPECT_TRUE(br.Close()); + for (size_t i = 0; i < 
uint_config.size(); i++) { + EXPECT_EQ(uint_config[i].split_token, uint_config_dec[i].split_token); + EXPECT_EQ(uint_config[i].msb_in_token, uint_config_dec[i].msb_in_token); + EXPECT_EQ(uint_config[i].lsb_in_token, uint_config_dec[i].lsb_in_token); + } + } +} + +void TestCheckpointing(bool ans, bool lz77) { + std::vector> input_values(1); + for (size_t i = 0; i < 1024; i++) { + input_values[0].push_back(Token(0, i % 4)); + } + // up to lz77 window size. + for (size_t i = 0; i < (1 << 20) - 1022; i++) { + input_values[0].push_back(Token(0, (i % 5) + 4)); + } + // Ensure that when the window wraps around, new values are different. + input_values[0].push_back(Token(0, 0)); + for (size_t i = 0; i < 1024; i++) { + input_values[0].push_back(Token(0, i % 4)); + } + + std::vector context_map; + EntropyEncodingData codes; + HistogramParams params; + params.lz77_method = lz77 ? HistogramParams::LZ77Method::kLZ77 + : HistogramParams::LZ77Method::kNone; + params.force_huffman = !ans; + + BitWriter writer; + { + auto input_values_copy = input_values; + BuildAndEncodeHistograms(params, 1, input_values_copy, &codes, &context_map, + &writer, 0, nullptr); + WriteTokens(input_values_copy[0], codes, context_map, &writer, 0, nullptr); + writer.ZeroPadToByte(); + } + + // We do not truncate the output. Reading past the end reads out zeroes + // anyway. 
+ BitReader br(writer.GetSpan()); + Status status = true; + { + BitReaderScopedCloser bc(&br, &status); + + std::vector dec_context_map; + ANSCode decoded_codes; + ASSERT_TRUE(DecodeHistograms(&br, 1, &decoded_codes, &dec_context_map)); + ASSERT_EQ(dec_context_map, context_map); + ANSSymbolReader reader(&decoded_codes, &br); + + ANSSymbolReader::Checkpoint checkpoint; + size_t br_pos = 0; + constexpr size_t kInterval = ANSSymbolReader::kMaxCheckpointInterval - 2; + for (size_t i = 0; i < input_values[0].size(); i++) { + if (i % kInterval == 0 && i > 0) { + reader.Restore(checkpoint); + ASSERT_TRUE(br.Close()); + br = BitReader(writer.GetSpan()); + br.SkipBits(br_pos); + for (size_t j = i - kInterval; j < i; j++) { + Token symbol = input_values[0][j]; + uint32_t read_symbol = + reader.ReadHybridUint(symbol.context, &br, dec_context_map); + ASSERT_EQ(read_symbol, symbol.value) << "j = " << j; + } + } + if (i % kInterval == 0) { + reader.Save(&checkpoint); + br_pos = br.TotalBitsConsumed(); + } + Token symbol = input_values[0][i]; + uint32_t read_symbol = + reader.ReadHybridUint(symbol.context, &br, dec_context_map); + ASSERT_EQ(read_symbol, symbol.value) << "i = " << i; + } + ASSERT_TRUE(reader.CheckANSFinalState()); + } + EXPECT_TRUE(status); +} + +TEST(ANSTest, TestCheckpointingANS) { + TestCheckpointing(/*ans=*/true, /*lz77=*/false); +} + +TEST(ANSTest, TestCheckpointingPrefix) { + TestCheckpointing(/*ans=*/false, /*lz77=*/false); +} + +TEST(ANSTest, TestCheckpointingANSLZ77) { + TestCheckpointing(/*ans=*/true, /*lz77=*/true); +} + +TEST(ANSTest, TestCheckpointingPrefixLZ77) { + TestCheckpointing(/*ans=*/false, /*lz77=*/true); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/base/arch_macros.h b/third-party/libjxl/libjxl/lib/jxl/base/arch_macros.h new file mode 100644 index 0000000000..a98301915e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/arch_macros.h @@ -0,0 +1,33 @@ +// Copyright (c) the JPEG XL Project 
Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_ARCH_MACROS_H_ +#define LIB_JXL_BASE_ARCH_MACROS_H_ + +// Defines the JXL_ARCH_* macros. + +namespace jxl { + +#if defined(__x86_64__) || defined(_M_X64) +#define JXL_ARCH_X64 1 +#else +#define JXL_ARCH_X64 0 +#endif + +#if defined(__powerpc64__) || defined(_M_PPC) +#define JXL_ARCH_PPC 1 +#else +#define JXL_ARCH_PPC 0 +#endif + +#if defined(__aarch64__) || defined(__arm__) +#define JXL_ARCH_ARM 1 +#else +#define JXL_ARCH_ARM 0 +#endif + +} // namespace jxl + +#endif // LIB_JXL_BASE_ARCH_MACROS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/bits.h b/third-party/libjxl/libjxl/lib/jxl/base/bits.h new file mode 100644 index 0000000000..9f86118e72 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/bits.h @@ -0,0 +1,147 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_BITS_H_ +#define LIB_JXL_BASE_BITS_H_ + +// Specialized instructions for processing register-sized bit arrays. + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" + +#if JXL_COMPILER_MSVC +#include +#endif + +#include +#include + +namespace jxl { + +// Empty struct used as a size tag type. +template +struct SizeTag {}; + +template +constexpr bool IsSigned() { + return T(0) > T(-1); +} + +// Undefined results for x == 0. 
+static JXL_INLINE JXL_MAYBE_UNUSED size_t +Num0BitsAboveMS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) { + JXL_DASSERT(x != 0); +#if JXL_COMPILER_MSVC + unsigned long index; + _BitScanReverse(&index, x); + return 31 - index; +#else + return static_cast(__builtin_clz(x)); +#endif +} +static JXL_INLINE JXL_MAYBE_UNUSED size_t +Num0BitsAboveMS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) { + JXL_DASSERT(x != 0); +#if JXL_COMPILER_MSVC +#if JXL_ARCH_X64 + unsigned long index; + _BitScanReverse64(&index, x); + return 63 - index; +#else // JXL_ARCH_X64 + // _BitScanReverse64 not available + uint32_t msb = static_cast(x >> 32u); + unsigned long index; + if (msb == 0) { + uint32_t lsb = static_cast(x & 0xFFFFFFFF); + _BitScanReverse(&index, lsb); + return 63 - index; + } else { + _BitScanReverse(&index, msb); + return 31 - index; + } +#endif // JXL_ARCH_X64 +#else + return static_cast(__builtin_clzll(x)); +#endif +} +template +static JXL_INLINE JXL_MAYBE_UNUSED size_t +Num0BitsAboveMS1Bit_Nonzero(const T x) { + static_assert(!IsSigned(), "Num0BitsAboveMS1Bit_Nonzero: use unsigned"); + return Num0BitsAboveMS1Bit_Nonzero(SizeTag(), x); +} + +// Undefined results for x == 0. 
+static JXL_INLINE JXL_MAYBE_UNUSED size_t +Num0BitsBelowLS1Bit_Nonzero(SizeTag<4> /* tag */, const uint32_t x) { + JXL_DASSERT(x != 0); +#if JXL_COMPILER_MSVC + unsigned long index; + _BitScanForward(&index, x); + return index; +#else + return static_cast(__builtin_ctz(x)); +#endif +} +static JXL_INLINE JXL_MAYBE_UNUSED size_t +Num0BitsBelowLS1Bit_Nonzero(SizeTag<8> /* tag */, const uint64_t x) { + JXL_DASSERT(x != 0); +#if JXL_COMPILER_MSVC +#if JXL_ARCH_X64 + unsigned long index; + _BitScanForward64(&index, x); + return index; +#else // JXL_ARCH_X64 + // _BitScanForward64 not available + uint32_t lsb = static_cast(x & 0xFFFFFFFF); + unsigned long index; + if (lsb == 0) { + uint32_t msb = static_cast(x >> 32u); + _BitScanForward(&index, msb); + return 32 + index; + } else { + _BitScanForward(&index, lsb); + return index; + } +#endif // JXL_ARCH_X64 +#else + return static_cast(__builtin_ctzll(x)); +#endif +} +template +static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit_Nonzero(T x) { + static_assert(!IsSigned(), "Num0BitsBelowLS1Bit_Nonzero: use unsigned"); + return Num0BitsBelowLS1Bit_Nonzero(SizeTag(), x); +} + +// Returns bit width for x == 0. +template +static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsAboveMS1Bit(const T x) { + return (x == 0) ? sizeof(T) * 8 : Num0BitsAboveMS1Bit_Nonzero(x); +} + +// Returns bit width for x == 0. +template +static JXL_INLINE JXL_MAYBE_UNUSED size_t Num0BitsBelowLS1Bit(const T x) { + return (x == 0) ? sizeof(T) * 8 : Num0BitsBelowLS1Bit_Nonzero(x); +} + +// Returns base-2 logarithm, rounded down. +template +static JXL_INLINE JXL_MAYBE_UNUSED size_t FloorLog2Nonzero(const T x) { + return (sizeof(T) * 8 - 1) ^ Num0BitsAboveMS1Bit_Nonzero(x); +} + +// Returns base-2 logarithm, rounded up.
+template +static JXL_INLINE JXL_MAYBE_UNUSED size_t CeilLog2Nonzero(const T x) { + const size_t floor_log2 = FloorLog2Nonzero(x); + if ((x & (x - 1)) == 0) return floor_log2; // power of two + return floor_log2 + 1; +} + +} // namespace jxl + +#endif // LIB_JXL_BASE_BITS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/byte_order.h b/third-party/libjxl/libjxl/lib/jxl/base/byte_order.h new file mode 100644 index 0000000000..8966834e08 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/byte_order.h @@ -0,0 +1,274 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_BYTE_ORDER_H_ +#define LIB_JXL_BASE_BYTE_ORDER_H_ + +#include +#include +#include // memcpy + +#include "lib/jxl/base/compiler_specific.h" + +#if JXL_COMPILER_MSVC +#include // _byteswap_* +#endif + +#if (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) +#define JXL_BYTE_ORDER_LITTLE 1 +#else +// This means that we don't know that the byte order is little endian, in +// this case we use endian-neutral code that works for both little- and +// big-endian. +#define JXL_BYTE_ORDER_LITTLE 0 +#endif + +// Returns whether the system is little-endian (least-significant byte first). 
+#if JXL_BYTE_ORDER_LITTLE +static constexpr bool IsLittleEndian() { return true; } +#else +static inline bool IsLittleEndian() { + const uint32_t multibyte = 1; + uint8_t byte; + memcpy(&byte, &multibyte, 1); + return byte == 1; +} +#endif + +static inline bool SwapEndianness(JxlEndianness endianness) { + return ((endianness == JXL_BIG_ENDIAN && IsLittleEndian()) || + (endianness == JXL_LITTLE_ENDIAN && !IsLittleEndian())); +} + +#if JXL_COMPILER_MSVC +#define JXL_BSWAP16(x) _byteswap_ushort(x) +#define JXL_BSWAP32(x) _byteswap_ulong(x) +#define JXL_BSWAP64(x) _byteswap_uint64(x) +#else +#define JXL_BSWAP16(x) __builtin_bswap16(x) +#define JXL_BSWAP32(x) __builtin_bswap32(x) +#define JXL_BSWAP64(x) __builtin_bswap64(x) +#endif + +static JXL_INLINE uint32_t LoadBE16(const uint8_t* p) { + const uint32_t byte1 = p[0]; + const uint32_t byte0 = p[1]; + return (byte1 << 8) | byte0; +} + +static JXL_INLINE uint32_t LoadLE16(const uint8_t* p) { + const uint32_t byte0 = p[0]; + const uint32_t byte1 = p[1]; + return (byte1 << 8) | byte0; +} + +static JXL_INLINE uint32_t LoadBE32(const uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + uint32_t big; + memcpy(&big, p, 4); + return JXL_BSWAP32(big); +#else + // Byte-order-independent - can't assume this machine is big endian. + const uint32_t byte3 = p[0]; + const uint32_t byte2 = p[1]; + const uint32_t byte1 = p[2]; + const uint32_t byte0 = p[3]; + return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; +#endif +} + +static JXL_INLINE uint64_t LoadBE64(const uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + uint64_t big; + memcpy(&big, p, 8); + return JXL_BSWAP64(big); +#else + // Byte-order-independent - can't assume this machine is big endian. 
+ const uint64_t byte7 = p[0]; + const uint64_t byte6 = p[1]; + const uint64_t byte5 = p[2]; + const uint64_t byte4 = p[3]; + const uint64_t byte3 = p[4]; + const uint64_t byte2 = p[5]; + const uint64_t byte1 = p[6]; + const uint64_t byte0 = p[7]; + return (byte7 << 56ull) | (byte6 << 48ull) | (byte5 << 40ull) | + (byte4 << 32ull) | (byte3 << 24ull) | (byte2 << 16ull) | + (byte1 << 8ull) | byte0; +#endif +} + +static JXL_INLINE uint32_t LoadLE32(const uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + uint32_t little; + memcpy(&little, p, 4); + return little; +#else + // Byte-order-independent - can't assume this machine is big endian. + const uint32_t byte0 = p[0]; + const uint32_t byte1 = p[1]; + const uint32_t byte2 = p[2]; + const uint32_t byte3 = p[3]; + return (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; +#endif +} + +static JXL_INLINE uint64_t LoadLE64(const uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + uint64_t little; + memcpy(&little, p, 8); + return little; +#else + // Byte-order-independent - can't assume this machine is big endian. 
+ const uint64_t byte0 = p[0]; + const uint64_t byte1 = p[1]; + const uint64_t byte2 = p[2]; + const uint64_t byte3 = p[3]; + const uint64_t byte4 = p[4]; + const uint64_t byte5 = p[5]; + const uint64_t byte6 = p[6]; + const uint64_t byte7 = p[7]; + return (byte7 << 56) | (byte6 << 48) | (byte5 << 40) | (byte4 << 32) | + (byte3 << 24) | (byte2 << 16) | (byte1 << 8) | byte0; +#endif +} + +// Loads a Big-Endian float +static JXL_INLINE float LoadBEFloat(const uint8_t* p) { + uint32_t u = LoadBE32(p); + float result; + memcpy(&result, &u, 4); + return result; +} + +// Loads a Little-Endian float +static JXL_INLINE float LoadLEFloat(const uint8_t* p) { + uint32_t u = LoadLE32(p); + float result; + memcpy(&result, &u, 4); + return result; +} + +static JXL_INLINE void StoreBE16(const uint32_t native, uint8_t* p) { + p[0] = (native >> 8) & 0xFF; + p[1] = native & 0xFF; +} + +static JXL_INLINE void StoreLE16(const uint32_t native, uint8_t* p) { + p[1] = (native >> 8) & 0xFF; + p[0] = native & 0xFF; +} + +static JXL_INLINE void StoreBE32(const uint32_t native, uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + const uint32_t big = JXL_BSWAP32(native); + memcpy(p, &big, 4); +#else + // Byte-order-independent - can't assume this machine is big endian. + p[0] = native >> 24; + p[1] = (native >> 16) & 0xFF; + p[2] = (native >> 8) & 0xFF; + p[3] = native & 0xFF; +#endif +} + +static JXL_INLINE void StoreBE64(const uint64_t native, uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + const uint64_t big = JXL_BSWAP64(native); + memcpy(p, &big, 8); +#else + // Byte-order-independent - can't assume this machine is big endian. 
+ p[0] = native >> 56ull; + p[1] = (native >> 48ull) & 0xFF; + p[2] = (native >> 40ull) & 0xFF; + p[3] = (native >> 32ull) & 0xFF; + p[4] = (native >> 24ull) & 0xFF; + p[5] = (native >> 16ull) & 0xFF; + p[6] = (native >> 8ull) & 0xFF; + p[7] = native & 0xFF; +#endif +} + +static JXL_INLINE void StoreLE32(const uint32_t native, uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + const uint32_t little = native; + memcpy(p, &little, 4); +#else + // Byte-order-independent - can't assume this machine is big endian. + p[3] = native >> 24; + p[2] = (native >> 16) & 0xFF; + p[1] = (native >> 8) & 0xFF; + p[0] = native & 0xFF; +#endif +} + +static JXL_INLINE void StoreLE64(const uint64_t native, uint8_t* p) { +#if JXL_BYTE_ORDER_LITTLE + const uint64_t little = native; + memcpy(p, &little, 8); +#else + // Byte-order-independent - can't assume this machine is big endian. + p[7] = native >> 56; + p[6] = (native >> 48) & 0xFF; + p[5] = (native >> 40) & 0xFF; + p[4] = (native >> 32) & 0xFF; + p[3] = (native >> 24) & 0xFF; + p[2] = (native >> 16) & 0xFF; + p[1] = (native >> 8) & 0xFF; + p[0] = native & 0xFF; +#endif +} + +static JXL_INLINE float BSwapFloat(float x) { + uint32_t u; + memcpy(&u, &x, 4); + uint32_t uswap = JXL_BSWAP32(u); + float xswap; + memcpy(&xswap, &uswap, 4); + return xswap; +} + +// Big/Little Endian order. +struct OrderBE {}; +struct OrderLE {}; + +// Wrappers for calling from generic code. 
+static JXL_INLINE void Store16(OrderBE /*tag*/, const uint32_t native, + uint8_t* p) { + return StoreBE16(native, p); +} + +static JXL_INLINE void Store16(OrderLE /*tag*/, const uint32_t native, + uint8_t* p) { + return StoreLE16(native, p); +} + +static JXL_INLINE void Store32(OrderBE /*tag*/, const uint32_t native, + uint8_t* p) { + return StoreBE32(native, p); +} + +static JXL_INLINE void Store32(OrderLE /*tag*/, const uint32_t native, + uint8_t* p) { + return StoreLE32(native, p); +} + +static JXL_INLINE uint32_t Load16(OrderBE /*tag*/, const uint8_t* p) { + return LoadBE16(p); +} + +static JXL_INLINE uint32_t Load16(OrderLE /*tag*/, const uint8_t* p) { + return LoadLE16(p); +} + +static JXL_INLINE uint32_t Load32(OrderBE /*tag*/, const uint8_t* p) { + return LoadBE32(p); +} + +static JXL_INLINE uint32_t Load32(OrderLE /*tag*/, const uint8_t* p) { + return LoadLE32(p); +} + +#endif // LIB_JXL_BASE_BYTE_ORDER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.cc b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.cc new file mode 100644 index 0000000000..9a9cc585a1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.cc @@ -0,0 +1,157 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/base/cache_aligned.h" + +#include +#include + +// Disabled: slower than malloc + alignment. 
+#define JXL_USE_MMAP 0 + +#if JXL_USE_MMAP +#include +#endif + +#include // std::max +#include +#include // kMaxVectorSize +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace { + +#pragma pack(push, 1) +struct AllocationHeader { + void* allocated; + size_t allocated_size; + uint8_t left_padding[hwy::kMaxVectorSize]; +}; +#pragma pack(pop) + +std::atomic num_allocations{0}; +std::atomic bytes_in_use{0}; +std::atomic max_bytes_in_use{0}; + +} // namespace + +// Avoids linker errors in pre-C++17 builds. +constexpr size_t CacheAligned::kPointerSize; +constexpr size_t CacheAligned::kCacheLineSize; +constexpr size_t CacheAligned::kAlignment; +constexpr size_t CacheAligned::kAlias; + +void CacheAligned::PrintStats() { + fprintf( + stderr, "Allocations: %" PRIuS " (max bytes in use: %E)\n", + static_cast(num_allocations.load(std::memory_order_relaxed)), + static_cast(max_bytes_in_use.load(std::memory_order_relaxed))); +} + +size_t CacheAligned::NextOffset() { + static std::atomic next{0}; + constexpr uint32_t kGroups = CacheAligned::kAlias / CacheAligned::kAlignment; + const uint32_t group = next.fetch_add(1, std::memory_order_relaxed) % kGroups; + return CacheAligned::kAlignment * group; +} + +void* CacheAligned::Allocate(const size_t payload_size, size_t offset) { + JXL_ASSERT(payload_size <= std::numeric_limits::max() / 2); + JXL_ASSERT((offset % kAlignment == 0) && offset <= kAlias); + + // What: | misalign | unused | AllocationHeader |payload + // Size: |<= kAlias | offset | |payload_size + // ^allocated.^aligned.^header............^payload + // The header must immediately precede payload, which must remain aligned. + // To avoid wasting space, the header resides at the end of `unused`, + // which therefore cannot be empty (offset == 0). + if (offset == 0) { + // SVE/RVV vectors can be large, so we cannot rely on them (including the + // padding at the end of AllocationHeader) to fit in kAlignment. 
+ offset = hwy::RoundUpTo(sizeof(AllocationHeader), kAlignment); + } + +#if JXL_USE_MMAP + const size_t allocated_size = offset + payload_size; + const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE; + void* allocated = + mmap(nullptr, allocated_size, PROT_READ | PROT_WRITE, flags, -1, 0); + if (allocated == MAP_FAILED) return nullptr; + const uintptr_t aligned = reinterpret_cast(allocated); +#else + const size_t allocated_size = kAlias + offset + payload_size; + void* allocated = malloc(allocated_size); + if (allocated == nullptr) return nullptr; + // Always round up even if already aligned - we already asked for kAlias + // extra bytes and there's no way to give them back. + uintptr_t aligned = reinterpret_cast(allocated) + kAlias; + static_assert((kAlias & (kAlias - 1)) == 0, "kAlias must be a power of 2"); + static_assert(kAlias >= kAlignment, "Cannot align to more than kAlias"); + aligned &= ~(kAlias - 1); +#endif + +#if 0 + // No effect. + uintptr_t page_aligned = reinterpret_cast(allocated); + page_aligned &= ~(4096 - 1); + if (madvise(reinterpret_cast(page_aligned), allocated_size, + MADV_WILLNEED) != 0) { + JXL_NOTIFY_ERROR("madvise failed"); + } +#elif 0 + // INCREASES both first and subsequent decode times. + if (mlock(allocated, allocated_size) != 0) { + JXL_NOTIFY_ERROR("mlock failed"); + } +#endif + + // Update statistics (#allocations and max bytes in use) + num_allocations.fetch_add(1, std::memory_order_relaxed); + const uint64_t prev_bytes = + bytes_in_use.fetch_add(allocated_size, std::memory_order_acq_rel); + uint64_t expected_max = max_bytes_in_use.load(std::memory_order_acquire); + for (;;) { + const uint64_t desired = + std::max(expected_max, prev_bytes + allocated_size); + if (max_bytes_in_use.compare_exchange_strong(expected_max, desired, + std::memory_order_acq_rel)) { + break; + } + } + + const uintptr_t payload = aligned + offset; // still aligned + + // Stash `allocated` and `allocated_size` inside header for use by Free().
+ AllocationHeader* header = reinterpret_cast(payload) - 1; + header->allocated = allocated; + header->allocated_size = allocated_size; + + return JXL_ASSUME_ALIGNED(reinterpret_cast(payload), 64); +} + +void CacheAligned::Free(const void* aligned_pointer) { + if (aligned_pointer == nullptr) { + return; + } + const uintptr_t payload = reinterpret_cast(aligned_pointer); + JXL_ASSERT(payload % kAlignment == 0); + const AllocationHeader* header = + reinterpret_cast(payload) - 1; + + // Subtract (2's complement negation). + bytes_in_use.fetch_add(~header->allocated_size + 1, + std::memory_order_acq_rel); + +#if JXL_USE_MMAP + munmap(header->allocated, header->allocated_size); +#else + free(header->allocated); +#endif +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.h b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.h new file mode 100644 index 0000000000..e57df14837 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/cache_aligned.h @@ -0,0 +1,74 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_CACHE_ALIGNED_H_ +#define LIB_JXL_BASE_CACHE_ALIGNED_H_ + +// Memory allocator with support for alignment + misalignment. + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" + +namespace jxl { + +// Functions that depend on the cache line size. +class CacheAligned { + public: + static void PrintStats(); + + static constexpr size_t kPointerSize = sizeof(void*); + static constexpr size_t kCacheLineSize = 64; + // To avoid RFOs, match L2 fill size (pairs of lines). + static constexpr size_t kAlignment = 2 * kCacheLineSize; + // Minimum multiple for which cache set conflicts and/or loads blocked by + // preceding stores can occur. + static constexpr size_t kAlias = 2048; + + // Returns a 'random' (cyclical) offset suitable for Allocate. 
+ static size_t NextOffset(); + + // Returns null or memory whose address is congruent to `offset` (mod kAlias). + // This reduces cache conflicts and load/store stalls, especially with large + // allocations that would otherwise have similar alignments. At least + // `payload_size` (which can be zero) bytes will be accessible. + static void* Allocate(size_t payload_size, size_t offset); + + static void* Allocate(const size_t payload_size) { + return Allocate(payload_size, NextOffset()); + } + + static void Free(const void* aligned_pointer); +}; + +// Avoids the need for a function pointer (deleter) in CacheAlignedUniquePtr. +struct CacheAlignedDeleter { + void operator()(uint8_t* aligned_pointer) const { + return CacheAligned::Free(aligned_pointer); + } +}; + +using CacheAlignedUniquePtr = std::unique_ptr; + +// Does not invoke constructors. +static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes) { + return CacheAlignedUniquePtr( + static_cast(CacheAligned::Allocate(bytes)), + CacheAlignedDeleter()); +} + +static inline CacheAlignedUniquePtr AllocateArray(const size_t bytes, + const size_t offset) { + return CacheAlignedUniquePtr( + static_cast(CacheAligned::Allocate(bytes, offset)), + CacheAlignedDeleter()); +} + +} // namespace jxl + +#endif // LIB_JXL_BASE_CACHE_ALIGNED_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/compiler_specific.h b/third-party/libjxl/libjxl/lib/jxl/base/compiler_specific.h new file mode 100644 index 0000000000..702ff8e058 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/compiler_specific.h @@ -0,0 +1,157 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_COMPILER_SPECIFIC_H_ +#define LIB_JXL_BASE_COMPILER_SPECIFIC_H_ + +// Macros for compiler version + nonstandard keywords, e.g. __builtin_expect. 
+ +#include +#include + +#include "lib/jxl/base/sanitizer_definitions.h" + +// #if is shorter and safer than #ifdef. *_VERSION are zero if not detected, +// otherwise 100 * major + minor version. Note that other packages check for +// #ifdef COMPILER_MSVC, so we cannot use that same name. + +#ifdef _MSC_VER +#define JXL_COMPILER_MSVC _MSC_VER +#else +#define JXL_COMPILER_MSVC 0 +#endif + +#ifdef __GNUC__ +#define JXL_COMPILER_GCC (__GNUC__ * 100 + __GNUC_MINOR__) +#else +#define JXL_COMPILER_GCC 0 +#endif + +#ifdef __clang__ +#define JXL_COMPILER_CLANG (__clang_major__ * 100 + __clang_minor__) +// Clang pretends to be GCC for compatibility. +#undef JXL_COMPILER_GCC +#define JXL_COMPILER_GCC 0 +#else +#define JXL_COMPILER_CLANG 0 +#endif + +#if JXL_COMPILER_MSVC +#define JXL_RESTRICT __restrict +#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG +#define JXL_RESTRICT __restrict__ +#else +#define JXL_RESTRICT +#endif + +#if JXL_COMPILER_MSVC +#define JXL_INLINE __forceinline +#define JXL_NOINLINE __declspec(noinline) +#else +#define JXL_INLINE inline __attribute__((always_inline)) +#define JXL_NOINLINE __attribute__((noinline)) +#endif + +#if JXL_COMPILER_MSVC +#define JXL_NORETURN __declspec(noreturn) +#elif JXL_COMPILER_GCC || JXL_COMPILER_CLANG +#define JXL_NORETURN __attribute__((noreturn)) +#else +#define JXL_NORETURN +#endif + +#if JXL_COMPILER_MSVC +#define JXL_UNREACHABLE_BUILTIN __assume(false) +#elif JXL_COMPILER_CLANG || JXL_COMPILER_GCC >= 405 +#define JXL_UNREACHABLE_BUILTIN __builtin_unreachable() +#else +#define JXL_UNREACHABLE_BUILTIN +#endif + +#if JXL_COMPILER_MSVC +#define JXL_MAYBE_UNUSED +#else +// Encountered "attribute list cannot appear here" when using the C++17 +// [[maybe_unused]], so only use the old style attribute for now. +#define JXL_MAYBE_UNUSED __attribute__((unused)) +#endif + +// MSAN execution won't hurt if some code it not inlined, but this can greatly +// improve compilation time. 
Unfortunately this macro can not be used just +// everywhere - inside header files it leads to "multiple definition" error; +// though it would be better not to have JXL_INLINE in header overall. +#if JXL_MEMORY_SANITIZER || JXL_ADDRESS_SANITIZER || JXL_THREAD_SANITIZER +#define JXL_MAYBE_INLINE JXL_MAYBE_UNUSED +#else +#define JXL_MAYBE_INLINE JXL_INLINE +#endif + +#if JXL_COMPILER_MSVC +// Unsupported, __assume is not the same. +#define JXL_LIKELY(expr) expr +#define JXL_UNLIKELY(expr) expr +#else +#define JXL_LIKELY(expr) __builtin_expect(!!(expr), 1) +#define JXL_UNLIKELY(expr) __builtin_expect(!!(expr), 0) +#endif + +// Returns a void* pointer which the compiler then assumes is N-byte aligned. +// Example: float* JXL_RESTRICT aligned = (float*)JXL_ASSUME_ALIGNED(in, 32); +// +// The assignment semantics are required by GCC/Clang. ICC provides an in-place +// __assume_aligned, whereas MSVC's __assume appears unsuitable. +#if JXL_COMPILER_CLANG +// Early versions of Clang did not support __builtin_assume_aligned. +#define JXL_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned) +#elif JXL_COMPILER_GCC +#define JXL_HAS_ASSUME_ALIGNED 1 +#else +#define JXL_HAS_ASSUME_ALIGNED 0 +#endif + +#if JXL_HAS_ASSUME_ALIGNED +#define JXL_ASSUME_ALIGNED(ptr, align) __builtin_assume_aligned((ptr), (align)) +#else +#define JXL_ASSUME_ALIGNED(ptr, align) (ptr) /* not supported */ +#endif + +#ifdef __has_attribute +#define JXL_HAVE_ATTRIBUTE(x) __has_attribute(x) +#else +#define JXL_HAVE_ATTRIBUTE(x) 0 +#endif + +// Raises warnings if the function return value is unused. Should appear as the +// first part of a function definition/declaration. 
+#if JXL_HAVE_ATTRIBUTE(nodiscard) +#define JXL_MUST_USE_RESULT [[nodiscard]] +#elif JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(warn_unused_result) +#define JXL_MUST_USE_RESULT __attribute__((warn_unused_result)) +#else +#define JXL_MUST_USE_RESULT +#endif + +// Disable certain -fsanitize flags for functions that are expected to include +// things like unsigned integer overflow. For example use in the function +// declaration JXL_NO_SANITIZE("unsigned-integer-overflow") to silence unsigned +// integer overflow ubsan messages. +#if JXL_COMPILER_CLANG && JXL_HAVE_ATTRIBUTE(no_sanitize) +#define JXL_NO_SANITIZE(X) __attribute__((no_sanitize(X))) +#else +#define JXL_NO_SANITIZE(X) +#endif + +#if JXL_HAVE_ATTRIBUTE(__format__) +#define JXL_FORMAT(idx_fmt, idx_arg) \ + __attribute__((__format__(__printf__, idx_fmt, idx_arg))) +#else +#define JXL_FORMAT(idx_fmt, idx_arg) +#endif + +#if JXL_COMPILER_MSVC +using ssize_t = intptr_t; +#endif + +#endif // LIB_JXL_BASE_COMPILER_SPECIFIC_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.cc b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.cc new file mode 100644 index 0000000000..20a911255c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.cc @@ -0,0 +1,23 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/base/data_parallel.h" + +namespace jxl { + +// static +JxlParallelRetCode ThreadPool::SequentialRunnerStatic( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) { + JxlParallelRetCode init_ret = (*init)(jpegxl_opaque, 1); + if (init_ret != 0) return init_ret; + + for (uint32_t i = start_range; i < end_range; i++) { + (*func)(jpegxl_opaque, i, 0); + } + return 0; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.h b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.h new file mode 100644 index 0000000000..ba7e7adfad --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/data_parallel.h @@ -0,0 +1,120 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_DATA_PARALLEL_H_ +#define LIB_JXL_BASE_DATA_PARALLEL_H_ + +// Portable, low-overhead C++11 ThreadPool alternative to OpenMP for +// data-parallel computations. + +#include +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/status.h" +#if JXL_COMPILER_MSVC +// suppress warnings about the const & applied to function types +#pragma warning(disable : 4180) +#endif + +namespace jxl { + +class ThreadPool { + public: + ThreadPool(JxlParallelRunner runner, void* runner_opaque) + : runner_(runner ? runner : &ThreadPool::SequentialRunnerStatic), + runner_opaque_(runner ? runner_opaque : static_cast(this)) {} + + ThreadPool(const ThreadPool&) = delete; + ThreadPool& operator=(const ThreadPool&) = delete; + + JxlParallelRunner runner() const { return runner_; } + void* runner_opaque() const { return runner_opaque_; } + + // Runs init_func(num_threads) followed by data_func(task, thread) on worker + // thread(s) for every task in [begin, end).
init_func() must return a Status + // indicating whether the initialization succeeded. + // "thread" is an integer smaller than num_threads. + // Not thread-safe - no two calls to Run may overlap. + // Subsequent calls will reuse the same threads. + // + // Precondition: begin <= end. + template + Status Run(uint32_t begin, uint32_t end, const InitFunc& init_func, + const DataFunc& data_func, const char* caller = "") { + JXL_ASSERT(begin <= end); + if (begin == end) return true; + RunCallState call_state(init_func, data_func); + // The runner_ uses the C convention and returns 0 on success, so we + // convert it to a Status. + return (*runner_)(runner_opaque_, static_cast(&call_state), + &call_state.CallInitFunc, &call_state.CallDataFunc, begin, + end) == 0; + } + + // Use this as init_func when no initialization is needed. + static Status NoInit(size_t num_threads) { return true; } + + private: + // class holding the state of a Run() call to pass to the runner_ as an + // opaque_jpegxl pointer. + template + class RunCallState final { + public: + RunCallState(const InitFunc& init_func, const DataFunc& data_func) + : init_func_(init_func), data_func_(data_func) {} + + // JxlParallelRunInit interface. + static int CallInitFunc(void* jpegxl_opaque, size_t num_threads) { + const auto* self = + static_cast*>(jpegxl_opaque); + // Returns -1 when the internal init function returns false Status to + // indicate an error. + return self->init_func_(num_threads) ? 0 : -1; + } + + // JxlParallelRunFunction interface. + static void CallDataFunc(void* jpegxl_opaque, uint32_t value, + size_t thread_id) { + const auto* self = + static_cast*>(jpegxl_opaque); + return self->data_func_(value, thread_id); + } + + private: + const InitFunc& init_func_; + const DataFunc& data_func_; + }; + + // Default JxlParallelRunner used when no runner is provided by the + // caller. This runner doesn't use any threading and thread_id is always 0.
+ static JxlParallelRetCode SequentialRunnerStatic( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range); + + // The caller supplied runner function and its opaque void*. + const JxlParallelRunner runner_; + void* const runner_opaque_; +}; + +template +Status RunOnPool(ThreadPool* pool, const uint32_t begin, const uint32_t end, + const InitFunc& init_func, const DataFunc& data_func, + const char* caller) { + if (pool == nullptr) { + ThreadPool default_pool(nullptr, nullptr); + return default_pool.Run(begin, end, init_func, data_func, caller); + } else { + return pool->Run(begin, end, init_func, data_func, caller); + } +} + +} // namespace jxl +#if JXL_COMPILER_MSVC +#pragma warning(default : 4180) +#endif + +#endif // LIB_JXL_BASE_DATA_PARALLEL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/float.h b/third-party/libjxl/libjxl/lib/jxl/base/float.h new file mode 100644 index 0000000000..b17413fc19 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/float.h @@ -0,0 +1,99 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_FLOAT_H_ +#define LIB_JXL_BASE_FLOAT_H_ + +#include +#include +#include +#include + +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +namespace { +// Based on highway scalar implementation, for testing +float LoadFloat16(uint16_t bits16) { + const uint32_t sign = bits16 >> 15; + const uint32_t biased_exp = (bits16 >> 10) & 0x1F; + const uint32_t mantissa = bits16 & 0x3FF; + + // Subnormal or zero + if (biased_exp == 0) { + const float subnormal = + (1.0f / 16384) * (static_cast(mantissa) * (1.0f / 1024)); + return sign ? -subnormal : subnormal; + } + + // Normalized: convert the representation directly (faster than ldexp/tables). 
+ const uint32_t biased_exp32 = biased_exp + (127 - 15); + const uint32_t mantissa32 = mantissa << (23 - 10); + const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32; + + float result; + memcpy(&result, &bits32, 4); + return result; +} +} // namespace + +template +static Status JXL_INLINE LoadFloatRow(const uint8_t* src, size_t count, + size_t stride, JxlDataType type, + bool little_endian, float scale, + SaveFloatAtFn callback) { + switch (type) { + case JXL_TYPE_FLOAT: + if (little_endian) { + for (size_t i = 0; i < count; ++i) { + callback(i, LoadLEFloat(src + stride * i)); + } + } else { + for (size_t i = 0; i < count; ++i) { + callback(i, LoadBEFloat(src + stride * i)); + } + } + return true; + + case JXL_TYPE_UINT8: + for (size_t i = 0; i < count; ++i) { + callback(i, src[stride * i] * scale); + } + return true; + + case JXL_TYPE_UINT16: + if (little_endian) { + for (size_t i = 0; i < count; ++i) { + callback(i, LoadLE16(src + stride * i) * scale); + } + } else { + for (size_t i = 0; i < count; ++i) { + callback(i, LoadBE16(src + stride * i) * scale); + } + } + return true; + + case JXL_TYPE_FLOAT16: + if (little_endian) { + for (size_t i = 0; i < count; ++i) { + callback(i, LoadFloat16(LoadLE16(src + stride * i))); + } + } else { + for (size_t i = 0; i < count; ++i) { + callback(i, LoadFloat16(LoadBE16(src + stride * i))); + } + } + return true; + + default: + return JXL_FAILURE("Unsupported sample format"); + } +} + +} // namespace jxl + +#endif // LIB_JXL_BASE_FLOAT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/iaca.h b/third-party/libjxl/libjxl/lib/jxl/base/iaca.h new file mode 100644 index 0000000000..e5732dae5c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/iaca.h @@ -0,0 +1,65 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_BASE_IACA_H_ +#define LIB_JXL_BASE_IACA_H_ + +#include "lib/jxl/base/compiler_specific.h" + +// IACA (Intel's Code Analyzer) analyzes instruction latencies, but only for +// code between special markers. These functions embed such markers in an +// executable, but only for reading via IACA - they deliberately trigger a +// crash if executed to ensure they are removed in normal builds. + +#ifndef JXL_IACA_ENABLED +#define JXL_IACA_ENABLED 0 +#endif + +namespace jxl { + +// Call before the region of interest. +static JXL_INLINE void BeginIACA() { +#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG) + asm volatile( + // UD2 "instruction" raises an invalid opcode exception. + ".byte 0x0F, 0x0B\n\t" + // Magic sequence recognized by IACA (MOV + addr32 fs:NOP). This actually + // clobbers EBX, but we don't care because the code won't be run, and we + // want IACA to observe the same code the compiler would have generated + // without this marker. + "movl $111, %%ebx\n\t" + ".byte 0x64, 0x67, 0x90\n\t" + : + : + // (Allegedly) clobbering memory may prevent reordering. + : "memory"); +#endif +} + +// Call after the region of interest. +static JXL_INLINE void EndIACA() { +#if JXL_IACA_ENABLED && (JXL_COMPILER_GCC || JXL_COMPILER_CLANG) + asm volatile( + // See above. + "movl $222, %%ebx\n\t" + ".byte 0x64, 0x67, 0x90\n\t" + // UD2 + ".byte 0x0F, 0x0B\n\t" + : + : + // (Allegedly) clobbering memory may prevent reordering. + : "memory"); +#endif +} + +// Add to a scope to mark a region. +struct ScopeIACA { + JXL_INLINE ScopeIACA() { BeginIACA(); } + JXL_INLINE ~ScopeIACA() { EndIACA(); } +}; + +} // namespace jxl + +#endif // LIB_JXL_BASE_IACA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/os_macros.h b/third-party/libjxl/libjxl/lib/jxl/base/os_macros.h new file mode 100644 index 0000000000..84d0b82bf5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/os_macros.h @@ -0,0 +1,50 @@ +// Copyright (c) the JPEG XL Project Authors. 
All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_OS_MACROS_H_ +#define LIB_JXL_BASE_OS_MACROS_H_ + +// Defines the JXL_OS_* macros. + +#if defined(_WIN32) || defined(_WIN64) +#define JXL_OS_WIN 1 +#else +#define JXL_OS_WIN 0 +#endif + +#ifdef __linux__ +#define JXL_OS_LINUX 1 +#else +#define JXL_OS_LINUX 0 +#endif + +#ifdef __APPLE__ +#define JXL_OS_MAC 1 +#else +#define JXL_OS_MAC 0 +#endif + +#define JXL_OS_IOS 0 +#ifdef __APPLE__ +#include +#if TARGET_OS_IPHONE +#undef JXL_OS_IOS +#define JXL_OS_IOS 1 +#endif +#endif + +#ifdef __FreeBSD__ +#define JXL_OS_FREEBSD 1 +#else +#define JXL_OS_FREEBSD 0 +#endif + +#ifdef __HAIKU__ +#define JXL_OS_HAIKU 1 +#else +#define JXL_OS_HAIKU 0 +#endif + +#endif // LIB_JXL_BASE_OS_MACROS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/override.h b/third-party/libjxl/libjxl/lib/jxl/base/override.h new file mode 100644 index 0000000000..1f8b657974 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/override.h @@ -0,0 +1,29 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_OVERRIDE_H_ +#define LIB_JXL_BASE_OVERRIDE_H_ + +// 'Trool' for command line arguments: force enable/disable, or use default. + +namespace jxl { + +// No effect if kDefault, otherwise forces a feature (typically a FrameHeader +// flag) on or off. +enum class Override : int { kOn = 1, kOff = 0, kDefault = -1 }; + +static inline Override OverrideFromBool(bool flag) { + return flag ? 
Override::kOn : Override::kOff; +} + +static inline bool ApplyOverride(Override o, bool default_condition) { + if (o == Override::kOn) return true; + if (o == Override::kOff) return false; + return default_condition; +} + +} // namespace jxl + +#endif // LIB_JXL_BASE_OVERRIDE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.cc b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.cc new file mode 100644 index 0000000000..11e4bff6fe --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.cc @@ -0,0 +1,63 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/base/padded_bytes.h" + +namespace jxl { + +void PaddedBytes::IncreaseCapacityTo(size_t capacity) { + JXL_ASSERT(capacity > capacity_); + + size_t new_capacity = std::max(capacity, 3 * capacity_ / 2); + new_capacity = std::max(64, new_capacity); + + // BitWriter writes up to 7 bytes past the end. + CacheAlignedUniquePtr new_data = AllocateArray(new_capacity + 8); + if (new_data == nullptr) { + // Allocation failed, discard all data to ensure this is noticed. + size_ = capacity_ = 0; + return; + } + + if (data_ == nullptr) { + // First allocation: ensure first byte is initialized (won't be copied). + new_data[0] = 0; + } else { + // Subsequent resize: copy existing data to new location. + memcpy(new_data.get(), data_.get(), size_); + // Ensure that the first new byte is initialized, to allow write_bits to + // safely append to the newly-resized PaddedBytes. + new_data[size_] = 0; + } + + capacity_ = new_capacity; + std::swap(new_data, data_); +} + +void PaddedBytes::assign(const uint8_t* new_begin, const uint8_t* new_end) { + JXL_DASSERT(new_begin <= new_end); + const size_t new_size = static_cast(new_end - new_begin); + + // memcpy requires non-overlapping ranges, and resizing might invalidate the + // new range. 
Neither happens if the new range is completely to the left or + // right of the _allocated_ range (irrespective of size_). + const uint8_t* allocated_end = begin() + capacity_; + const bool outside = new_end <= begin() || new_begin >= allocated_end; + if (outside) { + resize(new_size); // grow or shrink + memcpy(data(), new_begin, new_size); + return; + } + + // There is overlap. The new size cannot be larger because we own the memory + // and the new range cannot include anything outside the allocated range. + JXL_ASSERT(new_size <= capacity_); + + // memmove allows overlap and capacity_ is sufficient. + memmove(data(), new_begin, new_size); + size_ = new_size; // shrink +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.h b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.h new file mode 100644 index 0000000000..4534ddf863 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/padded_bytes.h @@ -0,0 +1,197 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_PADDED_BYTES_H_ +#define LIB_JXL_BASE_PADDED_BYTES_H_ + +// std::vector replacement with padding to reduce bounds checks in WriteBits + +#include +#include +#include // memcpy + +#include // max +#include +#include // swap + +#include "lib/jxl/base/cache_aligned.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +// Provides a subset of the std::vector interface with some differences: +// - allows BitWriter to write 64 bits at a time without bounds checking; +// - ONLY zero-initializes the first byte (required by BitWriter); +// - ensures cache-line alignment. +class PaddedBytes { + public: + // Required for output params. 
+ PaddedBytes() : size_(0), capacity_(0) {} + + explicit PaddedBytes(size_t size) : size_(size), capacity_(0) { + if (size != 0) IncreaseCapacityTo(size); + } + + PaddedBytes(size_t size, uint8_t value) : size_(size), capacity_(0) { + if (size != 0) { + IncreaseCapacityTo(size); + } + if (size_ != 0) { + memset(data(), value, size); + } + } + + PaddedBytes(const PaddedBytes& other) : size_(other.size_), capacity_(0) { + if (size_ != 0) IncreaseCapacityTo(size_); + if (data() != nullptr) memcpy(data(), other.data(), size_); + } + PaddedBytes& operator=(const PaddedBytes& other) { + // Self-assignment is safe. + resize(other.size()); + if (data() != nullptr) memmove(data(), other.data(), size_); + return *this; + } + + // default is not OK - need to set other.size_ to 0! + PaddedBytes(PaddedBytes&& other) noexcept + : size_(other.size_), + capacity_(other.capacity_), + data_(std::move(other.data_)) { + other.size_ = other.capacity_ = 0; + } + PaddedBytes& operator=(PaddedBytes&& other) noexcept { + size_ = other.size_; + capacity_ = other.capacity_; + data_ = std::move(other.data_); + + if (&other != this) { + other.size_ = other.capacity_ = 0; + } + return *this; + } + + void swap(PaddedBytes& other) { + std::swap(size_, other.size_); + std::swap(capacity_, other.capacity_); + std::swap(data_, other.data_); + } + + void reserve(size_t capacity) { + if (capacity > capacity_) IncreaseCapacityTo(capacity); + } + + // NOTE: unlike vector, this does not initialize the new data! + // However, we guarantee that write_bits can safely append after + // the resize, as we zero-initialize the first new byte of data. + // If size < capacity(), does not invalidate the memory. + void resize(size_t size) { + if (size > capacity_) IncreaseCapacityTo(size); + size_ = (data() == nullptr) ? 0 : size; + } + + // resize(size) plus explicit initialization of the new data with `value`. 
+ void resize(size_t size, uint8_t value) { + size_t old_size = size_; + resize(size); + if (size_ > old_size) { + memset(data() + old_size, value, size_ - old_size); + } + } + + // Amortized constant complexity due to exponential growth. + void push_back(uint8_t x) { + if (size_ == capacity_) { + IncreaseCapacityTo(capacity_ + 1); + if (data() == nullptr) return; + } + + data_[size_++] = x; + } + + size_t size() const { return size_; } + size_t capacity() const { return capacity_; } + + uint8_t* data() { return data_.get(); } + const uint8_t* data() const { return data_.get(); } + + // std::vector operations implemented in terms of the public interface above. + + void clear() { resize(0); } + bool empty() const { return size() == 0; } + + void assign(std::initializer_list il) { + resize(il.size()); + memcpy(data(), il.begin(), il.size()); + } + + // Replaces data() with [new_begin, new_end); potentially reallocates. + void assign(const uint8_t* new_begin, const uint8_t* new_end); + + uint8_t* begin() { return data(); } + const uint8_t* begin() const { return data(); } + uint8_t* end() { return begin() + size(); } + const uint8_t* end() const { return begin() + size(); } + + uint8_t& operator[](const size_t i) { + BoundsCheck(i); + return data()[i]; + } + const uint8_t& operator[](const size_t i) const { + BoundsCheck(i); + return data()[i]; + } + + uint8_t& back() { + JXL_ASSERT(size() != 0); + return data()[size() - 1]; + } + const uint8_t& back() const { + JXL_ASSERT(size() != 0); + return data()[size() - 1]; + } + + template + void append(const T& other) { + append(reinterpret_cast(other.data()), + reinterpret_cast(other.data()) + other.size()); + } + + void append(const uint8_t* begin, const uint8_t* end) { + if (end - begin > 0) { + size_t old_size = size(); + resize(size() + (end - begin)); + memcpy(data() + old_size, begin, end - begin); + } + } + + private: + void BoundsCheck(size_t i) const { + // <= is safe due to padding and required by BitWriter. 
+ JXL_ASSERT(i <= size()); + } + + // Copies existing data to newly allocated "data_". If allocation fails, + // data() == nullptr and size_ = capacity_ = 0. + // The new capacity will be at least 1.5 times the old capacity. This ensures + // that we avoid quadratic behaviour. + void IncreaseCapacityTo(size_t capacity); + + size_t size_; + size_t capacity_; + CacheAlignedUniquePtr data_; +}; + +template +static inline void Append(const T& s, PaddedBytes* out, + size_t* JXL_RESTRICT byte_pos) { + JXL_CHECK(*byte_pos + s.size() <= out->size()); + memcpy(out->data() + *byte_pos, s.data(), s.size()); + *byte_pos += s.size(); +} + +} // namespace jxl + +#endif // LIB_JXL_BASE_PADDED_BYTES_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/printf_macros.h b/third-party/libjxl/libjxl/lib/jxl/base/printf_macros.h new file mode 100644 index 0000000000..3215052afd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/printf_macros.h @@ -0,0 +1,34 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_PRINTF_MACROS_H_ +#define LIB_JXL_BASE_PRINTF_MACROS_H_ + +// Format string macros. These should be included after any other system +// library since those may unconditionally define these, depending on the +// platform. + +// PRIuS and PRIdS macros to print size_t and ssize_t respectively.
+#if !defined(PRIdS) +#if defined(_WIN64) +#define PRIdS "lld" +#elif defined(_WIN32) +#define PRIdS "d" +#else +#define PRIdS "zd" +#endif +#endif // PRIdS + +#if !defined(PRIuS) +#if defined(_WIN64) +#define PRIuS "llu" +#elif defined(_WIN32) +#define PRIuS "u" +#else +#define PRIuS "zu" +#endif +#endif // PRIuS + +#endif // LIB_JXL_BASE_PRINTF_MACROS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/random.cc b/third-party/libjxl/libjxl/lib/jxl/base/random.cc new file mode 100644 index 0000000000..c99f88921c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/random.cc @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/base/random.h" + +#include + +namespace jxl { + +Rng::GeometricDistribution::GeometricDistribution(float p) + : inv_log_1mp(1.0 / std::log(1 - p)) {} + +uint32_t Rng::Geometric(const GeometricDistribution& dist) { + float f = UniformF(0, 1); + float log = std::log(1 - f) * dist.inv_log_1mp; + return static_cast(log); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/base/random.h b/third-party/libjxl/libjxl/lib/jxl/base/random.h new file mode 100644 index 0000000000..663b88c95d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/random.h @@ -0,0 +1,95 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_RANDOM_ +#define LIB_JXL_BASE_RANDOM_ + +// Random number generator + distributions. +// We don't use because the implementation (and thus results) differs +// between libstdc++ and libc++. 
+ +#include +#include + +#include + +#include "lib/jxl/base/status.h" + +namespace jxl { +struct Rng { + explicit Rng(size_t seed) + : s{static_cast(0x94D049BB133111EBull), + static_cast(0xBF58476D1CE4E5B9ull) + seed} {} + + // Xorshift128+ adapted from xorshift128+-inl.h + uint64_t operator()() { + uint64_t s1 = s[0]; + const uint64_t s0 = s[1]; + const uint64_t bits = s1 + s0; // b, c + s[0] = s0; + s1 ^= s1 << 23; + s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); + s[1] = s1; + return bits; + } + + // Uniformly distributed int64_t in [begin, end), under the assumption that + // `end-begin` is significantly smaller than 1<<64, otherwise there is some + // bias. + int64_t UniformI(int64_t begin, int64_t end) { + JXL_DASSERT(end > begin); + return static_cast((*this)() % + static_cast(end - begin)) + + begin; + } + + // Same as UniformI, but for uint64_t. + uint64_t UniformU(uint64_t begin, uint64_t end) { + JXL_DASSERT(end > begin); + return (*this)() % (end - begin) + begin; + } + + // Uniformly distributed float in [begin, end) range. Note: only 23 bits of + // randomness. + float UniformF(float begin, float end) { + float f; + // Bits of a random [1, 2) float. + uint32_t u = ((*this)() >> (64 - 23)) | 0x3F800000; + static_assert(sizeof(f) == sizeof(u), + "Float and U32 must have the same size"); + memcpy(&f, &u, sizeof(f)); + // Note: (end-begin) * f + (2*begin-end) may fail to return a number >= + // begin. + return (end - begin) * (f - 1.0f) + begin; + } + + // Bernoulli trial + bool Bernoulli(float p) { return UniformF(0, 1) < p; } + + // State for geometric distributions. 
+ struct GeometricDistribution { + explicit GeometricDistribution(float p); + + private: + float inv_log_1mp; + friend struct Rng; + }; + + uint32_t Geometric(const GeometricDistribution& dist); + + template + void Shuffle(T* t, size_t n) { + for (size_t i = 0; i + 1 < n; i++) { + size_t a = UniformU(i, n); + std::swap(t[a], t[i]); + } + } + + private: + uint64_t s[2]; +}; + +} // namespace jxl +#endif // LIB_JXL_BASE_RANDOM_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/sanitizer_definitions.h b/third-party/libjxl/libjxl/lib/jxl/base/sanitizer_definitions.h new file mode 100644 index 0000000000..315f3bd003 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/sanitizer_definitions.h @@ -0,0 +1,44 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_SANITIZER_DEFINITIONS_H_ +#define LIB_JXL_BASE_SANITIZER_DEFINITIONS_H_ + +#ifdef MEMORY_SANITIZER +#define JXL_MEMORY_SANITIZER 1 +#elif defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define JXL_MEMORY_SANITIZER 1 +#else +#define JXL_MEMORY_SANITIZER 0 +#endif +#else +#define JXL_MEMORY_SANITIZER 0 +#endif + +#ifdef ADDRESS_SANITIZER +#define JXL_ADDRESS_SANITIZER 1 +#elif defined(__has_feature) +#if __has_feature(address_sanitizer) +#define JXL_ADDRESS_SANITIZER 1 +#else +#define JXL_ADDRESS_SANITIZER 0 +#endif +#else +#define JXL_ADDRESS_SANITIZER 0 +#endif + +#ifdef THREAD_SANITIZER +#define JXL_THREAD_SANITIZER 1 +#elif defined(__has_feature) +#if __has_feature(thread_sanitizer) +#define JXL_THREAD_SANITIZER 1 +#else +#define JXL_THREAD_SANITIZER 0 +#endif +#else +#define JXL_THREAD_SANITIZER 0 +#endif +#endif // LIB_JXL_BASE_SANITIZER_DEFINITIONS_H diff --git a/third-party/libjxl/libjxl/lib/jxl/base/scope_guard.h b/third-party/libjxl/libjxl/lib/jxl/base/scope_guard.h new file mode 100644 index 0000000000..a18a44cb79 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/base/scope_guard.h @@ -0,0 +1,48 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_SCOPE_GUARD_H_ +#define LIB_JXL_BASE_SCOPE_GUARD_H_ + +#include + +namespace jxl { + +template +class ScopeGuard { + public: + // Discourage unnecessary moves / copies. + ScopeGuard(const ScopeGuard &) = delete; + ScopeGuard &operator=(const ScopeGuard &) = delete; + ScopeGuard &operator=(ScopeGuard &&) = delete; + + // Pre-C++17 does not guarantee RVO -> require move constructor. + ScopeGuard(ScopeGuard &&other) : callback_(std::move(other.callback_)) { + other.armed_ = false; + } + + template + explicit ScopeGuard(CallbackParam &&callback) + : callback_(std::forward(callback)), armed_(true) {} + + ~ScopeGuard() { + if (armed_) callback_(); + } + + void Disarm() { armed_ = false; } + + private: + Callback callback_; + bool armed_; +}; + +template +ScopeGuard MakeScopeGuard(Callback &&callback) { + return ScopeGuard{std::forward(callback)}; +} + +} // namespace jxl + +#endif // LIB_JXL_BASE_SCOPE_GUARD_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/span.h b/third-party/libjxl/libjxl/lib/jxl/base/span.h new file mode 100644 index 0000000000..41c3623a4b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/span.h @@ -0,0 +1,60 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_SPAN_H_ +#define LIB_JXL_BASE_SPAN_H_ + +// Span (array view) is a non-owning container that provides cheap "cut" +// operations and could be used as "ArrayLike" data source for PaddedBytes. 
+ +#include + +#include "lib/jxl/base/status.h" + +namespace jxl { + +template +class Span { + public: + constexpr Span() noexcept : Span(nullptr, 0) {} + + constexpr Span(T* array, size_t length) noexcept + : ptr_(array), len_(length) {} + + template + explicit constexpr Span(T (&a)[N]) noexcept : Span(a, N) {} + + template + explicit constexpr Span(const ArrayLike& other) noexcept + : Span(reinterpret_cast(other.data()), other.size()) { + static_assert(sizeof(*other.data()) == sizeof(T), + "Incompatible type of source."); + } + + constexpr T* data() const noexcept { return ptr_; } + + constexpr size_t size() const noexcept { return len_; } + + constexpr bool empty() const noexcept { return len_ == 0; } + + constexpr T& operator[](size_t i) const noexcept { + // MSVC 2015 accepts this as constexpr, but not ptr_[i] + return *(data() + i); + } + + void remove_prefix(size_t n) noexcept { + JXL_ASSERT(size() >= n); + ptr_ += n; + len_ -= n; + } + + private: + T* ptr_; + size_t len_; +}; + +} // namespace jxl + +#endif // LIB_JXL_BASE_SPAN_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/base/status.h b/third-party/libjxl/libjxl/lib/jxl/base/status.h new file mode 100644 index 0000000000..45e7244ce5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/base/status.h @@ -0,0 +1,429 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BASE_STATUS_H_ +#define LIB_JXL_BASE_STATUS_H_ + +// Error handling: Status return type + helper macros. 
+ +#include +#include +#include +#include + +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/sanitizer_definitions.h" + +#if JXL_ADDRESS_SANITIZER || JXL_MEMORY_SANITIZER || JXL_THREAD_SANITIZER +#include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace +#endif // defined(*_SANITIZER) + +namespace jxl { + +// Uncomment to abort when JXL_FAILURE or JXL_STATUS with a fatal error is +// reached: +// #define JXL_CRASH_ON_ERROR + +#ifndef JXL_ENABLE_ASSERT +#define JXL_ENABLE_ASSERT 1 +#endif + +#ifndef JXL_ENABLE_CHECK +#define JXL_ENABLE_CHECK 1 +#endif + +// Pass -DJXL_DEBUG_ON_ERROR at compile time to print debug messages when a +// function returns JXL_FAILURE or calls JXL_NOTIFY_ERROR. Note that this is +// irrelevant if you also pass -DJXL_CRASH_ON_ERROR. +#if defined(JXL_DEBUG_ON_ERROR) || defined(JXL_CRASH_ON_ERROR) +#undef JXL_DEBUG_ON_ERROR +#define JXL_DEBUG_ON_ERROR 1 +#else // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR +#ifdef NDEBUG +#define JXL_DEBUG_ON_ERROR 0 +#else // NDEBUG +#define JXL_DEBUG_ON_ERROR 1 +#endif // NDEBUG +#endif // JXL_DEBUG_ON_ERROR || JXL_CRASH_ON_ERROR + +// Pass -DJXL_DEBUG_ON_ALL_ERROR at compile time to print debug messages on +// all error (fatal and non-fatal) status. This implies JXL_DEBUG_ON_ERROR. +#if defined(JXL_DEBUG_ON_ALL_ERROR) +#undef JXL_DEBUG_ON_ALL_ERROR +#define JXL_DEBUG_ON_ALL_ERROR 1 +// JXL_DEBUG_ON_ALL_ERROR implies JXL_DEBUG_ON_ERROR too. +#undef JXL_DEBUG_ON_ERROR +#define JXL_DEBUG_ON_ERROR 1 +#else // JXL_DEBUG_ON_ALL_ERROR +#define JXL_DEBUG_ON_ALL_ERROR 0 +#endif // JXL_DEBUG_ON_ALL_ERROR + +// The Verbose level for the library +#ifndef JXL_DEBUG_V_LEVEL +#define JXL_DEBUG_V_LEVEL 0 +#endif // JXL_DEBUG_V_LEVEL + +// Pass -DJXL_DEBUG_ON_ABORT={0,1} to force disable/enable the debug messages on +// JXL_ASSERT, JXL_CHECK and JXL_ABORT. 
+#ifndef JXL_DEBUG_ON_ABORT +#define JXL_DEBUG_ON_ABORT JXL_DEBUG_ON_ERROR +#endif // JXL_DEBUG_ON_ABORT + +// Print a debug message on standard error. You should use the JXL_DEBUG macro +// instead of calling Debug directly. This function returns false, so it can be +// used as a return value in JXL_FAILURE. +JXL_FORMAT(1, 2) +inline JXL_NOINLINE bool Debug(const char* format, ...) { + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + return false; +} + +// Print a debug message on standard error if "enabled" is true. "enabled" is +// normally a macro that evaluates to 0 or 1 at compile time, so the Debug +// function is never called and optimized out in release builds. Note that the +// arguments are compiled but not evaluated when enabled is false. The format +// string must be a explicit string in the call, for example: +// JXL_DEBUG(JXL_DEBUG_MYMODULE, "my module message: %d", some_var); +// Add a header at the top of your module's .cc or .h file (depending on whether +// you have JXL_DEBUG calls from the .h as well) like this: +// #ifndef JXL_DEBUG_MYMODULE +// #define JXL_DEBUG_MYMODULE 0 +// #endif JXL_DEBUG_MYMODULE +#define JXL_DEBUG_TMP(format, ...) \ + ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__) + +#define JXL_DEBUG(enabled, format, ...) \ + do { \ + if (enabled) { \ + JXL_DEBUG_TMP(format, ##__VA_ARGS__); \ + } \ + } while (0) + +// JXL_DEBUG version that prints the debug message if the global verbose level +// defined at compile time by JXL_DEBUG_V_LEVEL is greater or equal than the +// passed level. +#define JXL_DEBUG_V(level, format, ...) \ + JXL_DEBUG(level <= JXL_DEBUG_V_LEVEL, format, ##__VA_ARGS__) + +// Warnings (via JXL_WARNING) are enabled by default in debug builds (opt and +// debug). 
+#ifdef JXL_DEBUG_WARNING +#undef JXL_DEBUG_WARNING +#define JXL_DEBUG_WARNING 1 +#else // JXL_DEBUG_WARNING +#ifdef NDEBUG +#define JXL_DEBUG_WARNING 0 +#else // JXL_DEBUG_WARNING +#define JXL_DEBUG_WARNING 1 +#endif // NDEBUG +#endif // JXL_DEBUG_WARNING +#define JXL_WARNING(format, ...) \ + JXL_DEBUG(JXL_DEBUG_WARNING, format, ##__VA_ARGS__) + +// Exits the program after printing a stack trace when possible. +JXL_NORETURN inline JXL_NOINLINE bool Abort() { +#if JXL_ADDRESS_SANITIZER || JXL_MEMORY_SANITIZER || JXL_THREAD_SANITIZER + // If compiled with any sanitizer print a stack trace. This call doesn't crash + // the program, instead the trap below will crash it also allowing gdb to + // break there. + __sanitizer_print_stack_trace(); +#endif // *_SANITIZER) + +#if JXL_COMPILER_MSVC + __debugbreak(); + abort(); +#else + __builtin_trap(); +#endif +} + +// Exits the program after printing file/line plus a formatted string. +#define JXL_ABORT(format, ...) \ + ((JXL_DEBUG_ON_ABORT) && ::jxl::Debug(("%s:%d: JXL_ABORT: " format "\n"), \ + __FILE__, __LINE__, ##__VA_ARGS__), \ + ::jxl::Abort()) + +// Use this for code paths that are unreachable unless the code would change +// to make it reachable, in which case it will print a warning and abort in +// debug builds. In release builds no code is produced for this, so only use +// this if this path is really unreachable. +#define JXL_UNREACHABLE(format, ...) \ + do { \ + if (JXL_DEBUG_WARNING) { \ + ::jxl::Debug(("%s:%d: JXL_UNREACHABLE: " format "\n"), __FILE__, \ + __LINE__, ##__VA_ARGS__); \ + ::jxl::Abort(); \ + } else { \ + JXL_UNREACHABLE_BUILTIN; \ + } \ + } while (0) + +// Does not guarantee running the code, use only for debug mode checks. 
+#if JXL_ENABLE_ASSERT +#define JXL_ASSERT(condition) \ + do { \ + if (!(condition)) { \ + JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_ASSERT: %s", #condition); \ + ::jxl::Abort(); \ + } \ + } while (0) +#else +#define JXL_ASSERT(condition) \ + do { \ + } while (0) +#endif + +// Define JXL_IS_DEBUG_BUILD that denotes asan, msan and other debug builds, +// but not opt or release. +#ifndef JXL_IS_DEBUG_BUILD +#if !defined(NDEBUG) || defined(ADDRESS_SANITIZER) || \ + defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) || \ + defined(__clang_analyzer__) +#define JXL_IS_DEBUG_BUILD 1 +#else +#define JXL_IS_DEBUG_BUILD 0 +#endif +#endif // JXL_IS_DEBUG_BUILD + +// Same as above, but only runs in debug builds (builds where NDEBUG is not +// defined). This is useful for slower asserts that we want to run more rarely +// than usual. These will run on asan, msan and other debug builds, but not in +// opt or release. +#if JXL_IS_DEBUG_BUILD +#define JXL_DASSERT(condition) \ + do { \ + if (!(condition)) { \ + JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_DASSERT: %s", #condition); \ + ::jxl::Abort(); \ + } \ + } while (0) +#else +#define JXL_DASSERT(condition) \ + do { \ + } while (0) +#endif + +// Always runs the condition, so can be used for non-debug calls. +#if JXL_ENABLE_CHECK +#define JXL_CHECK(condition) \ + do { \ + if (!(condition)) { \ + JXL_DEBUG(JXL_DEBUG_ON_ABORT, "JXL_CHECK: %s", #condition); \ + ::jxl::Abort(); \ + } \ + } while (0) +#else +#define JXL_CHECK(condition) \ + do { \ + (void)(condition); \ + } while (0) +#endif + +// A jxl::Status value from a StatusCode or Status which prints a debug message +// when enabled. +#define JXL_STATUS(status, format, ...) \ + ::jxl::StatusMessage(::jxl::Status(status), "%s:%d: " format "\n", __FILE__, \ + __LINE__, ##__VA_ARGS__) + +// Notify of an error but discard the resulting Status value. This is only +// useful for debug builds or when building with JXL_CRASH_ON_ERROR. +#define JXL_NOTIFY_ERROR(format, ...) 
\ + (void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_ERROR: " format, \ + ##__VA_ARGS__) + +// An error Status with a message. The JXL_STATUS() macro will return a Status +// object with a kGenericError code, but the comma operator helps with +// clang-tidy inference and potentially with optimizations. +#define JXL_FAILURE(format, ...) \ + ((void)JXL_STATUS(::jxl::StatusCode::kGenericError, "JXL_FAILURE: " format, \ + ##__VA_ARGS__), \ + ::jxl::Status(::jxl::StatusCode::kGenericError)) + +// Always evaluates the status exactly once, so can be used for non-debug calls. +// Returns from the current context if the passed Status expression is an error +// (fatal or non-fatal). The return value is the passed Status. +#define JXL_RETURN_IF_ERROR(status) \ + do { \ + ::jxl::Status jxl_return_if_error_status = (status); \ + if (!jxl_return_if_error_status) { \ + (void)::jxl::StatusMessage( \ + jxl_return_if_error_status, \ + "%s:%d: JXL_RETURN_IF_ERROR code=%d: %s\n", __FILE__, __LINE__, \ + static_cast(jxl_return_if_error_status.code()), #status); \ + return jxl_return_if_error_status; \ + } \ + } while (0) + +// As above, but without calling StatusMessage. Intended for bundles (see +// fields.h), which have numerous call sites (-> relevant for code size) and do +// not want to generate excessive messages when decoding partial headers. +#define JXL_QUIET_RETURN_IF_ERROR(status) \ + do { \ + ::jxl::Status jxl_return_if_error_status = (status); \ + if (!jxl_return_if_error_status) { \ + return jxl_return_if_error_status; \ + } \ + } while (0) + +enum class StatusCode : int32_t { + // Non-fatal errors (negative values). + kNotEnoughBytes = -1, + + // The only non-error status code. + kOk = 0, + + // Fatal-errors (positive values) + kGenericError = 1, +}; + +// Drop-in replacement for bool that raises compiler warnings if not used +// after being returned from a function. Example: +// Status LoadFile(...) 
{ return true; } is more compact than +// bool JXL_MUST_USE_RESULT LoadFile(...) { return true; } +// In case of error, the status can carry an extra error code in its value which +// is split between fatal and non-fatal error codes. +class JXL_MUST_USE_RESULT Status { + public: + // We want implicit constructor from bool to allow returning "true" or "false" + // on a function when using Status. "true" means kOk while "false" means a + // generic fatal error. + // NOLINTNEXTLINE(google-explicit-constructor) + constexpr Status(bool ok) + : code_(ok ? StatusCode::kOk : StatusCode::kGenericError) {} + + // NOLINTNEXTLINE(google-explicit-constructor) + constexpr Status(StatusCode code) : code_(code) {} + + // We also want implicit cast to bool to check for return values of functions. + // NOLINTNEXTLINE(google-explicit-constructor) + constexpr operator bool() const { return code_ == StatusCode::kOk; } + + constexpr StatusCode code() const { return code_; } + + // Returns whether the status code is a fatal error. + constexpr bool IsFatalError() const { + return static_cast(code_) > 0; + } + + private: + StatusCode code_; +}; + +// Helper function to create a Status and print the debug message or abort when +// needed. +inline JXL_FORMAT(2, 3) Status + StatusMessage(const Status status, const char* format, ...) { + // This block will be optimized out when JXL_DEBUG_ON_ERROR and + // JXL_DEBUG_ON_ALL_ERROR are both disabled. + if ((JXL_DEBUG_ON_ERROR && status.IsFatalError()) || + (JXL_DEBUG_ON_ALL_ERROR && !status)) { + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + } +#ifdef JXL_CRASH_ON_ERROR + // JXL_CRASH_ON_ERROR means to Abort() only on non-fatal errors. 
+ if (status.IsFatalError()) { + Abort(); + } +#endif // JXL_CRASH_ON_ERROR + return status; +} + +template +class JXL_MUST_USE_RESULT StatusOr { + static_assert(!std::is_convertible::value && + !std::is_convertible::value, + "You cannot make a StatusOr with a type convertible from or to " + "StatusCode"); + static_assert(std::is_move_constructible::value && + std::is_move_assignable::value, + "T must be move constructible and move assignable"); + + public: + // NOLINTNEXTLINE(google-explicit-constructor) + StatusOr(StatusCode code) : code_(code) { + JXL_ASSERT(code_ != StatusCode::kOk); + } + + // NOLINTNEXTLINE(google-explicit-constructor) + StatusOr(Status status) : StatusOr(status.code()) {} + + // NOLINTNEXTLINE(google-explicit-constructor) + StatusOr(T&& value) : code_(StatusCode::kOk) { + new (&storage_.data_) T(std::move(value)); + } + + StatusOr(StatusOr&& other) noexcept { + if (other.ok()) { + new (&storage_.data_) T(std::move(other.storage_.data_)); + } + code_ = other.code_; + } + + StatusOr& operator=(StatusOr&& other) noexcept { + if (this == &other) return *this; + if (ok() && other.ok()) { + storage_.data_ = std::move(other.storage_.data_); + } else if (other.ok()) { + new (&storage_.data_) T(std::move(other.storage_.data_)); + } else if (ok()) { + storage_.data_.~T(); + } + code_ = other.code_; + return *this; + } + + StatusOr(const StatusOr&) = delete; + StatusOr operator=(const StatusOr&) = delete; + + bool ok() const { return code_ == StatusCode::kOk; } + Status status() const { return code_; } + + // Only call this if you are absolutely sure that `ok()` is true. + // Ideally, never call this manually and rely on JXL_ASSIGN_OR_RETURN. 
+ T value() && { + JXL_ASSERT(ok()); + return std::move(storage_.data_); + } + + ~StatusOr() { + if (code_ == StatusCode::kOk) { + storage_.data_.~T(); + } + } + + private: + union Storage { + char dummy_; + T data_; + Storage() {} + ~Storage() {} + } storage_; + + StatusCode code_; +}; + +#define JXL_ASSIGN_OR_RETURN(lhs, statusor) \ + PRIVATE_JXL_ASSIGN_OR_RETURN_IMPL( \ + assign_or_return_temporary_variable##__LINE__, lhs, statusor) + +// NOLINTBEGIN(bugprone-macro-parentheses) +#define PRIVATE_JXL_ASSIGN_OR_RETURN_IMPL(name, lhs, statusor) \ + auto name = std::move(statusor); \ + JXL_RETURN_IF_ERROR(name.status()); \ + lhs = std::move(name).value(); +// NOLINTEND(bugprone-macro-parentheses) + +} // namespace jxl + +#endif // LIB_JXL_BASE_STATUS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/bit_reader_test.cc b/third-party/libjxl/libjxl/lib/jxl/bit_reader_test.cc new file mode 100644 index 0000000000..24cc9b64e8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/bit_reader_test.cc @@ -0,0 +1,262 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include +#include + +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(BitReaderTest, ExtendsWithZeroes) { + for (size_t size = 4; size < 32; ++size) { + std::vector data(size, 0xff); + + for (size_t n_bytes = 0; n_bytes < size; n_bytes++) { + BitReader br(Span(data.data(), n_bytes)); + // Read all the bits + for (size_t i = 0; i < n_bytes * kBitsPerByte; i++) { + ASSERT_EQ(br.ReadBits(1), 1u) << "n_bytes=" << n_bytes << " i=" << i; + } + + // PEEK more than the declared size - all will be zero. Cannot consume. + for (size_t i = 0; i < BitReader::kMaxBitsPerCall; i++) { + ASSERT_EQ(br.PeekBits(i), 0u) + << "size=" << size << "n_bytes=" << n_bytes << " i=" << i; + } + + EXPECT_TRUE(br.Close()); + } + } +} + +struct Symbol { + uint32_t num_bits; + uint32_t value; +}; + +// Reading from output gives the same values. 
+TEST(BitReaderTest, TestRoundTrip) { + test::ThreadPoolForTests pool(8); + EXPECT_TRUE(RunOnPool( + &pool, 0, 1000, ThreadPool::NoInit, + [](const uint32_t task, size_t /* thread */) { + constexpr size_t kMaxBits = 8000; + BitWriter writer; + BitWriter::Allotment allotment(&writer, kMaxBits); + + std::vector symbols; + symbols.reserve(1000); + + Rng rng(55537 + 129 * task); + + for (;;) { + const uint32_t num_bits = rng.UniformU(1, 33); + if (writer.BitsWritten() + num_bits > kMaxBits) break; + const uint32_t value = rng.UniformU(0, 1ULL << num_bits); + symbols.push_back({num_bits, value}); + writer.Write(num_bits, value); + } + + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + BitReader reader(writer.GetSpan()); + for (const Symbol& s : symbols) { + EXPECT_EQ(s.value, reader.ReadBits(s.num_bits)); + } + EXPECT_TRUE(reader.Close()); + }, + "TestTBitReaderRoundTrip")); +} + +// SkipBits is the same as reading that many bits. +TEST(BitReaderTest, TestSkip) { + test::ThreadPoolForTests pool(8); + EXPECT_TRUE(RunOnPool( + &pool, 0, 96, ThreadPool::NoInit, + [](const uint32_t task, size_t /* thread */) { + constexpr size_t kSize = 100; + + for (size_t skip = 0; skip < 128; ++skip) { + BitWriter writer; + BitWriter::Allotment allotment(&writer, kSize * kBitsPerByte); + // Start with "task" 1-bits. 
+ for (size_t i = 0; i < task; ++i) { + writer.Write(1, 1); + } + + // Write 0-bits that we will skip over + for (size_t i = 0; i < skip; ++i) { + writer.Write(1, 0); + } + + // Write terminator bits '101' + writer.Write(3, 5); + EXPECT_EQ(task + skip + 3, writer.BitsWritten()); + writer.ZeroPadToByte(); + AuxOut aux_out; + allotment.ReclaimAndCharge(&writer, 0, &aux_out); + EXPECT_LT(aux_out.layers[0].total_bits, kSize * 8); + + BitReader reader1(writer.GetSpan()); + BitReader reader2(writer.GetSpan()); + // Verify initial 1-bits + for (size_t i = 0; i < task; ++i) { + EXPECT_EQ(1u, reader1.ReadBits(1)); + EXPECT_EQ(1u, reader2.ReadBits(1)); + } + + // SkipBits or manually read "skip" bits + reader1.SkipBits(skip); + for (size_t i = 0; i < skip; ++i) { + EXPECT_EQ(0u, reader2.ReadBits(1)) + << " skip=" << skip << " i=" << i; + } + EXPECT_EQ(reader1.TotalBitsConsumed(), reader2.TotalBitsConsumed()); + + // Ensure both readers see the terminator bits. + EXPECT_EQ(5u, reader1.ReadBits(3)); + EXPECT_EQ(5u, reader2.ReadBits(3)); + + EXPECT_TRUE(reader1.Close()); + EXPECT_TRUE(reader2.Close()); + } + }, + "TestSkip")); +} + +// Verifies byte order and different groupings of bits. 
+TEST(BitReaderTest, TestOrder) { + constexpr size_t kMaxBits = 16; + + // u(1) - bits written into LSBs of first byte + { + BitWriter writer; + BitWriter::Allotment allotment(&writer, kMaxBits); + for (size_t i = 0; i < 5; ++i) { + writer.Write(1, 1); + } + for (size_t i = 0; i < 5; ++i) { + writer.Write(1, 0); + } + for (size_t i = 0; i < 6; ++i) { + writer.Write(1, 1); + } + + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + BitReader reader(writer.GetSpan()); + EXPECT_EQ(0x1Fu, reader.ReadFixedBits<8>()); + EXPECT_EQ(0xFCu, reader.ReadFixedBits<8>()); + EXPECT_TRUE(reader.Close()); + } + + // u(8) - get bytes in the same order + { + BitWriter writer; + BitWriter::Allotment allotment(&writer, kMaxBits); + writer.Write(8, 0xF8); + writer.Write(8, 0x3F); + + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + BitReader reader(writer.GetSpan()); + EXPECT_EQ(0xF8u, reader.ReadFixedBits<8>()); + EXPECT_EQ(0x3Fu, reader.ReadFixedBits<8>()); + EXPECT_TRUE(reader.Close()); + } + + // u(16) - little-endian bytes + { + BitWriter writer; + BitWriter::Allotment allotment(&writer, kMaxBits); + writer.Write(16, 0xF83F); + + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + BitReader reader(writer.GetSpan()); + EXPECT_EQ(0x3Fu, reader.ReadFixedBits<8>()); + EXPECT_EQ(0xF8u, reader.ReadFixedBits<8>()); + EXPECT_TRUE(reader.Close()); + } + + // Non-byte-aligned, mixed sizes + { + BitWriter writer; + BitWriter::Allotment allotment(&writer, kMaxBits); + writer.Write(1, 1); + writer.Write(3, 6); + writer.Write(8, 0xDB); + writer.Write(4, 8); + + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + BitReader reader(writer.GetSpan()); + EXPECT_EQ(0xBDu, reader.ReadFixedBits<8>()); + EXPECT_EQ(0x8Du, reader.ReadFixedBits<8>()); + EXPECT_TRUE(reader.Close()); + } +} + +TEST(BitReaderTest, TotalCountersTest) { + uint8_t buf[8] = {1, 2, 3, 4}; + BitReader reader(Span(buf, 
sizeof(buf))); + + EXPECT_EQ(sizeof(buf), reader.TotalBytes()); + EXPECT_EQ(0u, reader.TotalBitsConsumed()); + reader.ReadFixedBits<1>(); + EXPECT_EQ(1u, reader.TotalBitsConsumed()); + + reader.ReadFixedBits<10>(); + EXPECT_EQ(11u, reader.TotalBitsConsumed()); + + reader.ReadFixedBits<4>(); + EXPECT_EQ(15u, reader.TotalBitsConsumed()); + + reader.ReadFixedBits<1>(); + EXPECT_EQ(16u, reader.TotalBitsConsumed()); + + reader.ReadFixedBits<16>(); + EXPECT_EQ(32u, reader.TotalBitsConsumed()); + + EXPECT_TRUE(reader.Close()); +} + +TEST(BitReaderTest, MoveTest) { + uint8_t buf[8] = {1, 2, 3, 4}; + BitReader reader2; + { + BitReader reader1(Span(buf, sizeof(buf))); + + EXPECT_EQ(0u, reader1.TotalBitsConsumed()); + reader1.ReadFixedBits<16>(); + EXPECT_EQ(16u, reader1.TotalBitsConsumed()); + + reader2 = std::move(reader1); + // From this point reader1 is invalid, but can continue to access reader2 + // and we don't need to call Close() on reader1. + } + + EXPECT_EQ(16u, reader2.TotalBitsConsumed()); + EXPECT_EQ(3U, reader2.ReadFixedBits<8>()); + EXPECT_EQ(24u, reader2.TotalBitsConsumed()); + + EXPECT_TRUE(reader2.Close()); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/bits_test.cc b/third-party/libjxl/libjxl/lib/jxl/bits_test.cc new file mode 100644 index 0000000000..bd7aa548c8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/bits_test.cc @@ -0,0 +1,87 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/base/bits.h" + +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(BitsTest, TestNumZeroBits) { + // Zero input is well-defined. 
+ EXPECT_EQ(32u, Num0BitsAboveMS1Bit(0u)); + EXPECT_EQ(64u, Num0BitsAboveMS1Bit(0ull)); + EXPECT_EQ(32u, Num0BitsBelowLS1Bit(0u)); + EXPECT_EQ(64u, Num0BitsBelowLS1Bit(0ull)); + + EXPECT_EQ(31u, Num0BitsAboveMS1Bit(1u)); + EXPECT_EQ(30u, Num0BitsAboveMS1Bit(2u)); + EXPECT_EQ(63u, Num0BitsAboveMS1Bit(1ull)); + EXPECT_EQ(62u, Num0BitsAboveMS1Bit(2ull)); + + EXPECT_EQ(0u, Num0BitsBelowLS1Bit(1u)); + EXPECT_EQ(0u, Num0BitsBelowLS1Bit(1ull)); + EXPECT_EQ(1u, Num0BitsBelowLS1Bit(2u)); + EXPECT_EQ(1u, Num0BitsBelowLS1Bit(2ull)); + + EXPECT_EQ(0u, Num0BitsAboveMS1Bit(0x80000000u)); + EXPECT_EQ(0u, Num0BitsAboveMS1Bit(0x8000000000000000ull)); + EXPECT_EQ(31u, Num0BitsBelowLS1Bit(0x80000000u)); + EXPECT_EQ(63u, Num0BitsBelowLS1Bit(0x8000000000000000ull)); +} + +TEST(BitsTest, TestFloorLog2) { + // for input = [1, 7] + const size_t expected[7] = {0, 1, 1, 2, 2, 2, 2}; + for (uint32_t i = 1; i <= 7; ++i) { + EXPECT_EQ(expected[i - 1], FloorLog2Nonzero(i)) << " " << i; + EXPECT_EQ(expected[i - 1], FloorLog2Nonzero(uint64_t(i))) << " " << i; + } + + EXPECT_EQ(11u, FloorLog2Nonzero(0x00000fffu)); // 4095 + EXPECT_EQ(12u, FloorLog2Nonzero(0x00001000u)); // 4096 + EXPECT_EQ(12u, FloorLog2Nonzero(0x00001001u)); // 4097 + + EXPECT_EQ(31u, FloorLog2Nonzero(0x80000000u)); + EXPECT_EQ(31u, FloorLog2Nonzero(0x80000001u)); + EXPECT_EQ(31u, FloorLog2Nonzero(0xFFFFFFFFu)); + + EXPECT_EQ(31u, FloorLog2Nonzero(0x80000000ull)); + EXPECT_EQ(31u, FloorLog2Nonzero(0x80000001ull)); + EXPECT_EQ(31u, FloorLog2Nonzero(0xFFFFFFFFull)); + + EXPECT_EQ(63u, FloorLog2Nonzero(0x8000000000000000ull)); + EXPECT_EQ(63u, FloorLog2Nonzero(0x8000000000000001ull)); + EXPECT_EQ(63u, FloorLog2Nonzero(0xFFFFFFFFFFFFFFFFull)); +} + +TEST(BitsTest, TestCeilLog2) { + // for input = [1, 7] + const size_t expected[7] = {0, 1, 2, 2, 3, 3, 3}; + for (uint32_t i = 1; i <= 7; ++i) { + EXPECT_EQ(expected[i - 1], CeilLog2Nonzero(i)) << " " << i; + EXPECT_EQ(expected[i - 1], CeilLog2Nonzero(uint64_t(i))) << " " << i; + } + + 
EXPECT_EQ(12u, CeilLog2Nonzero(0x00000fffu)); // 4095 + EXPECT_EQ(12u, CeilLog2Nonzero(0x00001000u)); // 4096 + EXPECT_EQ(13u, CeilLog2Nonzero(0x00001001u)); // 4097 + + EXPECT_EQ(31u, CeilLog2Nonzero(0x80000000u)); + EXPECT_EQ(32u, CeilLog2Nonzero(0x80000001u)); + EXPECT_EQ(32u, CeilLog2Nonzero(0xFFFFFFFFu)); + + EXPECT_EQ(31u, CeilLog2Nonzero(0x80000000ull)); + EXPECT_EQ(32u, CeilLog2Nonzero(0x80000001ull)); + EXPECT_EQ(32u, CeilLog2Nonzero(0xFFFFFFFFull)); + + EXPECT_EQ(63u, CeilLog2Nonzero(0x8000000000000000ull)); + EXPECT_EQ(64u, CeilLog2Nonzero(0x8000000000000001ull)); + EXPECT_EQ(64u, CeilLog2Nonzero(0xFFFFFFFFFFFFFFFFull)); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/blending.cc b/third-party/libjxl/libjxl/lib/jxl/blending.cc new file mode 100644 index 0000000000..291e3ba525 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/blending.cc @@ -0,0 +1,152 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/blending.h" + +#include "lib/jxl/alpha.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { + +bool NeedsBlending(PassesDecoderState* dec_state) { + const PassesSharedState& state = *dec_state->shared; + if (!(state.frame_header.frame_type == FrameType::kRegularFrame || + state.frame_header.frame_type == FrameType::kSkipProgressive)) { + return false; + } + const auto& info = state.frame_header.blending_info; + bool replace_all = (info.mode == BlendMode::kReplace); + for (const auto& ec_i : state.frame_header.extra_channel_blending_info) { + if (ec_i.mode != BlendMode::kReplace) { + replace_all = false; + } + } + // Replace the full frame: nothing to do. 
+ if (!state.frame_header.custom_size_or_origin && replace_all) { + return false; + } + return true; +} + +void PerformBlending(const float* const* bg, const float* const* fg, + float* const* out, size_t x0, size_t xsize, + const PatchBlending& color_blending, + const PatchBlending* ec_blending, + const std::vector& extra_channel_info) { + bool has_alpha = false; + size_t num_ec = extra_channel_info.size(); + for (size_t i = 0; i < num_ec; i++) { + if (extra_channel_info[i].type == jxl::ExtraChannel::kAlpha) { + has_alpha = true; + break; + } + } + ImageF tmp(xsize, 3 + num_ec); + // Blend extra channels first so that we use the pre-blending alpha. + for (size_t i = 0; i < num_ec; i++) { + if (ec_blending[i].mode == PatchBlendMode::kAdd) { + for (size_t x = 0; x < xsize; x++) { + tmp.Row(3 + i)[x] = bg[3 + i][x + x0] + fg[3 + i][x + x0]; + } + } else if (ec_blending[i].mode == PatchBlendMode::kBlendAbove) { + size_t alpha = ec_blending[i].alpha_channel; + bool is_premultiplied = extra_channel_info[alpha].alpha_associated; + PerformAlphaBlending(bg[3 + i] + x0, bg[3 + alpha] + x0, fg[3 + i] + x0, + fg[3 + alpha] + x0, tmp.Row(3 + i), xsize, + is_premultiplied, ec_blending[i].clamp); + } else if (ec_blending[i].mode == PatchBlendMode::kBlendBelow) { + size_t alpha = ec_blending[i].alpha_channel; + bool is_premultiplied = extra_channel_info[alpha].alpha_associated; + PerformAlphaBlending(fg[3 + i] + x0, fg[3 + alpha] + x0, bg[3 + i] + x0, + bg[3 + alpha] + x0, tmp.Row(3 + i), xsize, + is_premultiplied, ec_blending[i].clamp); + } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddAbove) { + size_t alpha = ec_blending[i].alpha_channel; + PerformAlphaWeightedAdd(bg[3 + i] + x0, fg[3 + i] + x0, + fg[3 + alpha] + x0, tmp.Row(3 + i), xsize, + ec_blending[i].clamp); + } else if (ec_blending[i].mode == PatchBlendMode::kAlphaWeightedAddBelow) { + size_t alpha = ec_blending[i].alpha_channel; + PerformAlphaWeightedAdd(fg[3 + i] + x0, bg[3 + i] + x0, + bg[3 + alpha] 
+ x0, tmp.Row(3 + i), xsize, + ec_blending[i].clamp); + } else if (ec_blending[i].mode == PatchBlendMode::kMul) { + PerformMulBlending(bg[3 + i] + x0, fg[3 + i] + x0, tmp.Row(3 + i), xsize, + ec_blending[i].clamp); + } else if (ec_blending[i].mode == PatchBlendMode::kReplace) { + memcpy(tmp.Row(3 + i), fg[3 + i] + x0, xsize * sizeof(**fg)); + } else if (ec_blending[i].mode == PatchBlendMode::kNone) { + if (xsize) memcpy(tmp.Row(3 + i), bg[3 + i] + x0, xsize * sizeof(**fg)); + } else { + JXL_UNREACHABLE("new PatchBlendMode?"); + } + } + size_t alpha = color_blending.alpha_channel; + + if (color_blending.mode == PatchBlendMode::kAdd || + (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove && + !has_alpha) || + (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow && + !has_alpha)) { + for (int p = 0; p < 3; p++) { + float* out = tmp.Row(p); + for (size_t x = 0; x < xsize; x++) { + out[x] = bg[p][x + x0] + fg[p][x + x0]; + } + } + } else if (color_blending.mode == PatchBlendMode::kBlendAbove + // blend without alpha is just replace + && has_alpha) { + bool is_premultiplied = extra_channel_info[alpha].alpha_associated; + PerformAlphaBlending( + {bg[0] + x0, bg[1] + x0, bg[2] + x0, bg[3 + alpha] + x0}, + {fg[0] + x0, fg[1] + x0, fg[2] + x0, fg[3 + alpha] + x0}, + {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize, + is_premultiplied, color_blending.clamp); + } else if (color_blending.mode == PatchBlendMode::kBlendBelow + // blend without alpha is just replace + && has_alpha) { + bool is_premultiplied = extra_channel_info[alpha].alpha_associated; + PerformAlphaBlending( + {fg[0] + x0, fg[1] + x0, fg[2] + x0, fg[3 + alpha] + x0}, + {bg[0] + x0, bg[1] + x0, bg[2] + x0, bg[3 + alpha] + x0}, + {tmp.Row(0), tmp.Row(1), tmp.Row(2), tmp.Row(3 + alpha)}, xsize, + is_premultiplied, color_blending.clamp); + } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddAbove) { + JXL_DASSERT(has_alpha); + for (size_t c = 0; c < 3; c++) { + 
PerformAlphaWeightedAdd(bg[c] + x0, fg[c] + x0, fg[3 + alpha] + x0, + tmp.Row(c), xsize, color_blending.clamp); + } + } else if (color_blending.mode == PatchBlendMode::kAlphaWeightedAddBelow) { + JXL_DASSERT(has_alpha); + for (size_t c = 0; c < 3; c++) { + PerformAlphaWeightedAdd(fg[c] + x0, bg[c] + x0, bg[3 + alpha] + x0, + tmp.Row(c), xsize, color_blending.clamp); + } + } else if (color_blending.mode == PatchBlendMode::kMul) { + for (int p = 0; p < 3; p++) { + PerformMulBlending(bg[p] + x0, fg[p] + x0, tmp.Row(p), xsize, + color_blending.clamp); + } + } else if (color_blending.mode == PatchBlendMode::kReplace || + color_blending.mode == PatchBlendMode::kBlendAbove || + color_blending.mode == PatchBlendMode::kBlendBelow) { // kReplace + for (size_t p = 0; p < 3; p++) { + memcpy(tmp.Row(p), fg[p] + x0, xsize * sizeof(**fg)); + } + } else if (color_blending.mode == PatchBlendMode::kNone) { + for (size_t p = 0; p < 3; p++) { + memcpy(tmp.Row(p), bg[p] + x0, xsize * sizeof(**fg)); + } + } else { + JXL_UNREACHABLE("new PatchBlendMode?"); + } + for (size_t i = 0; i < 3 + num_ec; i++) { + if (xsize != 0) memcpy(out[i] + x0, tmp.Row(i), xsize * sizeof(**out)); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/blending.h b/third-party/libjxl/libjxl/lib/jxl/blending.h new file mode 100644 index 0000000000..7eab7d50cd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/blending.h @@ -0,0 +1,24 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_BLENDING_H_ +#define LIB_JXL_BLENDING_H_ +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/dec_patch_dictionary.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +bool NeedsBlending(PassesDecoderState* dec_state); + +void PerformBlending(const float* const* bg, const float* const* fg, + float* const* out, size_t x0, size_t xsize, + const PatchBlending& color_blending, + const PatchBlending* ec_blending, + const std::vector& extra_channel_info); + +} // namespace jxl + +#endif // LIB_JXL_BLENDING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/blending_test.cc b/third-party/libjxl/libjxl/lib/jxl/blending_test.cc new file mode 100644 index 0000000000..ff4c46c529 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/blending_test.cc @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/codec.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +using ::testing::SizeIs; + +TEST(BlendingTest, Crops) { + const PaddedBytes compressed = + jxl::test::ReadTestData("jxl/blending/cropped_traffic_light.jxl"); + CodecInOut decoded; + ASSERT_TRUE(test::DecodeFile({}, Span(compressed), &decoded)); + ASSERT_THAT(decoded.frames, SizeIs(4)); + + int i = 0; + for (const ImageBundle& ib : decoded.frames) { + std::ostringstream filename; + filename << "jxl/blending/cropped_traffic_light_frame-" << i << ".png"; + const PaddedBytes compressed_frame = + jxl::test::ReadTestData(filename.str()); + CodecInOut frame; + ASSERT_TRUE(SetFromBytes(Span(compressed_frame), &frame)); + JXL_EXPECT_OK(SamePixels(ib.color(), *frame.Main().color(), _)); + ++i; + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.cc 
b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.cc new file mode 100644 index 0000000000..c4cba3a31a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.cc @@ -0,0 +1,101 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/box_content_decoder.h" + +#include "lib/jxl/sanitizers.h" + +namespace jxl { + +JxlBoxContentDecoder::JxlBoxContentDecoder() {} + +JxlBoxContentDecoder::~JxlBoxContentDecoder() { + if (brotli_dec) { + BrotliDecoderDestroyInstance(brotli_dec); + } +} + +void JxlBoxContentDecoder::StartBox(bool brob_decode, bool box_until_eof, + size_t contents_size) { + if (brotli_dec) { + BrotliDecoderDestroyInstance(brotli_dec); + brotli_dec = nullptr; + } + header_done_ = false; + brob_decode_ = brob_decode; + box_until_eof_ = box_until_eof; + remaining_ = box_until_eof ? 0 : contents_size; + pos_ = 0; +} + +JxlDecoderStatus JxlBoxContentDecoder::Process(const uint8_t* next_in, + size_t avail_in, size_t box_pos, + uint8_t** next_out, + size_t* avail_out) { + next_in += pos_ - box_pos; + avail_in -= pos_ - box_pos; + + if (brob_decode_) { + if (!header_done_) { + if (avail_in < 4) return JXL_DEC_NEED_MORE_INPUT; + if (!box_until_eof_) { + if (remaining_ < 4) return JXL_DEC_ERROR; + remaining_ -= 4; + } + next_in += 4; + avail_in -= 4; + pos_ += 4; + header_done_ = true; + } + + if (!brotli_dec) { + brotli_dec = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); + } + + const uint8_t* next_in_before = next_in; + uint8_t* next_out_before = *next_out; + msan::MemoryIsInitialized(next_in, avail_in); + BrotliDecoderResult res = BrotliDecoderDecompressStream( + brotli_dec, &avail_in, &next_in, avail_out, next_out, nullptr); + size_t consumed = next_in - next_in_before; + size_t produced = *next_out - next_out_before; + if (res == BROTLI_DECODER_RESULT_ERROR) { + return JXL_DEC_ERROR; + 
} + msan::UnpoisonMemory(next_out_before, produced); + pos_ += consumed; + if (!box_until_eof_) remaining_ -= consumed; + if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + return JXL_DEC_NEED_MORE_INPUT; + } + if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { + return JXL_DEC_BOX_NEED_MORE_OUTPUT; + } + if (res == BROTLI_DECODER_RESULT_SUCCESS) { + return JXL_DEC_SUCCESS; + } + // unknown Brotli result + return JXL_DEC_ERROR; + } else { + // remaining box bytes as seen from dec->file_pos + size_t can_read = avail_in; + if (!box_until_eof_) can_read = std::min(can_read, remaining_); + size_t to_write = std::min(can_read, *avail_out); + memcpy(*next_out, next_in, to_write); + + *next_out += to_write; + *avail_out -= to_write; + if (!box_until_eof_) remaining_ -= to_write; + pos_ += to_write; + + if (to_write < can_read) return JXL_DEC_BOX_NEED_MORE_OUTPUT; + + if (!box_until_eof_ && remaining_ > 0) return JXL_DEC_NEED_MORE_INPUT; + + return JXL_DEC_SUCCESS; + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.h b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.h new file mode 100644 index 0000000000..6153360a8e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/box_content_decoder.h @@ -0,0 +1,49 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_BOX_CONTENT_DECODER_H_ +#define LIB_JXL_BOX_CONTENT_DECODER_H_ + +#include +#include +#include +#include + +#include +#include + +namespace jxl { + +/** Outputs the contents of a box in a streaming fashion, either directly, or + * optionally decoding with Brotli, in case of a brob box. The input must be + * the contents of a box, excluding the box header. 
+ */ +class JxlBoxContentDecoder { + public: + JxlBoxContentDecoder(); + ~JxlBoxContentDecoder(); + + void StartBox(bool brob_decode, bool box_until_eof, size_t contents_size); + + // Outputs decoded bytes from the box, decoding with brotli if needed. + // box_pos is the position in the box content which next_in points to. + // Returns success, whether more input or output bytes are needed, or error. + JxlDecoderStatus Process(const uint8_t* next_in, size_t avail_in, + size_t box_pos, uint8_t** next_out, + size_t* avail_out); + + private: + BrotliDecoderState* brotli_dec; + + bool header_done_; + bool brob_decode_; + bool box_until_eof_; + size_t remaining_; + size_t pos_; +}; + +} // namespace jxl + +#endif // LIB_JXL_BOX_CONTENT_DECODER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.cc b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.cc new file mode 100644 index 0000000000..dec8c5ea2d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.cc @@ -0,0 +1,1939 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// +// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com) +// +// The physical architecture of butteraugli is based on the following naming +// convention: +// * Opsin - dynamics of the photosensitive chemicals in the retina +// with their immediate electrical processing +// * Xyb - hybrid opponent/trichromatic color space +// x is roughly red-subtract-green. +// y is yellow. +// b is blue. +// Xyb values are computed from Opsin mixing, not directly from rgb. 
+// * Mask - for visual masking +// * Hf - color modeling for spatially high-frequency features +// * Lf - color modeling for spatially low-frequency features +// * Diffmap - to cluster and build an image of error between the images +// * Blur - to hold the smoothing code + +#include "lib/jxl/butteraugli/butteraugli.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/butteraugli/butteraugli.cc" +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/fast_math-inl.h" +#include "lib/jxl/gauss_blur.h" +#include "lib/jxl/image_ops.h" + +#ifndef JXL_BUTTERAUGLI_ONCE +#define JXL_BUTTERAUGLI_ONCE + +namespace jxl { + +std::vector ComputeKernel(float sigma) { + const float m = 2.25; // Accuracy increases when m is increased. + const double scaler = -1.0 / (2.0 * sigma * sigma); + const int diff = std::max(1, m * std::fabs(sigma)); + std::vector kernel(2 * diff + 1); + for (int i = -diff; i <= diff; ++i) { + kernel[i + diff] = std::exp(scaler * i * i); + } + return kernel; +} + +void ConvolveBorderColumn(const ImageF& in, const std::vector& kernel, + const size_t x, float* BUTTERAUGLI_RESTRICT row_out) { + const size_t offset = kernel.size() / 2; + int minx = x < offset ? 0 : x - offset; + int maxx = std::min(in.xsize() - 1, x + offset); + float weight = 0.0f; + for (int j = minx; j <= maxx; ++j) { + weight += kernel[j - x + offset]; + } + float scale = 1.0f / weight; + for (size_t y = 0; y < in.ysize(); ++y) { + const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y); + float sum = 0.0f; + for (int j = minx; j <= maxx; ++j) { + sum += row_in[j] * kernel[j - x + offset]; + } + row_out[y] = sum * scale; + } +} + +// Computes a horizontal convolution and transposes the result. 
+void ConvolutionWithTranspose(const ImageF& in, + const std::vector& kernel, + ImageF* BUTTERAUGLI_RESTRICT out) { + JXL_CHECK(out->xsize() == in.ysize()); + JXL_CHECK(out->ysize() == in.xsize()); + const size_t len = kernel.size(); + const size_t offset = len / 2; + float weight_no_border = 0.0f; + for (size_t j = 0; j < len; ++j) { + weight_no_border += kernel[j]; + } + const float scale_no_border = 1.0f / weight_no_border; + const size_t border1 = std::min(in.xsize(), offset); + const size_t border2 = in.xsize() > offset ? in.xsize() - offset : 0; + std::vector scaled_kernel(len / 2 + 1); + for (size_t i = 0; i <= len / 2; ++i) { + scaled_kernel[i] = kernel[i] * scale_no_border; + } + + // middle + switch (len) { + case 7: { + const float sk0 = scaled_kernel[0]; + const float sk1 = scaled_kernel[1]; + const float sk2 = scaled_kernel[2]; + const float sk3 = scaled_kernel[3]; + for (size_t y = 0; y < in.ysize(); ++y) { + const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; + for (size_t x = border1; x < border2; ++x, ++row_in) { + const float sum0 = (row_in[0] + row_in[6]) * sk0; + const float sum1 = (row_in[1] + row_in[5]) * sk1; + const float sum2 = (row_in[2] + row_in[4]) * sk2; + const float sum = (row_in[3]) * sk3 + sum0 + sum1 + sum2; + float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); + row_out[y] = sum; + } + } + } break; + case 13: { + for (size_t y = 0; y < in.ysize(); ++y) { + const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; + for (size_t x = border1; x < border2; ++x, ++row_in) { + float sum0 = (row_in[0] + row_in[12]) * scaled_kernel[0]; + float sum1 = (row_in[1] + row_in[11]) * scaled_kernel[1]; + float sum2 = (row_in[2] + row_in[10]) * scaled_kernel[2]; + float sum3 = (row_in[3] + row_in[9]) * scaled_kernel[3]; + sum0 += (row_in[4] + row_in[8]) * scaled_kernel[4]; + sum1 += (row_in[5] + row_in[7]) * scaled_kernel[5]; + const float sum = (row_in[6]) * scaled_kernel[6]; + float* BUTTERAUGLI_RESTRICT 
row_out = out->Row(x); + row_out[y] = sum + sum0 + sum1 + sum2 + sum3; + } + } + break; + } + case 15: { + for (size_t y = 0; y < in.ysize(); ++y) { + const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; + for (size_t x = border1; x < border2; ++x, ++row_in) { + float sum0 = (row_in[0] + row_in[14]) * scaled_kernel[0]; + float sum1 = (row_in[1] + row_in[13]) * scaled_kernel[1]; + float sum2 = (row_in[2] + row_in[12]) * scaled_kernel[2]; + float sum3 = (row_in[3] + row_in[11]) * scaled_kernel[3]; + sum0 += (row_in[4] + row_in[10]) * scaled_kernel[4]; + sum1 += (row_in[5] + row_in[9]) * scaled_kernel[5]; + sum2 += (row_in[6] + row_in[8]) * scaled_kernel[6]; + const float sum = (row_in[7]) * scaled_kernel[7]; + float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); + row_out[y] = sum + sum0 + sum1 + sum2 + sum3; + } + } + break; + } + case 33: { + for (size_t y = 0; y < in.ysize(); ++y) { + const float* BUTTERAUGLI_RESTRICT row_in = in.Row(y) + border1 - offset; + for (size_t x = border1; x < border2; ++x, ++row_in) { + float sum0 = (row_in[0] + row_in[32]) * scaled_kernel[0]; + float sum1 = (row_in[1] + row_in[31]) * scaled_kernel[1]; + float sum2 = (row_in[2] + row_in[30]) * scaled_kernel[2]; + float sum3 = (row_in[3] + row_in[29]) * scaled_kernel[3]; + sum0 += (row_in[4] + row_in[28]) * scaled_kernel[4]; + sum1 += (row_in[5] + row_in[27]) * scaled_kernel[5]; + sum2 += (row_in[6] + row_in[26]) * scaled_kernel[6]; + sum3 += (row_in[7] + row_in[25]) * scaled_kernel[7]; + sum0 += (row_in[8] + row_in[24]) * scaled_kernel[8]; + sum1 += (row_in[9] + row_in[23]) * scaled_kernel[9]; + sum2 += (row_in[10] + row_in[22]) * scaled_kernel[10]; + sum3 += (row_in[11] + row_in[21]) * scaled_kernel[11]; + sum0 += (row_in[12] + row_in[20]) * scaled_kernel[12]; + sum1 += (row_in[13] + row_in[19]) * scaled_kernel[13]; + sum2 += (row_in[14] + row_in[18]) * scaled_kernel[14]; + sum3 += (row_in[15] + row_in[17]) * scaled_kernel[15]; + const float sum = (row_in[16]) * 
scaled_kernel[16]; + float* BUTTERAUGLI_RESTRICT row_out = out->Row(x); + row_out[y] = sum + sum0 + sum1 + sum2 + sum3; + } + } + break; + } + default: + JXL_UNREACHABLE("Kernel size %" PRIuS " not implemented", len); + } + // left border + for (size_t x = 0; x < border1; ++x) { + ConvolveBorderColumn(in, kernel, x, out->Row(x)); + } + + // right border + for (size_t x = border2; x < in.xsize(); ++x) { + ConvolveBorderColumn(in, kernel, x, out->Row(x)); + } +} + +// A blur somewhat similar to a 2D Gaussian blur. +// See: https://en.wikipedia.org/wiki/Gaussian_blur +// +// This is a bottleneck because the sigma can be quite large (>7). We can use +// gauss_blur.cc (runtime independent of sigma, closer to a 4*sigma truncated +// Gaussian and our 2.25 in ComputeKernel), but its boundary conditions are +// zero-valued. This leads to noticeable differences at the edges of diffmaps. +// We retain a special case for 5x5 kernels (even faster than gauss_blur), +// optionally use gauss_blur followed by fixup of the borders for large images, +// or fall back to the previous truncated FIR followed by a transpose. +void Blur(const ImageF& in, float sigma, const ButteraugliParams& params, + BlurTemp* temp, ImageF* out) { + std::vector kernel = ComputeKernel(sigma); + // Separable5 does an in-place convolution, so this fast path is not safe if + // in aliases out. 
+ if (kernel.size() == 5 && &in != out) { + float sum_weights = 0.0f; + for (const float w : kernel) { + sum_weights += w; + } + const float scale = 1.0f / sum_weights; + const float w0 = kernel[2] * scale; + const float w1 = kernel[1] * scale; + const float w2 = kernel[0] * scale; + const WeightsSeparable5 weights = { + {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, + {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, + }; + Separable5(in, Rect(in), weights, /*pool=*/nullptr, out); + return; + } + + ImageF* JXL_RESTRICT temp_t = temp->GetTransposed(in); + ConvolutionWithTranspose(in, kernel, temp_t); + ConvolutionWithTranspose(*temp_t, kernel, out); +} + +// Allows PaddedMaltaUnit to call either function via overloading. +struct MaltaTagLF {}; +struct MaltaTag {}; + +} // namespace jxl + +#endif // JXL_BUTTERAUGLI_ONCE + +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Lt; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::MulSub; +using hwy::HWY_NAMESPACE::Neg; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Vec; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +template +HWY_INLINE V MaximumClamp(D d, V v, double kMaxVal) { + static const double kMul = 0.724216145665; + const V mul = Set(d, kMul); + const V maxval = Set(d, kMaxVal); + // If greater than maxval or less than -maxval, replace with if_*. + const V if_pos = MulAdd(Sub(v, maxval), mul, maxval); + const V if_neg = MulSub(Add(v, maxval), mul, maxval); + const V pos_or_v = IfThenElse(Ge(v, maxval), if_pos, v); + return IfThenElse(Lt(v, Neg(maxval)), if_neg, pos_or_v); +} + +// Make area around zero less important (remove it). 
+template +HWY_INLINE V RemoveRangeAroundZero(const D d, const double kw, const V x) { + const auto w = Set(d, kw); + return IfThenElse(Gt(x, w), Sub(x, w), + IfThenElseZero(Lt(x, Neg(w)), Add(x, w))); +} + +// Make area around zero more important (2x it until the limit). +template +HWY_INLINE V AmplifyRangeAroundZero(const D d, const double kw, const V x) { + const auto w = Set(d, kw); + return IfThenElse(Gt(x, w), Add(x, w), + IfThenElse(Lt(x, Neg(w)), Sub(x, w), Add(x, x))); +} + +// XybLowFreqToVals converts from low-frequency XYB space to the 'vals' space. +// Vals space can be converted to L2-norm space (Euclidean and normalized) +// through visual masking. +template +HWY_INLINE void XybLowFreqToVals(const D d, const V& x, const V& y, + const V& b_arg, V* HWY_RESTRICT valx, + V* HWY_RESTRICT valy, V* HWY_RESTRICT valb) { + static const double xmul_scalar = 33.832837186260; + static const double ymul_scalar = 14.458268100570; + static const double bmul_scalar = 49.87984651440; + static const double y_to_b_mul_scalar = -0.362267051518; + const V xmul = Set(d, xmul_scalar); + const V ymul = Set(d, ymul_scalar); + const V bmul = Set(d, bmul_scalar); + const V y_to_b_mul = Set(d, y_to_b_mul_scalar); + const V b = MulAdd(y_to_b_mul, y, b_arg); + *valb = Mul(b, bmul); + *valx = Mul(x, xmul); + *valy = Mul(y, ymul); +} + +void SuppressXByY(const ImageF& in_x, const ImageF& in_y, const double yw, + ImageF* HWY_RESTRICT out) { + JXL_DASSERT(SameSize(in_x, in_y) && SameSize(in_x, *out)); + const size_t xsize = in_x.xsize(); + const size_t ysize = in_x.ysize(); + + const HWY_FULL(float) d; + static const double s = 0.653020556257; + const auto sv = Set(d, s); + const auto one_minus_s = Set(d, 1.0 - s); + const auto ywv = Set(d, yw); + + for (size_t y = 0; y < ysize; ++y) { + const float* HWY_RESTRICT row_x = in_x.ConstRow(y); + const float* HWY_RESTRICT row_y = in_y.ConstRow(y); + float* HWY_RESTRICT row_out = out->Row(y); + + for (size_t x = 0; x < xsize; x += Lanes(d)) 
{ + const auto vx = Load(d, row_x + x); + const auto vy = Load(d, row_y + x); + const auto scaler = + MulAdd(Div(ywv, MulAdd(vy, vy, ywv)), one_minus_s, sv); + Store(Mul(scaler, vx), d, row_out + x); + } + } +} + +static void SeparateFrequencies(size_t xsize, size_t ysize, + const ButteraugliParams& params, + BlurTemp* blur_temp, const Image3F& xyb, + PsychoImage& ps) { + const HWY_FULL(float) d; + + // Extract lf ... + static const double kSigmaLf = 7.15593339443; + static const double kSigmaHf = 3.22489901262; + static const double kSigmaUhf = 1.56416327805; + ps.mf = Image3F(xsize, ysize); + ps.hf[0] = ImageF(xsize, ysize); + ps.hf[1] = ImageF(xsize, ysize); + ps.lf = Image3F(xyb.xsize(), xyb.ysize()); + ps.mf = Image3F(xyb.xsize(), xyb.ysize()); + for (int i = 0; i < 3; ++i) { + Blur(xyb.Plane(i), kSigmaLf, params, blur_temp, &ps.lf.Plane(i)); + + // ... and keep everything else in mf. + for (size_t y = 0; y < ysize; ++y) { + const float* BUTTERAUGLI_RESTRICT row_xyb = xyb.PlaneRow(i, y); + const float* BUTTERAUGLI_RESTRICT row_lf = ps.lf.ConstPlaneRow(i, y); + float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + const auto mf = Sub(Load(d, row_xyb + x), Load(d, row_lf + x)); + Store(mf, d, row_mf + x); + } + } + if (i == 2) { + Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i)); + break; + } + // Divide mf into mf and hf. 
+ for (size_t y = 0; y < ysize; ++y) { + float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(i, y); + float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + Store(Load(d, row_mf + x), d, row_hf + x); + } + } + Blur(ps.mf.Plane(i), kSigmaHf, params, blur_temp, &ps.mf.Plane(i)); + static const double kRemoveMfRange = 0.29; + static const double kAddMfRange = 0.1; + if (i == 0) { + for (size_t y = 0; y < ysize; ++y) { + float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(0, y); + float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + auto mf = Load(d, row_mf + x); + auto hf = Sub(Load(d, row_hf + x), mf); + mf = RemoveRangeAroundZero(d, kRemoveMfRange, mf); + Store(mf, d, row_mf + x); + Store(hf, d, row_hf + x); + } + } + } else { + for (size_t y = 0; y < ysize; ++y) { + float* BUTTERAUGLI_RESTRICT row_mf = ps.mf.PlaneRow(1, y); + float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + auto mf = Load(d, row_mf + x); + auto hf = Sub(Load(d, row_hf + x), mf); + + mf = AmplifyRangeAroundZero(d, kAddMfRange, mf); + Store(mf, d, row_mf + x); + Store(hf, d, row_hf + x); + } + } + } + } + + // Temporarily used as output of SuppressXByY + ps.uhf[0] = ImageF(xsize, ysize); + ps.uhf[1] = ImageF(xsize, ysize); + + // Suppress red-green by intensity change in the high freq channels. + static const double suppress = 46.0; + SuppressXByY(ps.hf[0], ps.hf[1], suppress, &ps.uhf[0]); + // hf is the SuppressXByY output, uhf will be written below. + ps.hf[0].Swap(ps.uhf[0]); + + for (int i = 0; i < 2; ++i) { + // Divide hf into hf and uhf. 
+ for (size_t y = 0; y < ysize; ++y) { + float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[i].Row(y); + float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[i].Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_uhf[x] = row_hf[x]; + } + } + Blur(ps.hf[i], kSigmaUhf, params, blur_temp, &ps.hf[i]); + static const double kRemoveHfRange = 1.5; + static const double kAddHfRange = 0.132; + static const double kRemoveUhfRange = 0.04; + static const double kMaxclampHf = 28.4691806922; + static const double kMaxclampUhf = 5.19175294647; + static double kMulYHf = 2.155; + static double kMulYUhf = 2.69313763794; + if (i == 0) { + for (size_t y = 0; y < ysize; ++y) { + float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[0].Row(y); + float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[0].Row(y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + auto hf = Load(d, row_hf + x); + auto uhf = Sub(Load(d, row_uhf + x), hf); + hf = RemoveRangeAroundZero(d, kRemoveHfRange, hf); + uhf = RemoveRangeAroundZero(d, kRemoveUhfRange, uhf); + Store(hf, d, row_hf + x); + Store(uhf, d, row_uhf + x); + } + } + } else { + for (size_t y = 0; y < ysize; ++y) { + float* BUTTERAUGLI_RESTRICT row_uhf = ps.uhf[1].Row(y); + float* BUTTERAUGLI_RESTRICT row_hf = ps.hf[1].Row(y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + auto hf = Load(d, row_hf + x); + hf = MaximumClamp(d, hf, kMaxclampHf); + + auto uhf = Sub(Load(d, row_uhf + x), hf); + uhf = MaximumClamp(d, uhf, kMaxclampUhf); + uhf = Mul(uhf, Set(d, kMulYUhf)); + Store(uhf, d, row_uhf + x); + + hf = Mul(hf, Set(d, kMulYHf)); + hf = AmplifyRangeAroundZero(d, kAddHfRange, hf); + Store(hf, d, row_hf + x); + } + } + } + } + // Modify range around zero code only concerns the high frequency + // planes and only the X and Y channels. + // Convert low freq xyb to vals space so that we can do a simple squared sum + // diff on the low frequencies later. 
+ for (size_t y = 0; y < ysize; ++y) { + float* BUTTERAUGLI_RESTRICT row_x = ps.lf.PlaneRow(0, y); + float* BUTTERAUGLI_RESTRICT row_y = ps.lf.PlaneRow(1, y); + float* BUTTERAUGLI_RESTRICT row_b = ps.lf.PlaneRow(2, y); + for (size_t x = 0; x < xsize; x += Lanes(d)) { + auto valx = Undefined(d); + auto valy = Undefined(d); + auto valb = Undefined(d); + XybLowFreqToVals(d, Load(d, row_x + x), Load(d, row_y + x), + Load(d, row_b + x), &valx, &valy, &valb); + Store(valx, d, row_x + x); + Store(valy, d, row_y + x); + Store(valb, d, row_b + x); + } + } +} + +namespace { +template +BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d) { + return Add(Add(a, b), Add(c, d)); +} +template +BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e) { + return Sum(a, b, c, Add(d, e)); +} +template +BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g) { + return Sum(a, b, c, Sum(d, e, f, g)); +} +template +BUTTERAUGLI_INLINE V Sum(V a, V b, V c, V d, V e, V f, V g, V h, V i) { + return Add(Add(Sum(a, b, c, d), Sum(e, f, g, h)), i); +} +} // namespace + +template +Vec MaltaUnit(MaltaTagLF /*tag*/, const D df, + const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { + const intptr_t xs3 = 3 * xs; + + const auto center = LoadU(df, d); + + // x grows, y constant + const auto sum_yconst = Sum(LoadU(df, d - 4), LoadU(df, d - 2), center, + LoadU(df, d + 2), LoadU(df, d + 4)); + // Will return this, sum of all line kernels + auto retval = Mul(sum_yconst, sum_yconst); + { + // y grows, x constant + auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs - xs), center, + LoadU(df, d + xs + xs), LoadU(df, d + xs3 + xs)); + retval = MulAdd(sum, sum, retval); + } + { + // both grow + auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), center, + LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); + retval = MulAdd(sum, sum, retval); + } + { + // y grows, x shrinks + auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), center, + LoadU(df, d + xs + xs - 2), LoadU(df, d + 
xs3 - 3)); + retval = MulAdd(sum, sum, retval); + } + { + // y grows -4 to 4, x shrinks 1 -> -1 + auto sum = + Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs - xs + 1), center, + LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 1)); + retval = MulAdd(sum, sum, retval); + } + { + // y grows -4 to 4, x grows -1 -> 1 + auto sum = + Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs - xs - 1), center, + LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 1)); + retval = MulAdd(sum, sum, retval); + } + { + // x grows -4 to 4, y grows -1 to 1 + auto sum = Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 2 - xs), center, + LoadU(df, d + 2 + xs), LoadU(df, d + 4 + xs)); + retval = MulAdd(sum, sum, retval); + } + { + // x grows -4 to 4, y shrinks 1 to -1 + auto sum = Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 2 + xs), center, + LoadU(df, d + 2 - xs), LoadU(df, d + 4 - xs)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1__*______ + 2___*_____ + 3_________ + 4____0____ + 5_________ + 6_____*___ + 7______*__ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs3 - 2), LoadU(df, d - xs - xs - 1), center, + LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1______*__ + 2_____*___ + 3_________ + 4____0____ + 5_________ + 6___*_____ + 7__*______ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), center, + LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2_*_______ + 3__*______ + 4____0____ + 5______*__ + 6_______*_ + 7_________ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), center, + LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2_______*_ + 3______*__ + 4____0____ + 5__*______ + 6_*_______ + 7_________ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - 
xs + 2), center, + LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2________* + 3______*__ + 4____0____ + 5__*______ + 6*________ + 7_________ + 8_________ */ + + auto sum = Sum(LoadU(df, d + xs + xs - 4), LoadU(df, d + xs - 2), center, + LoadU(df, d - xs + 2), LoadU(df, d - xs - xs + 4)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2*________ + 3__*______ + 4____0____ + 5______*__ + 6________* + 7_________ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs - xs - 4), LoadU(df, d - xs - 2), center, + LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 4)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0__*______ + 1_________ + 2___*_____ + 3_________ + 4____0____ + 5_________ + 6_____*___ + 7_________ + 8______*__ */ + auto sum = + Sum(LoadU(df, d - xs3 - xs - 2), LoadU(df, d - xs - xs - 1), center, + LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + xs + 2)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0______*__ + 1_________ + 2_____*___ + 3_________ + 4____0____ + 5_________ + 6___*_____ + 7_________ + 8__*______ */ + auto sum = + Sum(LoadU(df, d - xs3 - xs + 2), LoadU(df, d - xs - xs + 1), center, + LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 + xs - 2)); + retval = MulAdd(sum, sum, retval); + } + return retval; +} + +template +Vec MaltaUnit(MaltaTag /*tag*/, const D df, + const float* BUTTERAUGLI_RESTRICT d, const intptr_t xs) { + const intptr_t xs3 = 3 * xs; + + const auto center = LoadU(df, d); + + // x grows, y constant + const auto sum_yconst = + Sum(LoadU(df, d - 4), LoadU(df, d - 3), LoadU(df, d - 2), + LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2), + LoadU(df, d + 3), LoadU(df, d + 4)); + // Will return this, sum of all line kernels + auto retval = Mul(sum_yconst, sum_yconst); + + { + // y grows, x constant + auto sum = Sum(LoadU(df, d - xs3 - xs), LoadU(df, d - xs3), + LoadU(df, d - xs - xs), LoadU(df, d - xs), center, + 
LoadU(df, d + xs), LoadU(df, d + xs + xs), + LoadU(df, d + xs3), LoadU(df, d + xs3 + xs)); + retval = MulAdd(sum, sum, retval); + } + { + // both grow + auto sum = Sum(LoadU(df, d - xs3 - 3), LoadU(df, d - xs - xs - 2), + LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), + LoadU(df, d + xs + xs + 2), LoadU(df, d + xs3 + 3)); + retval = MulAdd(sum, sum, retval); + } + { + // y grows, x shrinks + auto sum = Sum(LoadU(df, d - xs3 + 3), LoadU(df, d - xs - xs + 2), + LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), + LoadU(df, d + xs + xs - 2), LoadU(df, d + xs3 - 3)); + retval = MulAdd(sum, sum, retval); + } + { + // y grows -4 to 4, x shrinks 1 -> -1 + auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), + LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, + LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), + LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); + retval = MulAdd(sum, sum, retval); + } + { + // y grows -4 to 4, x grows -1 -> 1 + auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), + LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, + LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), + LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); + retval = MulAdd(sum, sum, retval); + } + { + // x grows -4 to 4, y grows -1 to 1 + auto sum = + Sum(LoadU(df, d - 4 - xs), LoadU(df, d - 3 - xs), LoadU(df, d - 2 - xs), + LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 + xs), + LoadU(df, d + 3 + xs), LoadU(df, d + 4 + xs)); + retval = MulAdd(sum, sum, retval); + } + { + // x grows -4 to 4, y shrinks 1 to -1 + auto sum = + Sum(LoadU(df, d - 4 + xs), LoadU(df, d - 3 + xs), LoadU(df, d - 2 + xs), + LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + 2 - xs), + LoadU(df, d + 3 - xs), LoadU(df, d + 4 - xs)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1__*______ + 2___*_____ + 3___*_____ + 4____0____ + 5_____*___ + 6_____*___ + 7______*__ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs3 - 
2), LoadU(df, d - xs - xs - 1), + LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), + LoadU(df, d + xs + xs + 1), LoadU(df, d + xs3 + 2)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1______*__ + 2_____*___ + 3_____*___ + 4____0____ + 5___*_____ + 6___*_____ + 7__*______ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs3 + 2), LoadU(df, d - xs - xs + 1), + LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), + LoadU(df, d + xs + xs - 1), LoadU(df, d + xs3 - 2)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2_*_______ + 3__**_____ + 4____0____ + 5_____**__ + 6_______*_ + 7_________ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs - xs - 3), LoadU(df, d - xs - 2), + LoadU(df, d - xs - 1), center, LoadU(df, d + xs + 1), + LoadU(df, d + xs + 2), LoadU(df, d + xs + xs + 3)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2_______*_ + 3_____**__ + 4____0____ + 5__**_____ + 6_*_______ + 7_________ + 8_________ */ + auto sum = Sum(LoadU(df, d - xs - xs + 3), LoadU(df, d - xs + 2), + LoadU(df, d - xs + 1), center, LoadU(df, d + xs - 1), + LoadU(df, d + xs - 2), LoadU(df, d + xs + xs - 3)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2_________ + 3______*** + 4___*0*___ + 5***______ + 6_________ + 7_________ + 8_________ */ + + auto sum = + Sum(LoadU(df, d + xs - 4), LoadU(df, d + xs - 3), LoadU(df, d + xs - 2), + LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d - xs + 2), + LoadU(df, d - xs + 3), LoadU(df, d - xs + 4)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_________ + 1_________ + 2_________ + 3***______ + 4___*0*___ + 5______*** + 6_________ + 7_________ + 8_________ */ + auto sum = + Sum(LoadU(df, d - xs - 4), LoadU(df, d - xs - 3), LoadU(df, d - xs - 2), + LoadU(df, d - 1), center, LoadU(df, d + 1), LoadU(df, d + xs + 2), + LoadU(df, d + xs + 3), LoadU(df, d + xs + 4)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0___*_____ + 
1___*_____ + 2___*_____ + 3____*____ + 4____0____ + 5____*____ + 6_____*___ + 7_____*___ + 8_____*___ */ + auto sum = Sum(LoadU(df, d - xs3 - xs - 1), LoadU(df, d - xs3 - 1), + LoadU(df, d - xs - xs - 1), LoadU(df, d - xs), center, + LoadU(df, d + xs), LoadU(df, d + xs + xs + 1), + LoadU(df, d + xs3 + 1), LoadU(df, d + xs3 + xs + 1)); + retval = MulAdd(sum, sum, retval); + } + { + /* 0_____*___ + 1_____*___ + 2____ *___ + 3____*____ + 4____0____ + 5____*____ + 6___*_____ + 7___*_____ + 8___*_____ */ + auto sum = Sum(LoadU(df, d - xs3 - xs + 1), LoadU(df, d - xs3 + 1), + LoadU(df, d - xs - xs + 1), LoadU(df, d - xs), center, + LoadU(df, d + xs), LoadU(df, d + xs + xs - 1), + LoadU(df, d + xs3 - 1), LoadU(df, d + xs3 + xs - 1)); + retval = MulAdd(sum, sum, retval); + } + return retval; +} + +// Returns MaltaUnit. Avoids bounds-checks when x0 and y0 are known +// to be far enough from the image borders. "diffs" is a packed image. +template +static BUTTERAUGLI_INLINE float PaddedMaltaUnit(const ImageF& diffs, + const size_t x0, + const size_t y0) { + const float* BUTTERAUGLI_RESTRICT d = diffs.ConstRow(y0) + x0; + const HWY_CAPPED(float, 1) df; + if ((x0 >= 4 && y0 >= 4 && x0 < (diffs.xsize() - 4) && + y0 < (diffs.ysize() - 4))) { + return GetLane(MaltaUnit(Tag(), df, d, diffs.PixelsPerRow())); + } + + float borderimage[12 * 9]; // round up to 4 + for (int dy = 0; dy < 9; ++dy) { + int y = y0 + dy - 4; + if (y < 0 || static_cast(y) >= diffs.ysize()) { + for (int dx = 0; dx < 12; ++dx) { + borderimage[dy * 12 + dx] = 0.0f; + } + continue; + } + + const float* row_diffs = diffs.ConstRow(y); + for (int dx = 0; dx < 9; ++dx) { + int x = x0 + dx - 4; + if (x < 0 || static_cast(x) >= diffs.xsize()) { + borderimage[dy * 12 + dx] = 0.0f; + } else { + borderimage[dy * 12 + dx] = row_diffs[x]; + } + } + std::fill(borderimage + dy * 12 + 9, borderimage + dy * 12 + 12, 0.0f); + } + return GetLane(MaltaUnit(Tag(), df, &borderimage[4 * 12 + 4], 12)); +} + +template +static void 
MaltaDiffMapT(const Tag tag, const ImageF& lum0, const ImageF& lum1, + const double w_0gt1, const double w_0lt1, + const double norm1, const double len, + const double mulli, ImageF* HWY_RESTRICT diffs, + Image3F* HWY_RESTRICT block_diff_ac, size_t c) { + JXL_DASSERT(SameSize(lum0, lum1) && SameSize(lum0, *diffs)); + const size_t xsize_ = lum0.xsize(); + const size_t ysize_ = lum0.ysize(); + + const float kWeight0 = 0.5; + const float kWeight1 = 0.33; + + const double w_pre0gt1 = mulli * std::sqrt(kWeight0 * w_0gt1) / (len * 2 + 1); + const double w_pre0lt1 = mulli * std::sqrt(kWeight1 * w_0lt1) / (len * 2 + 1); + const float norm2_0gt1 = w_pre0gt1 * norm1; + const float norm2_0lt1 = w_pre0lt1 * norm1; + + for (size_t y = 0; y < ysize_; ++y) { + const float* HWY_RESTRICT row0 = lum0.ConstRow(y); + const float* HWY_RESTRICT row1 = lum1.ConstRow(y); + float* HWY_RESTRICT row_diffs = diffs->Row(y); + for (size_t x = 0; x < xsize_; ++x) { + const float absval = 0.5f * (std::abs(row0[x]) + std::abs(row1[x])); + const float diff = row0[x] - row1[x]; + const float scaler = norm2_0gt1 / (static_cast(norm1) + absval); + + // Primary symmetric quadratic objective. + row_diffs[x] = scaler * diff; + + const float scaler2 = norm2_0lt1 / (static_cast(norm1) + absval); + const double fabs0 = std::fabs(row0[x]); + + // Secondary half-open quadratic objectives. 
+ const double too_small = 0.55 * fabs0; + const double too_big = 1.05 * fabs0; + + if (row0[x] < 0) { + if (row1[x] > -too_small) { + double impact = scaler2 * (row1[x] + too_small); + row_diffs[x] -= impact; + } else if (row1[x] < -too_big) { + double impact = scaler2 * (-row1[x] - too_big); + row_diffs[x] += impact; + } + } else { + if (row1[x] < too_small) { + double impact = scaler2 * (too_small - row1[x]); + row_diffs[x] += impact; + } else if (row1[x] > too_big) { + double impact = scaler2 * (row1[x] - too_big); + row_diffs[x] -= impact; + } + } + } + } + + size_t y0 = 0; + // Top + for (; y0 < 4; ++y0) { + float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0); + for (size_t x0 = 0; x0 < xsize_; ++x0) { + row_diff[x0] += PaddedMaltaUnit(*diffs, x0, y0); + } + } + + const HWY_FULL(float) df; + const size_t aligned_x = std::max(size_t(4), Lanes(df)); + const intptr_t stride = diffs->PixelsPerRow(); + + // Middle + for (; y0 < ysize_ - 4; ++y0) { + const float* BUTTERAUGLI_RESTRICT row_in = diffs->ConstRow(y0); + float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0); + size_t x0 = 0; + for (; x0 < aligned_x; ++x0) { + row_diff[x0] += PaddedMaltaUnit(*diffs, x0, y0); + } + for (; x0 + Lanes(df) + 4 <= xsize_; x0 += Lanes(df)) { + auto diff = Load(df, row_diff + x0); + diff = Add(diff, MaltaUnit(Tag(), df, row_in + x0, stride)); + Store(diff, df, row_diff + x0); + } + + for (; x0 < xsize_; ++x0) { + row_diff[x0] += PaddedMaltaUnit(*diffs, x0, y0); + } + } + + // Bottom + for (; y0 < ysize_; ++y0) { + float* BUTTERAUGLI_RESTRICT row_diff = block_diff_ac->PlaneRow(c, y0); + for (size_t x0 = 0; x0 < xsize_; ++x0) { + row_diff[x0] += PaddedMaltaUnit(*diffs, x0, y0); + } + } +} + +// Need non-template wrapper functions for HWY_EXPORT. 
+// Non-template entry point for the MaltaTag kernel: forwards every argument
+// unchanged to MaltaDiffMapT with a MaltaTag() tag value.
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                  const double w_0lt1, const double norm1, const double len,
+                  const double mulli, ImageF* HWY_RESTRICT diffs,
+                  Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  MaltaDiffMapT(MaltaTag(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+                diffs, block_diff_ac, c);
+}
+
+// Same as MaltaDiffMap, but selects the MaltaTagLF() kernel.
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                    const double w_0lt1, const double norm1, const double len,
+                    const double mulli, ImageF* HWY_RESTRICT diffs,
+                    Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  MaltaDiffMapT(MaltaTagLF(), lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli,
+                diffs, block_diff_ac, c);
+}
+
+// Writes sqrt(mul * |xyb| + bias) - sqrt(bias) for every pixel of `xyb` into
+// `out`, where bias = mul * bias_arg. Subtracting sqrt(bias) maps an input of
+// zero to an output of zero.
+// NOTE(review): `out` is indexed over xyb's dimensions; presumably the caller
+// allocates it at least that large - confirm at the call sites.
+void DiffPrecompute(const ImageF& xyb, float mul, float bias_arg, ImageF* out) {
+  const size_t xsize = xyb.xsize();
+  const size_t ysize = xyb.ysize();
+  const float bias = mul * bias_arg;
+  const float sqrt_bias = sqrt(bias);
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_in = xyb.Row(y);
+    float* BUTTERAUGLI_RESTRICT row_out = out->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      // kBias makes sqrt behave more linearly.
+      row_out[x] = sqrt(mul * std::abs(row_in[x]) + bias) - sqrt_bias;
+    }
+  }
+}
+
+// std::log(80.0) / std::log(255.0);
+constexpr float kIntensityTargetNormalizationHack = 0.79079917404f;
+static const float kInternalGoodQualityThreshold =
+    17.83f * kIntensityTargetNormalizationHack;
+// Reciprocal of the threshold above.
+static const float kGlobalScale = 1.0 / kInternalGoodQualityThreshold;
+
+// Folds `v` into the running triple (min0, min1, min2). The branches assume
+// min0 <= min1 <= min2 on entry and keep that order; a `v` that is not smaller
+// than min2 leaves the triple untouched.
+void StoreMin3(const float v, float& min0, float& min1, float& min2) {
+  if (v < min2) {
+    if (v < min0) {
+      // New overall minimum: shift the previous two down one slot.
+      min2 = min1;
+      min1 = min0;
+      min0 = v;
+    } else if (v < min1) {
+      min2 = min1;
+      min1 = v;
+    } else {
+      min2 = v;
+    }
+  }
+}
+
+// Look for smooth areas near the area of degradation.
+// If the areas are generally smooth, don't do masking.
+// Soft erosion of `from` into `to`. For every pixel the candidates are the
+// pixel itself and whichever of its eight neighbours at an offset of kStep
+// pixels (horizontally, vertically and diagonally) lie inside the image.
+// StoreMin3 folds the candidates into (min0, min1, min2), which start at the
+// centre value and twice the centre value, and the result is the blend
+// 0.45 * min0 + 0.3 * min1 + 0.25 * min2.
+void FuzzyErosion(const ImageF& from, ImageF* to) {
+  const size_t xsize = from.xsize();
+  const size_t ysize = from.ysize();
+  static const int kStep = 3;
+  for (size_t y = 0; y < ysize; ++y) {
+    for (size_t x = 0; x < xsize; ++x) {
+      float min0 = from.Row(y)[x];
+      float min1 = 2 * min0;
+      float min2 = min1;
+      // The upper-bound tests are written as `pos + kStep < size` rather than
+      // `pos < size - kStep`: the subtraction wraps around in unsigned
+      // arithmetic when the dimension is smaller than kStep, which would let
+      // out-of-range rows/columns through.
+      if (x >= kStep) {
+        float v = from.Row(y)[x - kStep];
+        StoreMin3(v, min0, min1, min2);
+        if (y >= kStep) {
+          float v = from.Row(y - kStep)[x - kStep];
+          StoreMin3(v, min0, min1, min2);
+        }
+        if (y + kStep < ysize) {
+          float v = from.Row(y + kStep)[x - kStep];
+          StoreMin3(v, min0, min1, min2);
+        }
+      }
+      if (x + kStep < xsize) {
+        float v = from.Row(y)[x + kStep];
+        StoreMin3(v, min0, min1, min2);
+        if (y >= kStep) {
+          float v = from.Row(y - kStep)[x + kStep];
+          StoreMin3(v, min0, min1, min2);
+        }
+        if (y + kStep < ysize) {
+          float v = from.Row(y + kStep)[x + kStep];
+          StoreMin3(v, min0, min1, min2);
+        }
+      }
+      if (y >= kStep) {
+        float v = from.Row(y - kStep)[x];
+        StoreMin3(v, min0, min1, min2);
+      }
+      if (y + kStep < ysize) {
+        float v = from.Row(y + kStep)[x];
+        StoreMin3(v, min0, min1, min2);
+      }
+      to->Row(y)[x] = (0.45f * min0 + 0.3f * min1 + 0.25f * min2);
+    }
+  }
+}
+
+// Compute values of local frequency and dc masking based on the activity
+// in the two images. `diff_ac` may be null.
+//
+// `*mask` is reallocated to the size of mask0 and receives the eroded, blurred
+// DiffPrecompute() of mask0. When `diff_ac` is non-null, kMaskToErrorMul times
+// the squared difference of the two blurred images is added to it.
+void Mask(const ImageF& mask0, const ImageF& mask1,
+          const ButteraugliParams& params, BlurTemp* blur_temp,
+          ImageF* BUTTERAUGLI_RESTRICT mask,
+          ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  // Only X and Y components are involved in masking. B's influence
+  // is considered less important in the high frequency area, and we
+  // don't model masking from lower frequency signals.
+  const size_t xsize = mask0.xsize();
+  const size_t ysize = mask0.ysize();
+  *mask = ImageF(xsize, ysize);
+  static const float kMul = 6.19424080439;
+  static const float kBias = 12.61050594197;
+  static const float kRadius = 2.7;
+  ImageF diff0(xsize, ysize);
+  ImageF diff1(xsize, ysize);
+  ImageF blurred0(xsize, ysize);
+  ImageF blurred1(xsize, ysize);
+  DiffPrecompute(mask0, kMul, kBias, &diff0);
+  DiffPrecompute(mask1, kMul, kBias, &diff1);
+  Blur(diff0, kRadius, params, blur_temp, &blurred0);
+  FuzzyErosion(blurred0, &diff0);
+  Blur(diff1, kRadius, params, blur_temp, &blurred1);
+  // NOTE(review): diff1 is not read after this erosion; only blurred1 feeds
+  // diff_ac below.
+  FuzzyErosion(blurred1, &diff1);
+  for (size_t y = 0; y < ysize; ++y) {
+    for (size_t x = 0; x < xsize; ++x) {
+      mask->Row(y)[x] = diff0.Row(y)[x];
+      if (diff_ac != nullptr) {
+        static const float kMaskToErrorMul = 10.0;
+        float diff = blurred0.Row(y)[x] - blurred1.Row(y)[x];
+        diff_ac->Row(y)[x] += kMaskToErrorMul * diff * diff;
+      }
+    }
+  }
+}
+
+// `diff_ac` may be null.
+//
+// Builds one activity image per input from its HF and UHF bands and hands both
+// to Mask(). Channel 0 contributes (uhf + hf) * muls[0], channel 1 contributes
+// uhf * muls[1] + hf * muls[2]; the activity is the Euclidean norm of the two.
+// `temp` is not used by this function.
+void MaskPsychoImage(const PsychoImage& pi0, const PsychoImage& pi1,
+                     const size_t xsize, const size_t ysize,
+                     const ButteraugliParams& params, Image3F* temp,
+                     BlurTemp* blur_temp, ImageF* BUTTERAUGLI_RESTRICT mask,
+                     ImageF* BUTTERAUGLI_RESTRICT diff_ac) {
+  ImageF mask0(xsize, ysize);
+  ImageF mask1(xsize, ysize);
+  static const float muls[3] = {
+      2.5f,
+      0.4f,
+      0.4f,
+  };
+  // Silly and unoptimized approach here. TODO(jyrki): rework this.
+  for (size_t y = 0; y < ysize; ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_y_hf0 = pi0.hf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_y_hf1 = pi1.hf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_y_uhf0 = pi0.uhf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_y_uhf1 = pi1.uhf[1].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_x_hf0 = pi0.hf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_x_hf1 = pi1.hf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_x_uhf0 = pi0.uhf[0].Row(y);
+    const float* BUTTERAUGLI_RESTRICT row_x_uhf1 = pi1.uhf[0].Row(y);
+    float* BUTTERAUGLI_RESTRICT row0 = mask0.Row(y);
+    float* BUTTERAUGLI_RESTRICT row1 = mask1.Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      float xdiff0 = (row_x_uhf0[x] + row_x_hf0[x]) * muls[0];
+      float xdiff1 = (row_x_uhf1[x] + row_x_hf1[x]) * muls[0];
+      float ydiff0 = row_y_uhf0[x] * muls[1] + row_y_hf0[x] * muls[2];
+      float ydiff1 = row_y_uhf1[x] * muls[1] + row_y_hf1[x] * muls[2];
+      row0[x] = xdiff0 * xdiff0 + ydiff0 * ydiff0;
+      row0[x] = sqrt(row0[x]);
+      row1[x] = xdiff1 * xdiff1 + ydiff1 * ydiff1;
+      row1[x] = sqrt(row1[x]);
+    }
+  }
+  Mask(mask0, mask1, params, blur_temp, mask, diff_ac);
+}
+
+// Returns (kGlobalScale * (1 + mul / (scaler * delta + offset)))^2.
+double MaskY(double delta) {
+  static const double offset = 0.829591754942;
+  static const double scaler = 0.451936922203;
+  static const double mul = 2.5485944793;
+  const double c = mul / ((scaler * delta) + offset);
+  const double retval = kGlobalScale * (1.0 + c);
+  return retval * retval;
+}
+
+// Same formula as MaskY with a different set of constants.
+double MaskDcY(double delta) {
+  static const double offset = 0.20025578522;
+  static const double scaler = 3.87449418804;
+  static const double mul = 0.505054525019;
+  const double c = mul / ((scaler * delta) + offset);
+  const double retval = kGlobalScale * (1.0 + c);
+  return retval * retval;
+}
+
+// Sum of the three channel values, each multiplied by the same `mask`.
+inline float MaskColor(const float color[3], const float mask) {
+  return color[0] * mask + color[1] * mask + color[2] * mask;
+}
+
+// Diffmap := sqrt of sum{diff images multiplied by X and Y/B masks}
+// Collapses the per-channel DC and AC difference planes into one distance
+// image. For each pixel the mask value is turned into an AC weight (MaskY) and
+// a DC weight (MaskDcY), channel 0 of both difference triples is scaled by
+// `xmul`, and the square root of the two MaskColor() sums is stored in
+// `result`. `result` must already have the same size as `mask`.
+void CombineChannelsToDiffmap(const ImageF& mask, const Image3F& block_diff_dc,
+                              const Image3F& block_diff_ac, float xmul,
+                              ImageF* result) {
+  JXL_CHECK(SameSize(mask, *result));
+  size_t xsize = mask.xsize();
+  size_t ysize = mask.ysize();
+  for (size_t y = 0; y < ysize; ++y) {
+    float* BUTTERAUGLI_RESTRICT row_out = result->Row(y);
+    for (size_t x = 0; x < xsize; ++x) {
+      float val = mask.Row(y)[x];
+      float maskval = MaskY(val);
+      float dc_maskval = MaskDcY(val);
+      float diff_dc[3];
+      float diff_ac[3];
+      for (int i = 0; i < 3; ++i) {
+        diff_dc[i] = block_diff_dc.PlaneRow(i, y)[x];
+        diff_ac[i] = block_diff_ac.PlaneRow(i, y)[x];
+      }
+      // Only channel 0 is scaled by xmul.
+      diff_ac[0] *= xmul;
+      diff_dc[0] *= xmul;
+      row_out[x] =
+          sqrt(MaskColor(diff_dc, dc_maskval) + MaskColor(diff_ac, maskval));
+    }
+  }
+}
+
+// Adds weighted L2 difference between i0 and i1 to diffmap.
+// Plane `c` of `diffmap` accumulates w * (i0 - i1)^2; a zero weight is a no-op.
+// NOTE(review): the inner loop advances by whole vectors with no scalar tail,
+// so it touches elements up to the next multiple of Lanes(d) past xsize -
+// presumably image rows are padded for that; confirm against ImageF.
+static void L2Diff(const ImageF& i0, const ImageF& i1, const float w,
+                   Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  if (w == 0) return;
+
+  const HWY_FULL(float) d;
+  const auto weight = Set(d, w);
+
+  for (size_t y = 0; y < i0.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
+    const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+      const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x));
+      const auto diff2 = Mul(diff, diff);
+      const auto prev = Load(d, row_diff + x);
+      Store(MulAdd(diff2, weight, prev), d, row_diff + x);
+    }
+  }
+}
+
+// Initializes diffmap to the weighted L2 difference between i0 and i1.
+// Overwrites plane `c` of `diffmap` with w * (i0 - i1)^2.
+// A zero weight returns immediately and leaves the plane as it was.
+static void SetL2Diff(const ImageF& i0, const ImageF& i1, const float w,
+                      Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  if (w == 0) return;
+
+  const HWY_FULL(float) d;
+  const auto weight = Set(d, w);
+
+  for (size_t y = 0; y < i0.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row0 = i0.ConstRow(y);
+    const float* BUTTERAUGLI_RESTRICT row1 = i1.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+      const auto diff = Sub(Load(d, row0 + x), Load(d, row1 + x));
+      const auto diff2 = Mul(diff, diff);
+      Store(Mul(diff2, weight), d, row_diff + x);
+    }
+  }
+}
+
+// i0 is the original image.
+// i1 is the deformed copy.
+//
+// Adds two penalties to plane `c` of `diffmap`, both weights scaled by 0.8:
+//  - w_0gt1 * (i0 - i1)^2, the symmetric term;
+//  - w_0lt1 * v^2, where v measures how far i1 falls outside the band between
+//    0.4 * |i0| and |i0| on the same side of zero as i0 (v is zero inside it).
+static void L2DiffAsymmetric(const ImageF& i0, const ImageF& i1, float w_0gt1,
+                             float w_0lt1,
+                             Image3F* BUTTERAUGLI_RESTRICT diffmap, size_t c) {
+  if (w_0gt1 == 0 && w_0lt1 == 0) {
+    return;
+  }
+
+  const HWY_FULL(float) d;
+  const auto vw_0gt1 = Set(d, w_0gt1 * 0.8);
+  const auto vw_0lt1 = Set(d, w_0lt1 * 0.8);
+
+  for (size_t y = 0; y < i0.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row0 = i0.Row(y);
+    const float* BUTTERAUGLI_RESTRICT row1 = i1.Row(y);
+    float* BUTTERAUGLI_RESTRICT row_diff = diffmap->PlaneRow(c, y);
+
+    for (size_t x = 0; x < i0.xsize(); x += Lanes(d)) {
+      const auto val0 = Load(d, row0 + x);
+      const auto val1 = Load(d, row1 + x);
+
+      // Primary symmetric quadratic objective.
+      const auto diff = Sub(val0, val1);
+      auto total = MulAdd(Mul(diff, diff), vw_0gt1, Load(d, row_diff + x));
+
+      // Secondary half-open quadratic objectives.
+      const auto fabs0 = Abs(val0);
+      const auto too_small = Mul(Set(d, 0.4), fabs0);
+      const auto too_big = fabs0;
+
+      // Excess for a negative original (if_neg) and a non-negative original
+      // (if_pos); the lane-wise select below picks by the sign of val0.
+      const auto if_neg = IfThenElse(
+          Gt(val1, Neg(too_small)), Add(val1, too_small),
+          IfThenElseZero(Lt(val1, Neg(too_big)), Sub(Neg(val1), too_big)));
+      const auto if_pos =
+          IfThenElse(Lt(val1, too_small), Sub(too_small, val1),
+                     IfThenElseZero(Gt(val1, too_big), Sub(val1, too_big)));
+      const auto v = IfThenElse(Lt(val0, Zero(d)), if_neg, if_pos);
+      total = MulAdd(vw_0lt1, Mul(v, v), total);
+      Store(total, d, row_diff + x);
+    }
+  }
+}
+
+// A simple HDR compatible gamma function.
+// Returns kRetMul * log2(max(v, 0) + 9.97...) + kRetAdd for every lane.
+template <class DF, class V>
+V Gamma(const DF df, V v) {
+  // ln(2) constant folded in because we want std::log but have FastLog2f.
+  const auto kRetMul = Set(df, 19.245013259874995f * 0.693147180559945f);
+  const auto kRetAdd = Set(df, -23.16046239805755);
+  // This should happen rarely, but may lead to a NaN in log, which is
+  // undesirable. Since negative photons don't exist we solve the NaNs by
+  // clamping here.
+  v = ZeroIfNegative(v);
+
+  const auto biased = Add(v, Set(df, 9.9710635769299145));
+  const auto log = FastLog2f(df, biased);
+  // We could fold this into a custom Log2 polynomial, but there would be
+  // relatively little gain.
+  return MulAdd(kRetMul, log, kRetAdd);
+}
+
+// Mixes three input lanes into three outputs: each output is a weighted sum of
+// (in0, in1, in2) plus a bias (mix3, mix7 and mix11 respectively). When Clamp
+// is true each output is additionally clamped from below to its own bias.
+// Clamp cannot be deduced from the arguments, so it is the leading template
+// parameter and must be given explicitly by the caller.
+template <bool Clamp, class DF, class V>
+BUTTERAUGLI_INLINE void OpsinAbsorbance(const DF df, const V& in0, const V& in1,
+                                        const V& in2, V* JXL_RESTRICT out0,
+                                        V* JXL_RESTRICT out1,
+                                        V* JXL_RESTRICT out2) {
+  // https://en.wikipedia.org/wiki/Photopsin absorbance modeling.
+  static const double mixi0 = 0.29956550340058319;
+  static const double mixi1 = 0.63373087833825936;
+  static const double mixi2 = 0.077705617820981968;
+  static const double mixi3 = 1.7557483643287353;
+  static const double mixi4 = 0.22158691104574774;
+  static const double mixi5 = 0.69391388044116142;
+  static const double mixi6 = 0.0987313588422;
+  static const double mixi7 = 1.7557483643287353;
+  static const double mixi8 = 0.02;
+  static const double mixi9 = 0.02;
+  static const double mixi10 = 0.20480129041026129;
+  static const double mixi11 = 12.226454707163354;
+
+  const V mix0 = Set(df, mixi0);
+  const V mix1 = Set(df, mixi1);
+  const V mix2 = Set(df, mixi2);
+  const V mix3 = Set(df, mixi3);
+  const V mix4 = Set(df, mixi4);
+  const V mix5 = Set(df, mixi5);
+  const V mix6 = Set(df, mixi6);
+  const V mix7 = Set(df, mixi7);
+  const V mix8 = Set(df, mixi8);
+  const V mix9 = Set(df, mixi9);
+  const V mix10 = Set(df, mixi10);
+  const V mix11 = Set(df, mixi11);
+
+  *out0 = MulAdd(mix0, in0, MulAdd(mix1, in1, MulAdd(mix2, in2, mix3)));
+  *out1 = MulAdd(mix4, in0, MulAdd(mix5, in1, MulAdd(mix6, in2, mix7)));
+  *out2 = MulAdd(mix8, in0, MulAdd(mix9, in1, MulAdd(mix10, in2, mix11)));
+
+  if (Clamp) {
+    *out0 = Max(*out0, mix3);
+    *out1 = Max(*out1, mix7);
+    *out2 = Max(*out2, mix11);
+  }
+}
+
+// `blurred` is a temporary image used inside this function and not returned.
+// Converts `rgb` into the three-plane image returned as `xyb`.
+//
+// Each plane of `rgb` is first blurred (sigma 1.2) into `blurred`. Per vector
+// of pixels, a sensitivity Gamma(p) / p is derived from the opsin-mixed
+// blurred values p (both p and the sensitivity are floored at 1e-4). The
+// opsin-mixed unblurred values are multiplied by that sensitivity and floored
+// at the opsin bias constants. Plane 0 receives mixed0 - mixed1, plane 1
+// receives mixed0 + mixed1, plane 2 receives mixed2.
+// Inputs are scaled by params.intensity_target before mixing.
+Image3F OpsinDynamicsImage(const Image3F& rgb, const ButteraugliParams& params,
+                           Image3F* blurred, BlurTemp* blur_temp) {
+  Image3F xyb(rgb.xsize(), rgb.ysize());
+  const double kSigma = 1.2;
+  Blur(rgb.Plane(0), kSigma, params, blur_temp, &blurred->Plane(0));
+  Blur(rgb.Plane(1), kSigma, params, blur_temp, &blurred->Plane(1));
+  Blur(rgb.Plane(2), kSigma, params, blur_temp, &blurred->Plane(2));
+  const HWY_FULL(float) df;
+  const auto intensity_target_multiplier = Set(df, params.intensity_target);
+  for (size_t y = 0; y < rgb.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_r = rgb.ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT row_g = rgb.ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT row_b = rgb.ConstPlaneRow(2, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_r =
+        blurred->ConstPlaneRow(0, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_g =
+        blurred->ConstPlaneRow(1, y);
+    const float* BUTTERAUGLI_RESTRICT row_blurred_b =
+        blurred->ConstPlaneRow(2, y);
+    float* BUTTERAUGLI_RESTRICT row_out_x = xyb.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT row_out_y = xyb.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT row_out_b = xyb.PlaneRow(2, y);
+    const auto min = Set(df, 1e-4f);
+    for (size_t x = 0; x < rgb.xsize(); x += Lanes(df)) {
+      auto sensitivity0 = Undefined(df);
+      auto sensitivity1 = Undefined(df);
+      auto sensitivity2 = Undefined(df);
+      {
+        // Calculate sensitivity based on the smoothed image gamma derivative.
+        auto pre_mixed0 = Undefined(df);
+        auto pre_mixed1 = Undefined(df);
+        auto pre_mixed2 = Undefined(df);
+        // NOTE(review): the explicit Clamp argument had been stripped from
+        // both OpsinAbsorbance calls in this function. <true> here and <false>
+        // below match the shape of the code (the unblurred path applies its
+        // own Max() floor afterwards) - confirm against the upstream source.
+        OpsinAbsorbance<true>(
+            df, Mul(Load(df, row_blurred_r + x), intensity_target_multiplier),
+            Mul(Load(df, row_blurred_g + x), intensity_target_multiplier),
+            Mul(Load(df, row_blurred_b + x), intensity_target_multiplier),
+            &pre_mixed0, &pre_mixed1, &pre_mixed2);
+        pre_mixed0 = Max(pre_mixed0, min);
+        pre_mixed1 = Max(pre_mixed1, min);
+        pre_mixed2 = Max(pre_mixed2, min);
+        sensitivity0 = Div(Gamma(df, pre_mixed0), pre_mixed0);
+        sensitivity1 = Div(Gamma(df, pre_mixed1), pre_mixed1);
+        sensitivity2 = Div(Gamma(df, pre_mixed2), pre_mixed2);
+        sensitivity0 = Max(sensitivity0, min);
+        sensitivity1 = Max(sensitivity1, min);
+        sensitivity2 = Max(sensitivity2, min);
+      }
+      auto cur_mixed0 = Undefined(df);
+      auto cur_mixed1 = Undefined(df);
+      auto cur_mixed2 = Undefined(df);
+      OpsinAbsorbance<false>(
+          df, Mul(Load(df, row_r + x), intensity_target_multiplier),
+          Mul(Load(df, row_g + x), intensity_target_multiplier),
+          Mul(Load(df, row_b + x), intensity_target_multiplier), &cur_mixed0,
+          &cur_mixed1, &cur_mixed2);
+      cur_mixed0 = Mul(cur_mixed0, sensitivity0);
+      cur_mixed1 = Mul(cur_mixed1, sensitivity1);
+      cur_mixed2 = Mul(cur_mixed2, sensitivity2);
+      // This is a kludge. The negative values should be zeroed away before
+      // blurring. Ideally there would be no negative values in the first place.
+      const auto min01 = Set(df, 1.7557483643287353f);
+      const auto min2 = Set(df, 12.226454707163354f);
+      cur_mixed0 = Max(cur_mixed0, min01);
+      cur_mixed1 = Max(cur_mixed1, min01);
+      cur_mixed2 = Max(cur_mixed2, min2);
+
+      Store(Sub(cur_mixed0, cur_mixed1), df, row_out_x + x);
+      Store(Add(cur_mixed0, cur_mixed1), df, row_out_y + x);
+      Store(cur_mixed2, df, row_out_b + x);
+    }
+  }
+  return xyb;
+}
+
+// NOLINTNEXTLINE(google-readability-namespace-comments)
+}  // namespace HWY_NAMESPACE
+}  // namespace jxl
+HWY_AFTER_NAMESPACE();
+
+#if HWY_ONCE
+namespace jxl {
+
+HWY_EXPORT(SeparateFrequencies);       // Local function.
+HWY_EXPORT(MaskPsychoImage);           // Local function.
+HWY_EXPORT(L2DiffAsymmetric);          // Local function.
+HWY_EXPORT(L2Diff);                    // Local function.
+HWY_EXPORT(SetL2Diff);                 // Local function.
+HWY_EXPORT(CombineChannelsToDiffmap);  // Local function.
+HWY_EXPORT(MaltaDiffMap);              // Local function.
+HWY_EXPORT(MaltaDiffMapLF);            // Local function.
+HWY_EXPORT(OpsinDynamicsImage);        // Local function.
+
+#if BUTTERAUGLI_ENABLE_CHECKS
+
+// True when the bit pattern of `x` has an all-ones exponent and a non-zero
+// mantissa.
+static inline bool IsNan(const float x) {
+  uint32_t bits;
+  memcpy(&bits, &x, sizeof(bits));
+  const uint32_t bitmask_exp = 0x7F800000;
+  return (bits & bitmask_exp) == bitmask_exp && (bits & 0x7FFFFF);
+}
+
+// Double-precision variant: tests the two bit-pattern ranges (sign clear and
+// sign set) that have an all-ones exponent and a non-zero mantissa.
+static inline bool IsNan(const double x) {
+  uint64_t bits;
+  memcpy(&bits, &x, sizeof(bits));
+  return (0x7ff0000000000001ULL <= bits && bits <= 0x7fffffffffffffffULL) ||
+         (0xfff0000000000001ULL <= bits && bits <= 0xffffffffffffffffULL);
+}
+
+// Scans `image`; on the first NaN prints its position and calls exit(1).
+static inline void CheckImage(const ImageF& image, const char* name) {
+  for (size_t y = 0; y < image.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row = image.Row(y);
+    for (size_t x = 0; x < image.xsize(); ++x) {
+      if (IsNan(row[x])) {
+        printf("NAN: Image %s @ %" PRIuS ",%" PRIuS " (of %" PRIuS ",%" PRIuS
+               ")\n",
+               name, x, y, image.xsize(), image.ysize());
+        exit(1);
+      }
+    }
+  }
+}
+
+// Prints the source line and `str`, then aborts, when `x` is NaN.
+#define CHECK_NAN(x, str)                \
+  do {                                   \
+    if (IsNan(x)) {                      \
+      printf("%d: %s\n", __LINE__, str); \
+      abort();                           \
+    }                                    \
+  } while (0)
+
+#define CHECK_IMAGE(image, name) CheckImage(image, name)
+
+#else  // BUTTERAUGLI_ENABLE_CHECKS
+
+// With checks disabled both macros expand to nothing.
+#define CHECK_NAN(x, str)
+#define CHECK_IMAGE(image, name)
+
+#endif  // BUTTERAUGLI_ENABLE_CHECKS
+
+// Calculate a 2x2 subsampled image for purposes of recursive butteraugli at
+// multiresolution.
+// The result has ceil(size / 2) pixels per dimension. Every input pixel adds
+// a quarter of its value to the output cell that covers it. When a dimension
+// is odd, the last output column/row only received half as many samples and
+// is doubled to compensate.
+static Image3F SubSample2x(const Image3F& in) {
+  size_t xs = (in.xsize() + 1) / 2;
+  size_t ys = (in.ysize() + 1) / 2;
+  Image3F retval(xs, ys);
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < ys; ++y) {
+      for (size_t x = 0; x < xs; ++x) {
+        retval.PlaneRow(c, y)[x] = 0;
+      }
+    }
+  }
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t y = 0; y < in.ysize(); ++y) {
+      for (size_t x = 0; x < in.xsize(); ++x) {
+        retval.PlaneRow(c, y / 2)[x / 2] += 0.25f * in.PlaneRow(c, y)[x];
+      }
+    }
+    if ((in.xsize() & 1) != 0) {
+      for (size_t y = 0; y < retval.ysize(); ++y) {
+        size_t last_column = retval.xsize() - 1;
+        retval.PlaneRow(c, y)[last_column] *= 2.0f;
+      }
+    }
+    if ((in.ysize() & 1) != 0) {
+      for (size_t x = 0; x < retval.xsize(); ++x) {
+        size_t last_row = retval.ysize() - 1;
+        retval.PlaneRow(c, last_row)[x] *= 2.0f;
+      }
+    }
+  }
+  return retval;
+}
+
+// Supersample src by 2x and add it to dest.
+// Each dest pixel becomes dest * (1 - 0.3 * w) + w * src[y / 2][x / 2].
+static void AddSupersampled2x(const ImageF& src, float w, ImageF& dest) {
+  for (size_t y = 0; y < dest.ysize(); ++y) {
+    for (size_t x = 0; x < dest.xsize(); ++x) {
+      // There will be less errors from the more averaged images.
+      // We take it into account to some extent using a scaler.
+      static const double kHeuristicMixingValue = 0.3;
+      dest.Row(y)[x] *= 1.0 - kHeuristicMixingValue * w;
+      dest.Row(y)[x] += w * src.Row(y / 2)[x / 2];
+    }
+  }
+}
+
+// Hands out the shared scratch image. The in-use flag is set atomically and
+// the assertion fires if it was already set; pair every call with
+// ReleaseTemp().
+Image3F* ButteraugliComparator::Temp() const {
+  bool was_in_use = temp_in_use_.test_and_set(std::memory_order_acq_rel);
+  JXL_ASSERT(!was_in_use);
+  (void)was_in_use;
+  return &temp_;
+}
+
+// Clears the in-use flag set by Temp().
+void ButteraugliComparator::ReleaseTemp() const { temp_in_use_.clear(); }
+
+// Precomputes the frequency decomposition (pi0_) of the reference image and
+// recursively builds a comparator for the 2x-subsampled image in sub_.
+// Images narrower or shorter than 8 pixels are left without either, which
+// also ends the recursion.
+ButteraugliComparator::ButteraugliComparator(const Image3F& rgb0,
+                                             const ButteraugliParams& params)
+    : xsize_(rgb0.xsize()),
+      ysize_(rgb0.ysize()),
+      params_(params),
+      temp_(xsize_, ysize_) {
+  if (xsize_ < 8 || ysize_ < 8) {
+    return;
+  }
+
+  Image3F xyb0 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(rgb0, params, Temp(),
+                                                          &blur_temp_);
+  ReleaseTemp();
+  HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+  (xsize_, ysize_, params_, &blur_temp_, xyb0, pi0_);
+
+  // Awful recursive construction of samples of different resolution.
+  // This is an after-thought and possibly somewhat parallel in
+  // functionality with the PsychoImage multi-resolution approach.
+  sub_.reset(new ButteraugliComparator(SubSample2x(rgb0), params));
+}
+
+// Computes the masking image of the reference alone: pi0_ is passed as both
+// inputs and no AC difference plane is requested.
+void ButteraugliComparator::Mask(ImageF* BUTTERAUGLI_RESTRICT mask) const {
+  HWY_DYNAMIC_DISPATCH(MaskPsychoImage)
+  (pi0_, pi0_, xsize_, ysize_, params_, Temp(), &blur_temp_, mask, nullptr);
+  ReleaseTemp();
+}
+
+// Writes the distance map between the reference image and `rgb1` to `result`.
+// If the half-resolution comparator is at least 8x8, its distance map is
+// mixed in with weight 0.5. A comparator smaller than 8x8 only zero-fills
+// `result`.
+void ButteraugliComparator::Diffmap(const Image3F& rgb1, ImageF& result) const {
+  if (xsize_ < 8 || ysize_ < 8) {
+    ZeroFillImage(&result);
+    return;
+  }
+  const Image3F xyb1 = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+      rgb1, params_, Temp(), &blur_temp_);
+  ReleaseTemp();
+  DiffmapOpsinDynamicsImage(xyb1, result);
+  if (sub_) {
+    if (sub_->xsize_ < 8 || sub_->ysize_ < 8) {
+      return;
+    }
+    const Image3F sub_xyb = HWY_DYNAMIC_DISPATCH(OpsinDynamicsImage)(
+        SubSample2x(rgb1), params_, sub_->Temp(), &sub_->blur_temp_);
+    sub_->ReleaseTemp();
+    ImageF subresult;
+    sub_->DiffmapOpsinDynamicsImage(sub_xyb, subresult);
+    AddSupersampled2x(subresult, 0.5, result);
+  }
+}
+
+// Same as Diffmap, but starts from an image that has already been through
+// OpsinDynamicsImage. `result` is reallocated to xsize_ x ysize_.
+void ButteraugliComparator::DiffmapOpsinDynamicsImage(const Image3F& xyb1,
+                                                      ImageF& result) const {
+  if (xsize_ < 8 || ysize_ < 8) {
+    ZeroFillImage(&result);
+    return;
+  }
+  PsychoImage pi1;
+  HWY_DYNAMIC_DISPATCH(SeparateFrequencies)
+  (xsize_, ysize_, params_, &blur_temp_, xyb1, pi1);
+  result = ImageF(xsize_, ysize_);
+  DiffmapPsychoImage(pi1, result);
+}
+
+namespace {
+
+// Dispatches to the per-target MaltaDiffMap with fixed len / mulli constants.
+void MaltaDiffMap(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                  const double w_0lt1, const double norm1,
+                  ImageF* HWY_RESTRICT diffs,
+                  Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  const double len = 3.75;
+  static const double mulli = 0.39905817637;
+  HWY_DYNAMIC_DISPATCH(MaltaDiffMap)
+  (lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, diffs, block_diff_ac, c);
+}
+
+// Dispatches to the per-target MaltaDiffMapLF with its own mulli constant.
+void MaltaDiffMapLF(const ImageF& lum0, const ImageF& lum1, const double w_0gt1,
+                    const double w_0lt1, const double norm1,
+                    ImageF* HWY_RESTRICT diffs,
+                    Image3F* HWY_RESTRICT block_diff_ac, size_t c) {
+  const double len = 3.75;
+  static const double mulli = 0.611612573796;
+  HWY_DYNAMIC_DISPATCH(MaltaDiffMapLF)
+  (lum0, lum1, w_0gt1, w_0lt1, norm1, len, mulli, diffs, block_diff_ac, c);
+}
+
+}  // namespace
+
+// Compares the stored decomposition pi0_ against `pi1` and writes the combined
+// distance map to `diffmap`. `diffmap` is passed to CombineChannelsToDiffmap
+// as its output image.
+//
+// AC error is accumulated in block_diff_ac from the Malta kernels on the UHF,
+// HF and MF bands of channels 1 and 0, the asymmetric L2 term on HF (channels
+// 0 and 1 only) and the L2 term on MF. DC error (block_diff_dc) is the L2 term
+// on LF. MaskPsychoImage then supplies the mask and adds its own error term to
+// plane 1 of block_diff_ac.
+void ButteraugliComparator::DiffmapPsychoImage(const PsychoImage& pi1,
+                                               ImageF& diffmap) const {
+  if (xsize_ < 8 || ysize_ < 8) {
+    ZeroFillImage(&diffmap);
+    return;
+  }
+
+  const float hf_asymmetry_ = params_.hf_asymmetry;
+  const float xmul_ = params_.xmul;
+
+  ImageF diffs(xsize_, ysize_);
+  Image3F block_diff_ac(xsize_, ysize_);
+  ZeroFillImage(&block_diff_ac);
+  static const double wUhfMalta = 1.10039032555;
+  static const double norm1Uhf = 71.7800275169;
+  MaltaDiffMap(pi0_.uhf[1], pi1.uhf[1], wUhfMalta * hf_asymmetry_,
+               wUhfMalta / hf_asymmetry_, norm1Uhf, &diffs, &block_diff_ac, 1);
+
+  static const double wUhfMaltaX = 173.5;
+  static const double norm1UhfX = 5.0;
+  MaltaDiffMap(pi0_.uhf[0], pi1.uhf[0], wUhfMaltaX * hf_asymmetry_,
+               wUhfMaltaX / hf_asymmetry_, norm1UhfX, &diffs, &block_diff_ac,
+               0);
+
+  static const double wHfMalta = 18.7237414387;
+  static const double norm1Hf = 4498534.45232;
+  MaltaDiffMapLF(pi0_.hf[1], pi1.hf[1], wHfMalta * std::sqrt(hf_asymmetry_),
+                 wHfMalta / std::sqrt(hf_asymmetry_), norm1Hf, &diffs,
+                 &block_diff_ac, 1);
+
+  static const double wHfMaltaX = 6923.99476109;
+  static const double norm1HfX = 8051.15833247;
+  MaltaDiffMapLF(pi0_.hf[0], pi1.hf[0], wHfMaltaX * std::sqrt(hf_asymmetry_),
+                 wHfMaltaX / std::sqrt(hf_asymmetry_), norm1HfX, &diffs,
+                 &block_diff_ac, 0);
+
+  static const double wMfMalta = 37.0819870399;
+  static const double norm1Mf = 130262059.556;
+  MaltaDiffMapLF(pi0_.mf.Plane(1), pi1.mf.Plane(1), wMfMalta, wMfMalta, norm1Mf,
+                 &diffs, &block_diff_ac, 1);
+
+  static const double wMfMaltaX = 8246.75321353;
+  static const double norm1MfX = 1009002.70582;
+  MaltaDiffMapLF(pi0_.mf.Plane(0), pi1.mf.Plane(0), wMfMaltaX, wMfMaltaX,
+                 norm1MfX, &diffs, &block_diff_ac, 0);
+
+  // Rows: HF weights, MF weights, LF weights; columns: channel 0, 1, 2.
+  static const double wmul[9] = {
+      400.0,         1.50815703118,  0,
+      2150.0,        10.6195433239,  16.2176043152,
+      29.2353797994, 0.844626970982, 0.703646627719,
+  };
+  Image3F block_diff_dc(xsize_, ysize_);
+  for (size_t c = 0; c < 3; ++c) {
+    if (c < 2) {  // No blue channel error accumulated at HF.
+      HWY_DYNAMIC_DISPATCH(L2DiffAsymmetric)
+      (pi0_.hf[c], pi1.hf[c], wmul[c] * hf_asymmetry_, wmul[c] / hf_asymmetry_,
+       &block_diff_ac, c);
+    }
+    HWY_DYNAMIC_DISPATCH(L2Diff)
+    (pi0_.mf.Plane(c), pi1.mf.Plane(c), wmul[3 + c], &block_diff_ac, c);
+    HWY_DYNAMIC_DISPATCH(SetL2Diff)
+    (pi0_.lf.Plane(c), pi1.lf.Plane(c), wmul[6 + c], &block_diff_dc, c);
+  }
+
+  ImageF mask;
+  HWY_DYNAMIC_DISPATCH(MaskPsychoImage)
+  (pi0_, pi1, xsize_, ysize_, params_, Temp(), &blur_temp_, &mask,
+   &block_diff_ac.Plane(1));
+  ReleaseTemp();
+
+  HWY_DYNAMIC_DISPATCH(CombineChannelsToDiffmap)
+  (mask, block_diff_dc, block_diff_ac, xmul_, &diffmap);
+}
+
+// Returns the largest value in `diffmap`, or 0 when no value exceeds 0.
+// `params` is not used.
+double ButteraugliScoreFromDiffmap(const ImageF& diffmap,
+                                   const ButteraugliParams* params) {
+  float retval = 0.0f;
+  for (size_t y = 0; y < diffmap.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row = diffmap.ConstRow(y);
+    for (size_t x = 0; x < diffmap.xsize(); ++x) {
+      retval = std::max(retval, row[x]);
+    }
+  }
+  return retval;
+}
+
+// Convenience overload: builds default ButteraugliParams with the given
+// hf_asymmetry and xmul, then forwards.
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+                        double hf_asymmetry, double xmul, ImageF& diffmap) {
+  ButteraugliParams params;
+  params.hf_asymmetry = hf_asymmetry;
+  params.xmul = xmul;
+  return ButteraugliDiffmap(rgb0, rgb1, params, diffmap);
+}
+
+// Computes the distance map between rgb0 and rgb1 into `diffmap`.
+// Fails for empty images and for images of different size. Images smaller
+// than 8 pixels in either dimension are padded to at least 8 by replicating
+// edge pixels, compared at that size, and the original region is copied back.
+bool ButteraugliDiffmap(const Image3F& rgb0, const Image3F& rgb1,
+                        const ButteraugliParams& params, ImageF& diffmap) {
+  const size_t xsize = rgb0.xsize();
+  const size_t ysize = rgb0.ysize();
+  if (xsize < 1 || ysize < 1) {
+    return JXL_FAILURE("Zero-sized image");
+  }
+  if (!SameSize(rgb0, rgb1)) {
+    return JXL_FAILURE("Size mismatch");
+  }
+  static const int kMax = 8;
+  if (xsize < kMax || ysize < kMax) {
+    // Butteraugli values for small (where xsize or ysize is smaller
+    // than 8 pixels) images are non-sensical, but most likely it is
+    // less disruptive to try to compute something than just give up.
+    // Temporarily extend the borders of the image to fit 8 x 8 size.
+    size_t xborder = xsize < kMax ? (kMax - xsize) / 2 : 0;
+    size_t yborder = ysize < kMax ? (kMax - ysize) / 2 : 0;
+    // kMax is an int and the sizes are size_t, so the template argument has to
+    // be spelled out for std::max / std::min to resolve.
+    size_t xscaled = std::max<size_t>(kMax, xsize);
+    size_t yscaled = std::max<size_t>(kMax, ysize);
+    Image3F scaled0(xscaled, yscaled);
+    Image3F scaled1(xscaled, yscaled);
+    for (int i = 0; i < 3; ++i) {
+      for (size_t y = 0; y < yscaled; ++y) {
+        for (size_t x = 0; x < xscaled; ++x) {
+          // Clamp the source coordinate into the original image.
+          size_t x2 =
+              std::min<size_t>(xsize - 1, x > xborder ? x - xborder : 0);
+          size_t y2 =
+              std::min<size_t>(ysize - 1, y > yborder ? y - yborder : 0);
+          scaled0.PlaneRow(i, y)[x] = rgb0.PlaneRow(i, y2)[x2];
+          scaled1.PlaneRow(i, y)[x] = rgb1.PlaneRow(i, y2)[x2];
+        }
+      }
+    }
+    ImageF diffmap_scaled;
+    const bool ok =
+        ButteraugliDiffmap(scaled0, scaled1, params, diffmap_scaled);
+    diffmap = ImageF(xsize, ysize);
+    for (size_t y = 0; y < ysize; ++y) {
+      for (size_t x = 0; x < xsize; ++x) {
+        diffmap.Row(y)[x] = diffmap_scaled.Row(y + yborder)[x + xborder];
+      }
+    }
+    return ok;
+  }
+  ButteraugliComparator butteraugli(rgb0, params);
+  butteraugli.Diffmap(rgb1, diffmap);
+  return true;
+}
+
+// Convenience overload: builds default ButteraugliParams with the given
+// hf_asymmetry and xmul, then forwards.
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+                          float hf_asymmetry, float xmul, ImageF& diffmap,
+                          double& diffvalue) {
+  ButteraugliParams params;
+  params.hf_asymmetry = hf_asymmetry;
+  params.xmul = xmul;
+  return ButteraugliInterface(rgb0, rgb1, params, diffmap, diffvalue);
+}
+
+// Computes the distance map and stores its maximum in `diffvalue`.
+// Returns false, leaving `diffvalue` untouched, if the map cannot be computed.
+bool ButteraugliInterface(const Image3F& rgb0, const Image3F& rgb1,
+                          const ButteraugliParams& params, ImageF& diffmap,
+                          double& diffvalue) {
+  if (!ButteraugliDiffmap(rgb0, rgb1, params, diffmap)) {
+    return false;
+  }
+  diffvalue = ButteraugliScoreFromDiffmap(diffmap, &params);
+  return true;
+}
+
+// Maps a score through a logistic curve centred at 1.0. Scores below 1.0 land
+// in [scaler .. 2.0], scores of 1.0 and above in [0 .. scaler].
+double ButteraugliFuzzyClass(double score) {
+  static const double fuzzy_width_up = 4.8;
+  static const double fuzzy_width_down = 4.8;
+  static const double m0 = 2.0;
+  static const double scaler = 0.7777;
+  double val;
+  if (score < 1.0) {
+    // val in [scaler .. 2.0]
+    val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_down));
+    val -= 1.0;           // from [1 .. 2] to [0 .. 1]
+    val *= 2.0 - scaler;  // from [0 .. 1] to [0 .. 2.0 - scaler]
+    val += scaler;        // from [0 .. 2.0 - scaler] to [scaler .. 2.0]
+  } else {
+    // val in [0 .. scaler]
+    val = m0 / (1.0 + exp((score - 1.0) * fuzzy_width_up));
+    val *= scaler;
+  }
+  return val;
+}
+
+// #define PRINT_OUT_NORMALIZATION
+
+// Bisection search for the score whose ButteraugliFuzzyClass() equals `seek`.
+// Starts at 0 and halves the step from 1.0 down to 1e-10, stepping down when
+// the current class is below `seek` and up otherwise.
+double ButteraugliFuzzyInverse(double seek) {
+  double pos = 0;
+  // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter)
+  for (double range = 1.0; range >= 1e-10; range *= 0.5) {
+    double cur = ButteraugliFuzzyClass(pos);
+    if (cur < seek) {
+      pos -= range;
+    } else {
+      pos += range;
+    }
+  }
+#ifdef PRINT_OUT_NORMALIZATION
+  if (seek == 1.0) {
+    fprintf(stderr, "Fuzzy inverse %g\n", pos);
+  }
+#endif
+  return pos;
+}
+
+#ifdef PRINT_OUT_NORMALIZATION
+static double print_out_normalization = ButteraugliFuzzyInverse(1.0);
+#endif
+
+namespace {
+
+// Converts a distance score to a heat-map colour. The score is remapped
+// piecewise-linearly ([0, good) -> [0, 0.3), [good, bad) -> [0.3, 0.45),
+// above bad -> 0.45 and up), scaled to the colour table, linearly
+// interpolated between adjacent entries, and each component is square-rooted.
+void ScoreToRgb(double score, double good_threshold, double bad_threshold,
+                float rgb[3]) {
+  double heatmap[12][3] = {
+      {0, 0, 0},       {0, 0, 1},
+      {0, 1, 1},       {0, 1, 0},  // Good level
+      {1, 1, 0},       {1, 0, 0},  // Bad level
+      {1, 0, 1},       {0.5, 0.5, 1.0},
+      {1.0, 0.5, 0.5},  // Pastel colors for the very bad quality range.
+      {1.0, 1.0, 0.5}, {1, 1, 1},
+      {1, 1, 1},  // Last color repeated to have a solid range of white.
+  };
+  if (score < good_threshold) {
+    score = (score / good_threshold) * 0.3;
+  } else if (score < bad_threshold) {
+    score = 0.3 +
+            (score - good_threshold) / (bad_threshold - good_threshold) * 0.15;
+  } else {
+    score = 0.45 + (score - bad_threshold) / (bad_threshold * 12) * 0.5;
+  }
+  static const int kTableSize = sizeof(heatmap) / sizeof(heatmap[0]);
+  // The bounds mix double and int, so the comparison type is given explicitly.
+  score = std::min<double>(std::max<double>(score * (kTableSize - 1), 0.0),
+                           kTableSize - 2);
+  int ix = static_cast<int>(score);
+  ix = std::min(std::max(0, ix), kTableSize - 2);  // Handle NaN
+  double mix = score - ix;
+  for (int i = 0; i < 3; ++i) {
+    double v = mix * heatmap[ix + 1][i] + (1 - mix) * heatmap[ix][i];
+    rgb[i] = pow(v, 0.5);
+  }
+}
+
+}  // namespace
+
+// Renders `distmap` as a three-plane heat map of the same size, one
+// ScoreToRgb() colour per pixel.
+Image3F CreateHeatMapImage(const ImageF& distmap, double good_threshold,
+                           double bad_threshold) {
+  Image3F heatmap(distmap.xsize(), distmap.ysize());
+  for (size_t y = 0; y < distmap.ysize(); ++y) {
+    const float* BUTTERAUGLI_RESTRICT row_distmap = distmap.ConstRow(y);
+    float* BUTTERAUGLI_RESTRICT row_h0 = heatmap.PlaneRow(0, y);
+    float* BUTTERAUGLI_RESTRICT row_h1 = heatmap.PlaneRow(1, y);
+    float* BUTTERAUGLI_RESTRICT row_h2 = heatmap.PlaneRow(2, y);
+    for (size_t x = 0; x < distmap.xsize(); ++x) {
+      const float d = row_distmap[x];
+      float rgb[3];
+      ScoreToRgb(d, good_threshold, bad_threshold, rgb);
+      row_h0[x] = rgb[0];
+      row_h1[x] = rgb[1];
+      row_h2[x] = rgb[2];
+    }
+  }
+  return heatmap;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.h b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.h
new file mode 100644
index 0000000000..652b9528c4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/butteraugli/butteraugli.h
@@ -0,0 +1,209 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// +// Author: Jyrki Alakuijala (jyrki.alakuijala@gmail.com) + +#ifndef LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_ +#define LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" + +#define BUTTERAUGLI_ENABLE_CHECKS 0 +#define BUTTERAUGLI_RESTRICT JXL_RESTRICT + +// This is the main interface to butteraugli image similarity +// analysis function. + +namespace jxl { + +struct ButteraugliParams { + // Multiplier for penalizing new HF artifacts more than blurring away + // features. 1.0=neutral. + float hf_asymmetry = 1.0f; + + // Multiplier for the psychovisual difference in the X channel. + float xmul = 1.0f; + + // Number of nits that correspond to 1.0f input values. + float intensity_target = 80.0f; +}; + +// ButteraugliInterface defines the public interface for butteraugli. +// +// It calculates the difference between rgb0 and rgb1. +// +// rgb0 and rgb1 contain the images. rgb0[c][px] and rgb1[c][px] contains +// the red image for c == 0, green for c == 1, blue for c == 2. Location index +// px is calculated as y * xsize + x. +// +// Value of pixels of images rgb0 and rgb1 need to be represented as raw +// intensity. Most image formats store gamma corrected intensity in pixel +// values. This gamma correction has to be removed, by applying the following +// function to values in the 0-1 range: +// butteraugli_val = pow(input_val, gamma); +// A typical value of gamma is 2.2. It is usually stored in the image header. +// Take care not to confuse that value with its inverse. The gamma value should +// be always greater than one. +// Butteraugli does not work as intended if the caller does not perform +// gamma correction. +// +// hf_asymmetry is a multiplier for penalizing new HF artifacts more than +// blurring away features (1.0 -> neutral). 
+// +// diffmap will contain an image of the size xsize * ysize, containing +// localized differences for values px (indexed with the px the same as rgb0 +// and rgb1). diffvalue will give a global score of similarity. +// +// A diffvalue smaller than kButteraugliGood indicates that images can be +// observed as the same image. +// diffvalue larger than kButteraugliBad indicates that a difference between +// the images can be observed. +// A diffvalue between kButteraugliGood and kButteraugliBad indicates that +// a subtle difference can be observed between the images. +// +// Returns true on success. +bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1, + const ButteraugliParams ¶ms, ImageF &diffmap, + double &diffvalue); + +// Deprecated (calls the previous function) +bool ButteraugliInterface(const Image3F &rgb0, const Image3F &rgb1, + float hf_asymmetry, float xmul, ImageF &diffmap, + double &diffvalue); + +// Converts the butteraugli score into fuzzy class values that are continuous +// at the class boundary. The class boundary location is based on human +// raters, but the slope is arbitrary. Particularly, it does not reflect +// the expectation value of probabilities of the human raters. It is just +// expected that a smoother class boundary will allow for higher-level +// optimization algorithms to work faster. +// +// Returns 2.0 for a perfect match, and 1.0 for 'ok', 0.0 for bad. Because the +// scoring is fuzzy, a butteraugli score of 0.96 would return a class of +// around 1.9. +double ButteraugliFuzzyClass(double score); + +// Input values should be in range 0 (bad) to 2 (good). Use +// kButteraugliNormalization as normalization. +double ButteraugliFuzzyInverse(double seek); + +// Implementation details, don't use anything below or your code will +// break in the future. 
+ +#ifdef _MSC_VER +#define BUTTERAUGLI_INLINE __forceinline +#else +#define BUTTERAUGLI_INLINE inline +#endif + +#ifdef __clang__ +// Early versions of Clang did not support __builtin_assume_aligned. +#define BUTTERAUGLI_HAS_ASSUME_ALIGNED __has_builtin(__builtin_assume_aligned) +#elif defined(__GNUC__) +#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 1 +#else +#define BUTTERAUGLI_HAS_ASSUME_ALIGNED 0 +#endif + +// Returns a void* pointer which the compiler then assumes is N-byte aligned. +// Example: float* JXL_RESTRICT aligned = (float*)JXL_ASSUME_ALIGNED(in, 32); +// +// The assignment semantics are required by GCC/Clang. ICC provides an in-place +// __assume_aligned, whereas MSVC's __assume appears unsuitable. +#if BUTTERAUGLI_HAS_ASSUME_ALIGNED +#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) \ + __builtin_assume_aligned((ptr), (align)) +#else +#define BUTTERAUGLI_ASSUME_ALIGNED(ptr, align) (ptr) +#endif // BUTTERAUGLI_HAS_ASSUME_ALIGNED + +struct PsychoImage { + ImageF uhf[2]; // XY + ImageF hf[2]; // XY + Image3F mf; // XYB + Image3F lf; // XYB +}; + +// Blur needs a transposed image. +// Hold it here and only allocate on demand to reduce memory usage. +struct BlurTemp { + ImageF *GetTransposed(const ImageF &in) { + if (transposed_temp.xsize() == 0) { + transposed_temp = ImageF(in.ysize(), in.xsize()); + } + return &transposed_temp; + } + + ImageF transposed_temp; +}; + +class ButteraugliComparator { + public: + // Butteraugli is calibrated at xmul = 1.0. We add a multiplier here so that + // we can test the hypothesis that a higher weighing of the X channel would + // improve results at higher Butteraugli values. + ButteraugliComparator(const Image3F &rgb0, const ButteraugliParams ¶ms); + virtual ~ButteraugliComparator() = default; + + // Computes the butteraugli map between the original image given in the + // constructor and the distorted image give here. 
+ void Diffmap(const Image3F &rgb1, ImageF &result) const; + + // Same as above, but OpsinDynamicsImage() was already applied. + void DiffmapOpsinDynamicsImage(const Image3F &xyb1, ImageF &result) const; + + // Same as above, but the frequency decomposition was already applied. + void DiffmapPsychoImage(const PsychoImage &pi1, ImageF &diffmap) const; + + void Mask(ImageF *BUTTERAUGLI_RESTRICT mask) const; + + private: + Image3F *Temp() const; + void ReleaseTemp() const; + + const size_t xsize_; + const size_t ysize_; + ButteraugliParams params_; + PsychoImage pi0_; + + // Shared temporary image storage to reduce the number of allocations; + // obtained via Temp(), must call ReleaseTemp when no longer needed. + mutable Image3F temp_; + mutable std::atomic_flag temp_in_use_ = ATOMIC_FLAG_INIT; + + mutable BlurTemp blur_temp_; + std::unique_ptr sub_; +}; + +// Deprecated. +bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1, + double hf_asymmetry, double xmul, ImageF &diffmap); + +bool ButteraugliDiffmap(const Image3F &rgb0, const Image3F &rgb1, + const ButteraugliParams ¶ms, ImageF &diffmap); + +double ButteraugliScoreFromDiffmap(const ImageF &diffmap, + const ButteraugliParams *params = nullptr); + +// Generate rgb-representation of the distance between two images. +Image3F CreateHeatMapImage(const ImageF &distmap, double good_threshold, + double bad_threshold); + +} // namespace jxl + +#endif // LIB_JXL_BUTTERAUGLI_BUTTERAUGLI_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/byte_order_test.cc b/third-party/libjxl/libjxl/lib/jxl/byte_order_test.cc new file mode 100644 index 0000000000..17d7ef6643 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/byte_order_test.cc @@ -0,0 +1,53 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/base/byte_order.h" + +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(ByteOrderTest, TestRoundTripBE16) { + const uint32_t in = 0x1234; + uint8_t buf[2]; + StoreBE16(in, buf); + EXPECT_EQ(in, LoadBE16(buf)); + EXPECT_NE(in, LoadLE16(buf)); +} + +TEST(ByteOrderTest, TestRoundTripLE16) { + const uint32_t in = 0x1234; + uint8_t buf[2]; + StoreLE16(in, buf); + EXPECT_EQ(in, LoadLE16(buf)); + EXPECT_NE(in, LoadBE16(buf)); +} + +TEST(ByteOrderTest, TestRoundTripBE32) { + const uint32_t in = 0xFEDCBA98u; + uint8_t buf[4]; + StoreBE32(in, buf); + EXPECT_EQ(in, LoadBE32(buf)); + EXPECT_NE(in, LoadLE32(buf)); +} + +TEST(ByteOrderTest, TestRoundTripLE32) { + const uint32_t in = 0xFEDCBA98u; + uint8_t buf[4]; + StoreLE32(in, buf); + EXPECT_EQ(in, LoadLE32(buf)); + EXPECT_NE(in, LoadBE32(buf)); +} + +TEST(ByteOrderTest, TestRoundTripLE64) { + const uint64_t in = 0xFEDCBA9876543210ull; + uint8_t buf[8]; + StoreLE64(in, buf); + EXPECT_EQ(in, LoadLE64(buf)); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.cc b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.cc new file mode 100644 index 0000000000..63d21cbb4b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.cc @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/chroma_from_luma.h" + +namespace jxl { + +ColorCorrelationMap::ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB) + : ytox_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)), + ytob_map(DivCeil(xsize, kColorTileDim), DivCeil(ysize, kColorTileDim)) { + ZeroFillImage(&ytox_map); + ZeroFillImage(&ytob_map); + if (!XYB) { + base_correlation_b_ = 0; + } + RecomputeDCFactors(); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.h b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.h new file mode 100644 index 0000000000..9a7f3d45bc --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/chroma_from_luma.h @@ -0,0 +1,147 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_CHROMA_FROM_LUMA_H_ +#define LIB_JXL_CHROMA_FROM_LUMA_H_ + +// Chroma-from-luma, computed using heuristics to determine the best linear +// model for the X and B channels from the Y channel. + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/quant_weights.h" + +namespace jxl { + +// Tile is the rectangular grid of blocks that share color correlation +// parameters ("factor_x/b" such that residual_b = blue - Y * factor_b). 
+static constexpr size_t kColorTileDim = 64; + +static_assert(kColorTileDim % kBlockDim == 0, + "Color tile dim should be divisible by block dim"); +static constexpr size_t kColorTileDimInBlocks = kColorTileDim / kBlockDim; + +static_assert(kGroupDimInBlocks % kColorTileDimInBlocks == 0, + "Group dim should be divisible by color tile dim"); + +static constexpr uint8_t kDefaultColorFactor = 84; + +// JPEG DCT coefficients are at most 1024. CfL constants are at most 127, and +// the ratio of two entries in a JPEG quantization table is at most 255. Thus, +// since the CfL denominator is 84, this leaves 12 bits of mantissa to be used. +// For extra caution, we use 11. +static constexpr uint8_t kCFLFixedPointPrecision = 11; + +static constexpr U32Enc kColorFactorDist(Val(kDefaultColorFactor), Val(256), + BitsOffset(8, 2), BitsOffset(16, 258)); + +struct ColorCorrelationMap { + ColorCorrelationMap() = default; + // xsize/ysize are in pixels + // set XYB=false to do something close to no-op cmap (needed for now since + // cmap is mandatory) + ColorCorrelationMap(size_t xsize, size_t ysize, bool XYB = true); + + float YtoXRatio(int32_t x_factor) const { + return base_correlation_x_ + x_factor * color_scale_; + } + + float YtoBRatio(int32_t b_factor) const { + return base_correlation_b_ + b_factor * color_scale_; + } + + Status DecodeDC(BitReader* br) { + if (br->ReadFixedBits<1>() == 1) { + // All default. 
+ return true; + } + SetColorFactor(U32Coder::Read(kColorFactorDist, br)); + JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_x_)); + if (std::abs(base_correlation_x_) > 4.0f) { + return JXL_FAILURE("Base X correlation is out of range"); + } + JXL_RETURN_IF_ERROR(F16Coder::Read(br, &base_correlation_b_)); + if (std::abs(base_correlation_b_) > 4.0f) { + return JXL_FAILURE("Base B correlation is out of range"); + } + ytox_dc_ = static_cast(br->ReadFixedBits()) + + std::numeric_limits::min(); + ytob_dc_ = static_cast(br->ReadFixedBits()) + + std::numeric_limits::min(); + RecomputeDCFactors(); + return true; + } + + // We consider a CfL map to be JPEG-reconstruction-compatible if base + // correlation is 0, no DC correlation is used, and we use the default color + // factor. + bool IsJPEGCompatible() const { + return base_correlation_x_ == 0 && base_correlation_b_ == 0 && + ytob_dc_ == 0 && ytox_dc_ == 0 && + color_factor_ == kDefaultColorFactor; + } + + int32_t RatioJPEG(int32_t factor) const { + return factor * (1 << kCFLFixedPointPrecision) / kDefaultColorFactor; + } + + void SetColorFactor(uint32_t factor) { + color_factor_ = factor; + color_scale_ = 1.0f / color_factor_; + RecomputeDCFactors(); + } + + void SetYToBDC(int32_t ytob_dc) { + ytob_dc_ = ytob_dc; + RecomputeDCFactors(); + } + void SetYToXDC(int32_t ytox_dc) { + ytox_dc_ = ytox_dc; + RecomputeDCFactors(); + } + + int32_t GetYToXDC() const { return ytox_dc_; } + int32_t GetYToBDC() const { return ytob_dc_; } + float GetColorFactor() const { return color_factor_; } + float GetBaseCorrelationX() const { return base_correlation_x_; } + float GetBaseCorrelationB() const { return base_correlation_b_; } + + const float* DCFactors() const { return dc_factors_; } + + void RecomputeDCFactors() { + dc_factors_[0] = YtoXRatio(ytox_dc_); + dc_factors_[2] = YtoBRatio(ytob_dc_); + } + + ImageSB ytox_map; + ImageSB ytob_map; + + private: + float dc_factors_[4] = {}; + // range of factor: -1.51 to +1.52 + 
uint32_t color_factor_ = kDefaultColorFactor; + float color_scale_ = 1.0f / color_factor_; + float base_correlation_x_ = 0.0f; + float base_correlation_b_ = kYToBRatio; + int32_t ytox_dc_ = 0; + int32_t ytob_dc_ = 0; +}; + +} // namespace jxl + +#endif // LIB_JXL_CHROMA_FROM_LUMA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/codec_in_out.h b/third-party/libjxl/libjxl/lib/jxl/codec_in_out.h new file mode 100644 index 0000000000..9e48b5e937 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/codec_in_out.h @@ -0,0 +1,116 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_CODEC_IN_OUT_H_ +#define LIB_JXL_CODEC_IN_OUT_H_ + +// Holds inputs/outputs for decoding/encoding images. + +#include +#include + +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/luminance.h" + +namespace jxl { + +// Optional text/EXIF metadata. +struct Blobs { + std::vector exif; + std::vector iptc; + std::vector jumbf; + std::vector xmp; +}; + +// Holds a preview, a main image or one or more frames, plus the inputs/outputs +// to/from decoding/encoding. +class CodecInOut { + public: + CodecInOut() : preview_frame(&metadata.m) { + frames.reserve(1); + frames.emplace_back(&metadata.m); + } + + // Move-only. 
+ CodecInOut(CodecInOut&&) = default; + CodecInOut& operator=(CodecInOut&&) = default; + + size_t LastStillFrame() const { + JXL_DASSERT(!frames.empty()); + size_t last = 0; + for (size_t i = 0; i < frames.size(); i++) { + last = i; + if (frames[i].duration > 0) break; + } + return last; + } + + ImageBundle& Main() { return frames[LastStillFrame()]; } + const ImageBundle& Main() const { return frames[LastStillFrame()]; } + + // If c_current.IsGray(), all planes must be identical. + void SetFromImage(Image3F&& color, const ColorEncoding& c_current) { + Main().SetFromImage(std::move(color), c_current); + SetIntensityTarget(&this->metadata.m); + SetSize(Main().xsize(), Main().ysize()); + } + + void SetSize(size_t xsize, size_t ysize) { + JXL_CHECK(metadata.size.Set(xsize, ysize)); + } + + void CheckMetadata() const { + JXL_CHECK(metadata.m.bit_depth.bits_per_sample != 0); + JXL_CHECK(!metadata.m.color_encoding.ICC().empty()); + + if (preview_frame.xsize() != 0) preview_frame.VerifyMetadata(); + JXL_CHECK(preview_frame.metadata() == &metadata.m); + + for (const ImageBundle& ib : frames) { + ib.VerifyMetadata(); + JXL_CHECK(ib.metadata() == &metadata.m); + } + } + + size_t xsize() const { return metadata.size.xsize(); } + size_t ysize() const { return metadata.size.ysize(); } + void ShrinkTo(size_t xsize, size_t ysize) { + // preview is unaffected. + for (ImageBundle& ib : frames) { + ib.ShrinkTo(xsize, ysize); + } + SetSize(xsize, ysize); + } + + // -- DECODER OUTPUT, ENCODER INPUT: + + // Metadata stored into / retrieved from bitstreams. + + Blobs blobs; + + CodecMetadata metadata; // applies to preview and all frames + + // If metadata.have_preview: + ImageBundle preview_frame; + + std::vector frames; // size=1 if !metadata.have_animation + + // If the image should be written to a JPEG, use this quality for encoding. 
+ size_t jpeg_quality; +}; + +} // namespace jxl + +#endif // LIB_JXL_CODEC_IN_OUT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order.cc b/third-party/libjxl/libjxl/lib/jxl/coeff_order.cc new file mode 100644 index 0000000000..75ad2f26f6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order.cc @@ -0,0 +1,151 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/coeff_order.h" + +#include + +#include +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/lehmer_code.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +uint32_t CoeffOrderContext(uint32_t val) { + uint32_t token, nbits, bits; + HybridUintConfig(0, 0, 0).Encode(val, &token, &nbits, &bits); + return std::min(token, kPermutationContexts - 1); +} + +namespace { +Status ReadPermutation(size_t skip, size_t size, coeff_order_t* order, + BitReader* br, ANSSymbolReader* reader, + const std::vector& context_map) { + std::vector lehmer(size); + // temp space needs to be as large as the next power of 2, so doubling the + // allocated size is enough. 
+ std::vector temp(size * 2); + uint32_t end = + reader->ReadHybridUint(CoeffOrderContext(size), br, context_map) + skip; + if (end > size) { + return JXL_FAILURE("Invalid permutation size"); + } + uint32_t last = 0; + for (size_t i = skip; i < end; ++i) { + lehmer[i] = + reader->ReadHybridUint(CoeffOrderContext(last), br, context_map); + last = lehmer[i]; + if (lehmer[i] + i >= size) { + return JXL_FAILURE("Invalid lehmer code"); + } + } + if (order == nullptr) return true; + DecodeLehmerCode(lehmer.data(), temp.data(), size, order); + return true; +} + +} // namespace + +Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order, + BitReader* br) { + std::vector context_map; + ANSCode code; + JXL_RETURN_IF_ERROR( + DecodeHistograms(br, kPermutationContexts, &code, &context_map)); + ANSSymbolReader reader(&code, br); + JXL_RETURN_IF_ERROR( + ReadPermutation(skip, size, order, br, &reader, context_map)); + if (!reader.CheckANSFinalState()) { + return JXL_FAILURE("Invalid ANS stream"); + } + return true; +} + +namespace { + +Status DecodeCoeffOrder(AcStrategy acs, coeff_order_t* order, BitReader* br, + ANSSymbolReader* reader, + std::vector& natural_order, + const std::vector& context_map) { + const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y(); + const size_t size = kDCTBlockSize * llf; + + JXL_RETURN_IF_ERROR( + ReadPermutation(llf, size, order, br, reader, context_map)); + if (order == nullptr) return true; + for (size_t k = 0; k < size; ++k) { + order[k] = natural_order[order[k]]; + } + return true; +} + +} // namespace + +Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs, + coeff_order_t* order, BitReader* br) { + uint16_t computed = 0; + std::vector context_map; + ANSCode code; + std::unique_ptr reader; + std::vector natural_order; + // Bitstream does not have histograms if no coefficient order is used. 
+ if (used_orders != 0) { + JXL_RETURN_IF_ERROR( + DecodeHistograms(br, kPermutationContexts, &code, &context_map)); + reader = make_unique(&code, br); + } + uint32_t acs_mask = 0; + for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { + if ((used_acs & (1 << o)) == 0) continue; + acs_mask |= 1 << kStrategyOrder[o]; + } + for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { + uint8_t ord = kStrategyOrder[o]; + if (computed & (1 << ord)) continue; + computed |= 1 << ord; + AcStrategy acs = AcStrategy::FromRawStrategy(o); + bool used = (acs_mask & (1 << ord)) != 0; + + const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y(); + const size_t size = kDCTBlockSize * llf; + + if (used || (used_orders & (1 << ord))) { + if (natural_order.size() < size) natural_order.resize(size); + acs.ComputeNaturalCoeffOrder(natural_order.data()); + } + + if ((used_orders & (1 << ord)) == 0) { + // No need to set the default order if no ACS uses this order. + if (used) { + for (size_t c = 0; c < 3; c++) { + memcpy(&order[CoeffOrderOffset(ord, c)], natural_order.data(), + size * sizeof(*order)); + } + } + } else { + for (size_t c = 0; c < 3; c++) { + coeff_order_t* dest = used ? &order[CoeffOrderOffset(ord, c)] : nullptr; + JXL_RETURN_IF_ERROR(DecodeCoeffOrder(acs, dest, br, reader.get(), + natural_order, context_map)); + } + } + } + if (used_orders && !reader->CheckANSFinalState()) { + return JXL_FAILURE("Invalid ANS stream"); + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order.h b/third-party/libjxl/libjxl/lib/jxl/coeff_order.h new file mode 100644 index 0000000000..fb32499f2f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order.h @@ -0,0 +1,64 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_COEFF_ORDER_H_ +#define LIB_JXL_COEFF_ORDER_H_ + +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_util.h" + +namespace jxl { + +class BitReader; + +// Those offsets get multiplied by kDCTBlockSize. +static constexpr size_t kCoeffOrderOffset[] = { + 0, 1, 2, 3, 4, 5, 6, 10, 14, 18, + 34, 50, 66, 68, 70, 72, 76, 80, 84, 92, + 100, 108, 172, 236, 300, 332, 364, 396, 652, 908, + 1164, 1292, 1420, 1548, 2572, 3596, 4620, 5132, 5644, 6156, +}; +static_assert(3 * kNumOrders + 1 == + sizeof(kCoeffOrderOffset) / sizeof(*kCoeffOrderOffset), + "Update this array when adding or removing order types."); + +static constexpr size_t CoeffOrderOffset(size_t order, size_t c) { + return kCoeffOrderOffset[3 * order + c] * kDCTBlockSize; +} + +static constexpr size_t kCoeffOrderMaxSize = + kCoeffOrderOffset[3 * kNumOrders] * kDCTBlockSize; + +// Mapping from AC strategy to order bucket. Strategies with different natural +// orders must have different buckets. 
+constexpr uint8_t kStrategyOrder[] = { + 0, 1, 1, 1, 2, 3, 4, 4, 5, 5, 6, 6, 1, 1, + 1, 1, 1, 1, 7, 8, 8, 9, 10, 10, 11, 12, 12, +}; + +static_assert(AcStrategy::kNumValidStrategies == + sizeof(kStrategyOrder) / sizeof(*kStrategyOrder), + "Update this array when adding or removing AC strategies."); + +constexpr uint32_t kPermutationContexts = 8; + +uint32_t CoeffOrderContext(uint32_t val); + +Status DecodeCoeffOrders(uint16_t used_orders, uint32_t used_acs, + coeff_order_t* order, BitReader* br); + +Status DecodePermutation(size_t skip, size_t size, coeff_order_t* order, + BitReader* br); + +} // namespace jxl + +#endif // LIB_JXL_COEFF_ORDER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order_fwd.h b/third-party/libjxl/libjxl/lib/jxl/coeff_order_fwd.h new file mode 100644 index 0000000000..26306575c1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order_fwd.h @@ -0,0 +1,47 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_COEFF_ORDER_FWD_H_ +#define LIB_JXL_COEFF_ORDER_FWD_H_ + +// Breaks circular dependency between ac_strategy and coeff_order. + +#include +#include + +#include "lib/jxl/base/compiler_specific.h" + +namespace jxl { + +// Needs at least 16 bits. A 32-bit type speeds up DecodeAC by 2% at the cost of +// more memory. +using coeff_order_t = uint32_t; + +// Maximum number of orders to be used. Note that this needs to be multiplied by +// the number of channels. One per "size class" (plus one extra for DCT8), +// shared between transforms of size XxY and of size YxX. +constexpr uint8_t kNumOrders = 13; + +// DCT coefficients are laid out in such a way that the number of rows of +// coefficients is always the smaller coordinate. +JXL_INLINE constexpr size_t CoefficientRows(size_t rows, size_t columns) { + return rows < columns ? 
rows : columns; +} + +JXL_INLINE constexpr size_t CoefficientColumns(size_t rows, size_t columns) { + return rows < columns ? columns : rows; +} + +JXL_INLINE void CoefficientLayout(size_t* JXL_RESTRICT rows, + size_t* JXL_RESTRICT columns) { + size_t r = *rows; + size_t c = *columns; + *rows = CoefficientRows(r, c); + *columns = CoefficientColumns(r, c); +} + +} // namespace jxl + +#endif // LIB_JXL_COEFF_ORDER_FWD_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/coeff_order_test.cc b/third-party/libjxl/libjxl/lib/jxl/coeff_order_test.cc new file mode 100644 index 0000000000..6fa0775697 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/coeff_order_test.cc @@ -0,0 +1,97 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/coeff_order.h" + +#include + +#include +#include // iota +#include +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_coeff_order.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +void RoundtripPermutation(coeff_order_t* perm, coeff_order_t* out, size_t len, + size_t* size) { + BitWriter writer; + EncodePermutation(perm, 0, len, &writer, 0, nullptr); + writer.ZeroPadToByte(); + Status status = true; + { + BitReader reader(writer.GetSpan()); + BitReaderScopedCloser closer(&reader, &status); + ASSERT_TRUE(DecodePermutation(0, len, out, &reader)); + } + ASSERT_TRUE(status); + *size = writer.GetSpan().size(); +} + +enum Permutation { kIdentity, kFewSwaps, kFewSlides, kRandom }; + +constexpr size_t kSwaps = 32; + +void TestPermutation(Permutation kind, size_t len) { + std::vector perm(len); + std::iota(perm.begin(), perm.end(), 0); + Rng rng(0); + if (kind == kFewSwaps) { + for (size_t i = 0; i < kSwaps; i++) { + size_t 
a = rng.UniformU(0, len - 1); + size_t b = rng.UniformU(0, len - 1); + std::swap(perm[a], perm[b]); + } + } + if (kind == kFewSlides) { + for (size_t i = 0; i < kSwaps; i++) { + size_t a = rng.UniformU(0, len - 1); + size_t b = rng.UniformU(0, len - 1); + size_t from = std::min(a, b); + size_t to = std::max(a, b); + size_t start = perm[from]; + for (size_t j = from; j < to; j++) { + perm[j] = perm[j + 1]; + } + perm[to] = start; + } + } + if (kind == kRandom) { + rng.Shuffle(perm.data(), perm.size()); + } + std::vector out(len); + size_t size = 0; + RoundtripPermutation(perm.data(), out.data(), len, &size); + for (size_t idx = 0; idx < len; idx++) { + EXPECT_EQ(perm[idx], out[idx]); + } + printf("Encoded size: %" PRIuS "\n", size); +} + +TEST(CoeffOrderTest, IdentitySmall) { TestPermutation(kIdentity, 256); } +TEST(CoeffOrderTest, FewSlidesSmall) { TestPermutation(kFewSlides, 256); } +TEST(CoeffOrderTest, FewSwapsSmall) { TestPermutation(kFewSwaps, 256); } +TEST(CoeffOrderTest, RandomSmall) { TestPermutation(kRandom, 256); } + +TEST(CoeffOrderTest, IdentityMedium) { TestPermutation(kIdentity, 1 << 12); } +TEST(CoeffOrderTest, FewSlidesMedium) { TestPermutation(kFewSlides, 1 << 12); } +TEST(CoeffOrderTest, FewSwapsMedium) { TestPermutation(kFewSwaps, 1 << 12); } +TEST(CoeffOrderTest, RandomMedium) { TestPermutation(kRandom, 1 << 12); } + +TEST(CoeffOrderTest, IdentityBig) { TestPermutation(kIdentity, 1 << 16); } +TEST(CoeffOrderTest, FewSlidesBig) { TestPermutation(kFewSlides, 1 << 16); } +TEST(CoeffOrderTest, FewSwapsBig) { TestPermutation(kFewSwaps, 1 << 16); } +TEST(CoeffOrderTest, RandomBig) { TestPermutation(kRandom, 1 << 16); } + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.cc b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.cc new file mode 100644 index 0000000000..a59a9fd2ac --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.cc @@ -0,0 +1,789 @@ +// Copyright 
(c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/color_encoding_internal.h" + +#include + +#include +#include + +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/matrix_ops.h" + +namespace jxl { +namespace { + +// Highest reasonable value for the gamma of a transfer curve. +constexpr uint32_t kMaxGamma = 8192; + +// These strings are baked into Description - do not change. + +std::string ToString(ColorSpace color_space) { + switch (color_space) { + case ColorSpace::kRGB: + return "RGB"; + case ColorSpace::kGray: + return "Gra"; + case ColorSpace::kXYB: + return "XYB"; + case ColorSpace::kUnknown: + return "CS?"; + } + // Should not happen - visitor fails if enum is invalid. + JXL_UNREACHABLE("Invalid ColorSpace %u", static_cast(color_space)); +} + +std::string ToString(WhitePoint white_point) { + switch (white_point) { + case WhitePoint::kD65: + return "D65"; + case WhitePoint::kCustom: + return "Cst"; + case WhitePoint::kE: + return "EER"; + case WhitePoint::kDCI: + return "DCI"; + } + // Should not happen - visitor fails if enum is invalid. + JXL_UNREACHABLE("Invalid WhitePoint %u", static_cast(white_point)); +} + +std::string ToString(Primaries primaries) { + switch (primaries) { + case Primaries::kSRGB: + return "SRG"; + case Primaries::k2100: + return "202"; + case Primaries::kP3: + return "DCI"; + case Primaries::kCustom: + return "Cst"; + } + // Should not happen - visitor fails if enum is invalid. 
+ JXL_UNREACHABLE("Invalid Primaries %u", static_cast(primaries)); +} + +std::string ToString(TransferFunction transfer_function) { + switch (transfer_function) { + case TransferFunction::kSRGB: + return "SRG"; + case TransferFunction::kLinear: + return "Lin"; + case TransferFunction::k709: + return "709"; + case TransferFunction::kPQ: + return "PeQ"; + case TransferFunction::kHLG: + return "HLG"; + case TransferFunction::kDCI: + return "DCI"; + case TransferFunction::kUnknown: + return "TF?"; + } + // Should not happen - visitor fails if enum is invalid. + JXL_UNREACHABLE("Invalid TransferFunction %u", + static_cast(transfer_function)); +} + +std::string ToString(RenderingIntent rendering_intent) { + switch (rendering_intent) { + case RenderingIntent::kPerceptual: + return "Per"; + case RenderingIntent::kRelative: + return "Rel"; + case RenderingIntent::kSaturation: + return "Sat"; + case RenderingIntent::kAbsolute: + return "Abs"; + } + // Should not happen - visitor fails if enum is invalid. 
+ JXL_UNREACHABLE("Invalid RenderingIntent %u", + static_cast(rendering_intent)); +} + +static double F64FromCustomxyI32(const int32_t i) { return i * 1E-6; } +static Status F64ToCustomxyI32(const double f, int32_t* JXL_RESTRICT i) { + if (!(-4 <= f && f <= 4)) { + return JXL_FAILURE("F64 out of bounds for CustomxyI32"); + } + *i = static_cast(roundf(f * 1E6)); + return true; +} + +Status ConvertExternalToInternalWhitePoint(const JxlWhitePoint external, + WhitePoint* internal) { + switch (external) { + case JXL_WHITE_POINT_D65: + *internal = WhitePoint::kD65; + return true; + case JXL_WHITE_POINT_CUSTOM: + *internal = WhitePoint::kCustom; + return true; + case JXL_WHITE_POINT_E: + *internal = WhitePoint::kE; + return true; + case JXL_WHITE_POINT_DCI: + *internal = WhitePoint::kDCI; + return true; + } + return JXL_FAILURE("Invalid WhitePoint enum value %d", + static_cast(external)); +} + +Status ConvertExternalToInternalPrimaries(const JxlPrimaries external, + Primaries* internal) { + switch (external) { + case JXL_PRIMARIES_SRGB: + *internal = Primaries::kSRGB; + return true; + case JXL_PRIMARIES_CUSTOM: + *internal = Primaries::kCustom; + return true; + case JXL_PRIMARIES_2100: + *internal = Primaries::k2100; + return true; + case JXL_PRIMARIES_P3: + *internal = Primaries::kP3; + return true; + } + return JXL_FAILURE("Invalid Primaries enum value"); +} + +Status ConvertExternalToInternalTransferFunction( + const JxlTransferFunction external, TransferFunction* internal) { + switch (external) { + case JXL_TRANSFER_FUNCTION_709: + *internal = TransferFunction::k709; + return true; + case JXL_TRANSFER_FUNCTION_UNKNOWN: + *internal = TransferFunction::kUnknown; + return true; + case JXL_TRANSFER_FUNCTION_LINEAR: + *internal = TransferFunction::kLinear; + return true; + case JXL_TRANSFER_FUNCTION_SRGB: + *internal = TransferFunction::kSRGB; + return true; + case JXL_TRANSFER_FUNCTION_PQ: + *internal = TransferFunction::kPQ; + return true; + case 
JXL_TRANSFER_FUNCTION_DCI: + *internal = TransferFunction::kDCI; + return true; + case JXL_TRANSFER_FUNCTION_HLG: + *internal = TransferFunction::kHLG; + return true; + case JXL_TRANSFER_FUNCTION_GAMMA: + return JXL_FAILURE("Gamma should be handled separately"); + } + return JXL_FAILURE("Invalid TransferFunction enum value"); +} + +Status ConvertExternalToInternalRenderingIntent( + const JxlRenderingIntent external, RenderingIntent* internal) { + switch (external) { + case JXL_RENDERING_INTENT_PERCEPTUAL: + *internal = RenderingIntent::kPerceptual; + return true; + case JXL_RENDERING_INTENT_RELATIVE: + *internal = RenderingIntent::kRelative; + return true; + case JXL_RENDERING_INTENT_SATURATION: + *internal = RenderingIntent::kSaturation; + return true; + case JXL_RENDERING_INTENT_ABSOLUTE: + *internal = RenderingIntent::kAbsolute; + return true; + } + return JXL_FAILURE("Invalid RenderingIntent enum value"); +} + +} // namespace + +CIExy Customxy::Get() const { + CIExy xy; + xy.x = F64FromCustomxyI32(x); + xy.y = F64FromCustomxyI32(y); + return xy; +} + +Status Customxy::Set(const CIExy& xy) { + JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.x, &x)); + JXL_RETURN_IF_ERROR(F64ToCustomxyI32(xy.y, &y)); + size_t extension_bits, total_bits; + if (!Bundle::CanEncode(*this, &extension_bits, &total_bits)) { + return JXL_FAILURE("Unable to encode XY %f %f", xy.x, xy.y); + } + return true; +} + +bool CustomTransferFunction::SetImplicit() { + if (nonserialized_color_space == ColorSpace::kXYB) { + if (!SetGamma(1.0 / 3)) JXL_ASSERT(false); + return true; + } + return false; +} + +Status CustomTransferFunction::SetGamma(double gamma) { + if (gamma < (1.0f / kMaxGamma) || gamma > 1.0) { + return JXL_FAILURE("Invalid gamma %f", gamma); + } + + have_gamma_ = false; + if (ApproxEq(gamma, 1.0)) { + transfer_function_ = TransferFunction::kLinear; + return true; + } + if (ApproxEq(gamma, 1.0 / 2.6)) { + transfer_function_ = TransferFunction::kDCI; + return true; + } + // Don't translate 
0.45.. to kSRGB nor k709 - that might change pixel + // values because those curves also have a linear part. + + have_gamma_ = true; + gamma_ = roundf(gamma * kGammaMul); + transfer_function_ = TransferFunction::kUnknown; + return true; +} + +namespace { + +std::array CreateC2(const Primaries pr, + const TransferFunction tf) { + std::array c2; + + { + ColorEncoding* c_rgb = c2.data() + 0; + c_rgb->SetColorSpace(ColorSpace::kRGB); + c_rgb->white_point = WhitePoint::kD65; + c_rgb->primaries = pr; + c_rgb->tf.SetTransferFunction(tf); + JXL_CHECK(c_rgb->CreateICC()); + } + + { + ColorEncoding* c_gray = c2.data() + 1; + c_gray->SetColorSpace(ColorSpace::kGray); + c_gray->white_point = WhitePoint::kD65; + c_gray->primaries = pr; + c_gray->tf.SetTransferFunction(tf); + JXL_CHECK(c_gray->CreateICC()); + } + + return c2; +} + +} // namespace + +const ColorEncoding& ColorEncoding::SRGB(bool is_gray) { + static std::array c2 = + CreateC2(Primaries::kSRGB, TransferFunction::kSRGB); + return c2[is_gray]; +} +const ColorEncoding& ColorEncoding::LinearSRGB(bool is_gray) { + static std::array c2 = + CreateC2(Primaries::kSRGB, TransferFunction::kLinear); + return c2[is_gray]; +} + +CIExy ColorEncoding::GetWhitePoint() const { + JXL_DASSERT(have_fields_); + CIExy xy; + switch (white_point) { + case WhitePoint::kCustom: + return white_.Get(); + + case WhitePoint::kD65: + xy.x = 0.3127; + xy.y = 0.3290; + return xy; + + case WhitePoint::kDCI: + // From https://ieeexplore.ieee.org/document/7290729 C.2 page 11 + xy.x = 0.314; + xy.y = 0.351; + return xy; + + case WhitePoint::kE: + xy.x = xy.y = 1.0 / 3; + return xy; + } + JXL_UNREACHABLE("Invalid WhitePoint %u", static_cast(white_point)); +} + +Status ColorEncoding::SetWhitePoint(const CIExy& xy) { + JXL_DASSERT(have_fields_); + if (xy.x == 0.0 || xy.y == 0.0) { + return JXL_FAILURE("Invalid white point %f %f", xy.x, xy.y); + } + if (ApproxEq(xy.x, 0.3127) && ApproxEq(xy.y, 0.3290)) { + white_point = WhitePoint::kD65; + return true; + } 
+ if (ApproxEq(xy.x, 1.0 / 3) && ApproxEq(xy.y, 1.0 / 3)) { + white_point = WhitePoint::kE; + return true; + } + if (ApproxEq(xy.x, 0.314) && ApproxEq(xy.y, 0.351)) { + white_point = WhitePoint::kDCI; + return true; + } + white_point = WhitePoint::kCustom; + return white_.Set(xy); +} + +PrimariesCIExy ColorEncoding::GetPrimaries() const { + JXL_DASSERT(have_fields_); + JXL_ASSERT(HasPrimaries()); + PrimariesCIExy xy; + switch (primaries) { + case Primaries::kCustom: + xy.r = red_.Get(); + xy.g = green_.Get(); + xy.b = blue_.Get(); + return xy; + + case Primaries::kSRGB: + xy.r.x = 0.639998686; + xy.r.y = 0.330010138; + xy.g.x = 0.300003784; + xy.g.y = 0.600003357; + xy.b.x = 0.150002046; + xy.b.y = 0.059997204; + return xy; + + case Primaries::k2100: + xy.r.x = 0.708; + xy.r.y = 0.292; + xy.g.x = 0.170; + xy.g.y = 0.797; + xy.b.x = 0.131; + xy.b.y = 0.046; + return xy; + + case Primaries::kP3: + xy.r.x = 0.680; + xy.r.y = 0.320; + xy.g.x = 0.265; + xy.g.y = 0.690; + xy.b.x = 0.150; + xy.b.y = 0.060; + return xy; + } + JXL_UNREACHABLE("Invalid Primaries %u", static_cast(primaries)); +} + +Status ColorEncoding::SetPrimaries(const PrimariesCIExy& xy) { + JXL_DASSERT(have_fields_); + JXL_ASSERT(HasPrimaries()); + if (xy.r.x == 0.0 || xy.r.y == 0.0 || xy.g.x == 0.0 || xy.g.y == 0.0 || + xy.b.x == 0.0 || xy.b.y == 0.0) { + return JXL_FAILURE("Invalid primaries %f %f %f %f %f %f", xy.r.x, xy.r.y, + xy.g.x, xy.g.y, xy.b.x, xy.b.y); + } + + if (ApproxEq(xy.r.x, 0.64) && ApproxEq(xy.r.y, 0.33) && + ApproxEq(xy.g.x, 0.30) && ApproxEq(xy.g.y, 0.60) && + ApproxEq(xy.b.x, 0.15) && ApproxEq(xy.b.y, 0.06)) { + primaries = Primaries::kSRGB; + return true; + } + + if (ApproxEq(xy.r.x, 0.708) && ApproxEq(xy.r.y, 0.292) && + ApproxEq(xy.g.x, 0.170) && ApproxEq(xy.g.y, 0.797) && + ApproxEq(xy.b.x, 0.131) && ApproxEq(xy.b.y, 0.046)) { + primaries = Primaries::k2100; + return true; + } + if (ApproxEq(xy.r.x, 0.680) && ApproxEq(xy.r.y, 0.320) && + ApproxEq(xy.g.x, 0.265) && 
ApproxEq(xy.g.y, 0.690) && + ApproxEq(xy.b.x, 0.150) && ApproxEq(xy.b.y, 0.060)) { + primaries = Primaries::kP3; + return true; + } + + primaries = Primaries::kCustom; + JXL_RETURN_IF_ERROR(red_.Set(xy.r)); + JXL_RETURN_IF_ERROR(green_.Set(xy.g)); + JXL_RETURN_IF_ERROR(blue_.Set(xy.b)); + return true; +} + +Status ColorEncoding::CreateICC() { + InternalRemoveICC(); + return MaybeCreateProfile(*this, &icc_); +} + +Status ColorEncoding::SetFieldsFromICC(const JxlCmsInterface& cms) { + // In case parsing fails, mark the ColorEncoding as invalid. + SetColorSpace(ColorSpace::kUnknown); + tf.SetTransferFunction(TransferFunction::kUnknown); + + if (icc_.empty()) return JXL_FAILURE("Empty ICC profile"); + + JxlColorEncoding external; + JXL_BOOL cmyk; + JXL_RETURN_IF_ERROR(cms.set_fields_from_icc(cms.set_fields_data, icc_.data(), + icc_.size(), &external, &cmyk)); + if (cmyk) { + cmyk_ = true; + return true; + } + PaddedBytes icc = std::move(icc_); + JXL_RETURN_IF_ERROR(ConvertExternalToInternalColorEncoding(external, this)); + icc_ = std::move(icc); + return true; +} + +void ColorEncoding::DecideIfWantICC(const JxlCmsInterface& cms) { + if (icc_.empty()) return; + + JxlColorEncoding c; + JXL_BOOL cmyk; + if (!cms.set_fields_from_icc(cms.set_fields_data, icc_.data(), icc_.size(), + &c, &cmyk)) { + return; + } + if (cmyk) return; + + PaddedBytes new_icc; + if (!MaybeCreateProfile(*this, &new_icc)) return; + + want_icc_ = false; +} + +std::string Description(const ColorEncoding& c_in) { + // Copy required for Implicit* + ColorEncoding c = c_in; + + std::string d = ToString(c.GetColorSpace()); + + if (!c.ImplicitWhitePoint()) { + d += '_'; + if (c.white_point == WhitePoint::kCustom) { + const CIExy wp = c.GetWhitePoint(); + d += ToString(wp.x) + ';'; + d += ToString(wp.y); + } else { + d += ToString(c.white_point); + } + } + + if (c.HasPrimaries()) { + d += '_'; + if (c.primaries == Primaries::kCustom) { + const PrimariesCIExy pr = c.GetPrimaries(); + d += ToString(pr.r.x) + 
';'; + d += ToString(pr.r.y) + ';'; + d += ToString(pr.g.x) + ';'; + d += ToString(pr.g.y) + ';'; + d += ToString(pr.b.x) + ';'; + d += ToString(pr.b.y); + } else { + d += ToString(c.primaries); + } + } + + d += '_'; + d += ToString(c.rendering_intent); + + if (!c.tf.SetImplicit()) { + d += '_'; + if (c.tf.IsGamma()) { + d += 'g'; + d += ToString(c.tf.GetGamma()); + } else { + d += ToString(c.tf.GetTransferFunction()); + } + } + + return d; +} + +Customxy::Customxy() { Bundle::Init(this); } +Status Customxy::VisitFields(Visitor* JXL_RESTRICT visitor) { + uint32_t ux = PackSigned(x); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(19), BitsOffset(19, 524288), + BitsOffset(20, 1048576), + BitsOffset(21, 2097152), 0, &ux)); + x = UnpackSigned(ux); + uint32_t uy = PackSigned(y); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(19), BitsOffset(19, 524288), + BitsOffset(20, 1048576), + BitsOffset(21, 2097152), 0, &uy)); + y = UnpackSigned(uy); + return true; +} + +CustomTransferFunction::CustomTransferFunction() { Bundle::Init(this); } +Status CustomTransferFunction::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->Conditional(!SetImplicit())) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_gamma_)); + + if (visitor->Conditional(have_gamma_)) { + // Gamma is represented as a 24-bit int, the exponent used is + // gamma_ / 1e7. Valid values are (0, 1]. On the low end side, we also + // limit it to kMaxGamma/1e7. 
+ JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(24, kGammaMul, &gamma_)); + if (gamma_ > kGammaMul || + static_cast(gamma_) * kMaxGamma < kGammaMul) { + return JXL_FAILURE("Invalid gamma %u", gamma_); + } + } + + if (visitor->Conditional(!have_gamma_)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->Enum(TransferFunction::kSRGB, &transfer_function_)); + } + } + + return true; +} + +ColorEncoding::ColorEncoding() { Bundle::Init(this); } +Status ColorEncoding::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->AllDefault(*this, &all_default)) { + // Overwrite all serialized fields, but not any nonserialized_*. + visitor->SetDefault(this); + return true; + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &want_icc_)); + + // Always send even if want_icc_ because this affects decoding. + // We can skip the white point/primaries because they do not. + JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ColorSpace::kRGB, &color_space_)); + + if (visitor->Conditional(!WantICC())) { + // Serialize enums. NOTE: we set the defaults to the most common values so + // ImageMetadata.all_default is true in the common case. + + if (visitor->Conditional(!ImplicitWhitePoint())) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(WhitePoint::kD65, &white_point)); + if (visitor->Conditional(white_point == WhitePoint::kCustom)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&white_)); + } + } + + if (visitor->Conditional(HasPrimaries())) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(Primaries::kSRGB, &primaries)); + if (visitor->Conditional(primaries == Primaries::kCustom)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&red_)); + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&green_)); + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blue_)); + } + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tf)); + + JXL_QUIET_RETURN_IF_ERROR( + visitor->Enum(RenderingIntent::kRelative, &rendering_intent)); + + // We didn't have ICC, so all fields should be known. 
+ if (color_space_ == ColorSpace::kUnknown || tf.IsUnknown()) { + return JXL_FAILURE( + "No ICC but cs %u and tf %u%s", + static_cast(color_space_), + tf.IsGamma() ? 0 + : static_cast(tf.GetTransferFunction()), + tf.IsGamma() ? "(gamma)" : ""); + } + + JXL_RETURN_IF_ERROR(CreateICC()); + } + + if (WantICC() && visitor->IsReading()) { + // Haven't called SetICC() yet, do nothing. + } else { + if (ICC().empty()) return JXL_FAILURE("Empty ICC"); + } + + return true; +} + +void ConvertInternalToExternalColorEncoding(const ColorEncoding& internal, + JxlColorEncoding* external) { + external->color_space = static_cast(internal.GetColorSpace()); + + external->white_point = static_cast(internal.white_point); + + jxl::CIExy whitepoint = internal.GetWhitePoint(); + external->white_point_xy[0] = whitepoint.x; + external->white_point_xy[1] = whitepoint.y; + + if (external->color_space == JXL_COLOR_SPACE_RGB || + external->color_space == JXL_COLOR_SPACE_UNKNOWN) { + external->primaries = static_cast(internal.primaries); + jxl::PrimariesCIExy primaries = internal.GetPrimaries(); + external->primaries_red_xy[0] = primaries.r.x; + external->primaries_red_xy[1] = primaries.r.y; + external->primaries_green_xy[0] = primaries.g.x; + external->primaries_green_xy[1] = primaries.g.y; + external->primaries_blue_xy[0] = primaries.b.x; + external->primaries_blue_xy[1] = primaries.b.y; + } + + if (internal.tf.IsGamma()) { + external->transfer_function = JXL_TRANSFER_FUNCTION_GAMMA; + external->gamma = internal.tf.GetGamma(); + } else { + external->transfer_function = + static_cast(internal.tf.GetTransferFunction()); + external->gamma = 0; + } + + external->rendering_intent = + static_cast(internal.rendering_intent); +} + +Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external, + ColorEncoding* internal) { + internal->SetColorSpace(static_cast(external.color_space)); + + JXL_RETURN_IF_ERROR(ConvertExternalToInternalWhitePoint( + external.white_point, 
&internal->white_point)); + if (external.white_point == JXL_WHITE_POINT_CUSTOM) { + CIExy wp; + wp.x = external.white_point_xy[0]; + wp.y = external.white_point_xy[1]; + JXL_RETURN_IF_ERROR(internal->SetWhitePoint(wp)); + } + + if (external.color_space == JXL_COLOR_SPACE_RGB || + external.color_space == JXL_COLOR_SPACE_UNKNOWN) { + JXL_RETURN_IF_ERROR(ConvertExternalToInternalPrimaries( + external.primaries, &internal->primaries)); + if (external.primaries == JXL_PRIMARIES_CUSTOM) { + PrimariesCIExy primaries; + primaries.r.x = external.primaries_red_xy[0]; + primaries.r.y = external.primaries_red_xy[1]; + primaries.g.x = external.primaries_green_xy[0]; + primaries.g.y = external.primaries_green_xy[1]; + primaries.b.x = external.primaries_blue_xy[0]; + primaries.b.y = external.primaries_blue_xy[1]; + JXL_RETURN_IF_ERROR(internal->SetPrimaries(primaries)); + } + } + CustomTransferFunction tf; + tf.nonserialized_color_space = internal->GetColorSpace(); + if (external.transfer_function == JXL_TRANSFER_FUNCTION_GAMMA) { + JXL_RETURN_IF_ERROR(tf.SetGamma(external.gamma)); + } else { + TransferFunction tf_enum; + // JXL_TRANSFER_FUNCTION_GAMMA is not handled by this function since there's + // no internal enum value for it. + JXL_RETURN_IF_ERROR(ConvertExternalToInternalTransferFunction( + external.transfer_function, &tf_enum)); + tf.SetTransferFunction(tf_enum); + } + internal->tf = tf; + + JXL_RETURN_IF_ERROR(ConvertExternalToInternalRenderingIntent( + external.rendering_intent, &internal->rendering_intent)); + + // The ColorEncoding caches an ICC profile it created earlier that may no + // longer match the profile with the changed fields, so re-create it. + if (!(internal->CreateICC())) { + // This is not an error: for example, it doesn't have ICC profile creation + // implemented for XYB. 
This should not be returned as error, since + // ConvertExternalToInternalColorEncoding still worked correctly, and what + // matters is that internal->ICC() will not return the wrong profile. + } + + return true; +} + +/* Chromatic adaptation matrices*/ +static const float kBradford[9] = { + 0.8951f, 0.2664f, -0.1614f, -0.7502f, 1.7135f, + 0.0367f, 0.0389f, -0.0685f, 1.0296f, +}; + +static const float kBradfordInv[9] = { + 0.9869929f, -0.1470543f, 0.1599627f, 0.4323053f, 0.5183603f, + 0.0492912f, -0.0085287f, 0.0400428f, 0.9684867f, +}; + +// Adapts whitepoint x, y to D50 +Status AdaptToXYZD50(float wx, float wy, float matrix[9]) { + if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) { + // Out of range values can cause division through zero + // further down with the bradford adaptation too. + return JXL_FAILURE("Invalid white point"); + } + float w[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy}; + // 1 / tiny float can still overflow + JXL_RETURN_IF_ERROR(std::isfinite(w[0]) && std::isfinite(w[2])); + float w50[3] = {0.96422f, 1.0f, 0.82521f}; + + float lms[3]; + float lms50[3]; + + Mul3x3Vector(kBradford, w, lms); + Mul3x3Vector(kBradford, w50, lms50); + + if (lms[0] == 0 || lms[1] == 0 || lms[2] == 0) { + return JXL_FAILURE("Invalid white point"); + } + float a[9] = { + // /----> 0, 1, 2, 3, /----> 4, 5, 6, 7, /----> 8, + lms50[0] / lms[0], 0, 0, 0, lms50[1] / lms[1], 0, 0, 0, lms50[2] / lms[2], + }; + if (!std::isfinite(a[0]) || !std::isfinite(a[4]) || !std::isfinite(a[8])) { + return JXL_FAILURE("Invalid white point"); + } + + float b[9]; + Mul3x3Matrix(a, kBradford, b); + Mul3x3Matrix(kBradfordInv, b, matrix); + + return true; +} + +Status PrimariesToXYZ(float rx, float ry, float gx, float gy, float bx, + float by, float wx, float wy, float matrix[9]) { + if (wx < 0 || wx > 1 || wy <= 0 || wy > 1) { + return JXL_FAILURE("Invalid white point"); + } + // TODO(lode): also require rx, ry, gx, gy, bx, to be in range 0-1? 
ICC + // profiles in theory forbid negative XYZ values, but in practice the ACES P0 + // color space uses a negative y for the blue primary. + float primaries[9] = { + rx, gx, bx, ry, gy, by, 1.0f - rx - ry, 1.0f - gx - gy, 1.0f - bx - by}; + float primaries_inv[9]; + memcpy(primaries_inv, primaries, sizeof(float) * 9); + JXL_RETURN_IF_ERROR(Inv3x3Matrix(primaries_inv)); + + float w[3] = {wx / wy, 1.0f, (1.0f - wx - wy) / wy}; + // 1 / tiny float can still overflow + JXL_RETURN_IF_ERROR(std::isfinite(w[0]) && std::isfinite(w[2])); + float xyz[3]; + Mul3x3Vector(primaries_inv, w, xyz); + + float a[9] = { + xyz[0], 0, 0, 0, xyz[1], 0, 0, 0, xyz[2], + }; + + Mul3x3Matrix(primaries, a, matrix); + return true; +} + +Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx, + float by, float wx, float wy, float matrix[9]) { + float toXYZ[9]; + JXL_RETURN_IF_ERROR(PrimariesToXYZ(rx, ry, gx, gy, bx, by, wx, wy, toXYZ)); + float d50[9]; + JXL_RETURN_IF_ERROR(AdaptToXYZD50(wx, wy, d50)); + + Mul3x3Matrix(d50, toXYZ, matrix); + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.h b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.h new file mode 100644 index 0000000000..f2f0b4675e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal.h @@ -0,0 +1,464 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_COLOR_ENCODING_INTERNAL_H_ +#define LIB_JXL_COLOR_ENCODING_INTERNAL_H_ + +// Metadata for color space conversions. 
+ +#include +#include +#include +#include +#include + +#include // std::abs +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/field_encodings.h" + +namespace jxl { + +// (All CIE units are for the standard 1931 2 degree observer) + +// Color space the color pixel data is encoded in. The color pixel data is +// 3-channel in all cases except in case of kGray, where it uses only 1 channel. +// This also determines the amount of channels used in modular encoding. +enum class ColorSpace : uint32_t { + // Trichromatic color data. This also includes CMYK if a kBlack + // ExtraChannelInfo is present. This implies, if there is an ICC profile, that + // the ICC profile uses a 3-channel color space if no kBlack extra channel is + // present, or uses color space 'CMYK' if a kBlack extra channel is present. + kRGB, + // Single-channel data. This implies, if there is an ICC profile, that the ICC + // profile also represents single-channel data and has the appropriate color + // space ('GRAY'). + kGray, + // Like kRGB, but implies fixed values for primaries etc. + kXYB, + // For non-RGB/gray data, e.g. from non-electro-optical sensors. Otherwise + // the same conditions as kRGB apply. + kUnknown +}; + +static inline const char* EnumName(ColorSpace /*unused*/) { + return "ColorSpace"; +} +static inline constexpr uint64_t EnumBits(ColorSpace /*unused*/) { + using CS = ColorSpace; + return MakeBit(CS::kRGB) | MakeBit(CS::kGray) | MakeBit(CS::kXYB) | + MakeBit(CS::kUnknown); +} + +// Values from CICP ColourPrimaries. 
+enum class WhitePoint : uint32_t { + kD65 = 1, // sRGB/BT.709/Display P3/BT.2020 + kCustom = 2, // Actual values encoded in separate fields + kE = 10, // XYZ + kDCI = 11, // DCI-P3 +}; + +static inline const char* EnumName(WhitePoint /*unused*/) { + return "WhitePoint"; +} +static inline constexpr uint64_t EnumBits(WhitePoint /*unused*/) { + return MakeBit(WhitePoint::kD65) | MakeBit(WhitePoint::kCustom) | + MakeBit(WhitePoint::kE) | MakeBit(WhitePoint::kDCI); +} + +// Values from CICP ColourPrimaries +enum class Primaries : uint32_t { + kSRGB = 1, // Same as BT.709 + kCustom = 2, // Actual values encoded in separate fields + k2100 = 9, // Same as BT.2020 + kP3 = 11, +}; + +static inline const char* EnumName(Primaries /*unused*/) { return "Primaries"; } +static inline constexpr uint64_t EnumBits(Primaries /*unused*/) { + using Pr = Primaries; + return MakeBit(Pr::kSRGB) | MakeBit(Pr::kCustom) | MakeBit(Pr::k2100) | + MakeBit(Pr::kP3); +} + +// Values from CICP TransferCharacteristics +enum class TransferFunction : uint32_t { + k709 = 1, + kUnknown = 2, + kLinear = 8, + kSRGB = 13, + kPQ = 16, // from BT.2100 + kDCI = 17, // from SMPTE RP 431-2 reference projector + kHLG = 18, // from BT.2100 +}; + +static inline const char* EnumName(TransferFunction /*unused*/) { + return "TransferFunction"; +} +static inline constexpr uint64_t EnumBits(TransferFunction /*unused*/) { + using TF = TransferFunction; + return MakeBit(TF::k709) | MakeBit(TF::kLinear) | MakeBit(TF::kSRGB) | + MakeBit(TF::kPQ) | MakeBit(TF::kDCI) | MakeBit(TF::kHLG) | + MakeBit(TF::kUnknown); +} + +enum class RenderingIntent : uint32_t { + // Values match ICC sRGB encodings. + kPerceptual = 0, // good for photos, requires a profile with LUT. + kRelative, // good for logos. + kSaturation, // perhaps useful for CG with fully saturated colors. + kAbsolute, // leaves white point unchanged; good for proofing. 
+}; + +static inline const char* EnumName(RenderingIntent /*unused*/) { + return "RenderingIntent"; +} +static inline constexpr uint64_t EnumBits(RenderingIntent /*unused*/) { + using RI = RenderingIntent; + return MakeBit(RI::kPerceptual) | MakeBit(RI::kRelative) | + MakeBit(RI::kSaturation) | MakeBit(RI::kAbsolute); +} + +// Chromaticity (Y is omitted because it is 1 for primaries/white points) +struct CIExy { + double x = 0.0; + double y = 0.0; +}; + +struct PrimariesCIExy { + CIExy r; + CIExy g; + CIExy b; +}; + +// Serializable form of CIExy. +struct Customxy : public Fields { + Customxy(); + JXL_FIELDS_NAME(Customxy) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + CIExy Get() const; + // Returns false if x or y do not fit in the encoding. + Status Set(const CIExy& xy); + + int32_t x; + int32_t y; +}; + +struct CustomTransferFunction : public Fields { + CustomTransferFunction(); + JXL_FIELDS_NAME(CustomTransferFunction) + + // Sets fields and returns true if nonserialized_color_space has an implicit + // transfer function, otherwise leaves fields unchanged and returns false. 
+ bool SetImplicit(); + + // Gamma: only used for PNG inputs + bool IsGamma() const { return have_gamma_; } + double GetGamma() const { + JXL_ASSERT(IsGamma()); + return gamma_ * 1E-7; // (0, 1) + } + Status SetGamma(double gamma); + + TransferFunction GetTransferFunction() const { + JXL_ASSERT(!IsGamma()); + return transfer_function_; + } + void SetTransferFunction(const TransferFunction tf) { + have_gamma_ = false; + transfer_function_ = tf; + } + + bool IsUnknown() const { + return !have_gamma_ && (transfer_function_ == TransferFunction::kUnknown); + } + bool IsSRGB() const { + return !have_gamma_ && (transfer_function_ == TransferFunction::kSRGB); + } + bool IsLinear() const { + return !have_gamma_ && (transfer_function_ == TransferFunction::kLinear); + } + bool IsPQ() const { + return !have_gamma_ && (transfer_function_ == TransferFunction::kPQ); + } + bool IsHLG() const { + return !have_gamma_ && (transfer_function_ == TransferFunction::kHLG); + } + bool Is709() const { + return !have_gamma_ && (transfer_function_ == TransferFunction::k709); + } + bool IsDCI() const { + return !have_gamma_ && (transfer_function_ == TransferFunction::kDCI); + } + bool IsSame(const CustomTransferFunction& other) const { + if (have_gamma_ != other.have_gamma_) return false; + if (have_gamma_) { + if (gamma_ != other.gamma_) return false; + } else { + if (transfer_function_ != other.transfer_function_) return false; + } + return true; + } + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + // Must be set before calling VisitFields! + ColorSpace nonserialized_color_space = ColorSpace::kRGB; + + private: + static constexpr uint32_t kGammaMul = 10000000; + + bool have_gamma_; + + // OETF exponent to go from linear to gamma-compressed. + uint32_t gamma_; // Only used if have_gamma_. + + // Can be kUnknown. + TransferFunction transfer_function_; // Only used if !have_gamma_. 
+}; + +// Compact encoding of data required to interpret and translate pixels to a +// known color space. Stored in Metadata. Thread-compatible. +struct ColorEncoding : public Fields { + ColorEncoding(); + JXL_FIELDS_NAME(ColorEncoding) + + // Returns ready-to-use color encodings (initialized on-demand). + static const ColorEncoding& SRGB(bool is_gray = false); + static const ColorEncoding& LinearSRGB(bool is_gray = false); + + // Returns true if an ICC profile was successfully created from fields. + // Must be called after modifying fields. Defined in color_management.cc. + Status CreateICC(); + + // Returns non-empty and valid ICC profile, unless: + // - between calling InternalRemoveICC() and CreateICC() in tests; + // - WantICC() == true and SetICC() was not yet called; + // - after a failed call to SetSRGB(), SetICC(), or CreateICC(). + const PaddedBytes& ICC() const { return icc_; } + + // Internal only, do not call except from tests. + void InternalRemoveICC() { icc_.clear(); } + + // Returns true if `icc` is assigned and decoded successfully. If so, + // subsequent WantICC() will return true until DecideIfWantICC() changes it. + // Returning false indicates data has been lost. + Status SetICC(PaddedBytes&& icc, const JxlCmsInterface* cms) { + if (icc.empty()) return false; + icc_ = std::move(icc); + + if (cms == nullptr) { + want_icc_ = true; + have_fields_ = false; + return true; + } + + if (!SetFieldsFromICC(*cms)) { + InternalRemoveICC(); + return false; + } + + want_icc_ = true; + return true; + } + + // Sets the raw ICC profile bytes, without parsing the ICC, and without + // updating the direct fields such as whitepoint, primaries and color + // space. Functions to get and set fields, such as SetWhitePoint, cannot be + // used anymore after this and functions such as IsSRGB return false no matter + // what the contents of the icc profile. 
+ Status SetICCRaw(PaddedBytes&& icc) { + if (icc.empty()) return false; + icc_ = std::move(icc); + + want_icc_ = true; + have_fields_ = false; + return true; + } + + // Returns whether to send the ICC profile in the codestream. + bool WantICC() const { return want_icc_; } + + // Return whether the direct fields are set, if false but ICC is set, only + // raw ICC bytes are known. + bool HaveFields() const { return have_fields_; } + + // Causes WantICC() to return false if ICC() can be reconstructed from fields. + void DecideIfWantICC(const JxlCmsInterface& cms); + + bool IsGray() const { return color_space_ == ColorSpace::kGray; } + bool IsCMYK() const { return cmyk_; } + size_t Channels() const { return IsGray() ? 1 : 3; } + + // Returns false if the field is invalid and unusable. + bool HasPrimaries() const { + return !IsGray() && color_space_ != ColorSpace::kXYB; + } + + // Returns true after setting the field to a value defined by color_space, + // otherwise false and leaves the field unchanged. + bool ImplicitWhitePoint() { + if (color_space_ == ColorSpace::kXYB) { + white_point = WhitePoint::kD65; + return true; + } + return false; + } + + // Returns whether the color space is known to be sRGB. If a raw unparsed ICC + // profile is set without the fields being set, this returns false, even if + // the content of the ICC profile would match sRGB. + bool IsSRGB() const { + if (!have_fields_) return false; + if (!IsGray() && color_space_ != ColorSpace::kRGB) return false; + if (white_point != WhitePoint::kD65) return false; + if (primaries != Primaries::kSRGB) return false; + if (!tf.IsSRGB()) return false; + return true; + } + + // Returns whether the color space is known to be linear sRGB. If a raw + // unparsed ICC profile is set without the fields being set, this returns + // false, even if the content of the ICC profile would match linear sRGB. 
+ bool IsLinearSRGB() const { + if (!have_fields_) return false; + if (!IsGray() && color_space_ != ColorSpace::kRGB) return false; + if (white_point != WhitePoint::kD65) return false; + if (primaries != Primaries::kSRGB) return false; + if (!tf.IsLinear()) return false; + return true; + } + + Status SetSRGB(const ColorSpace cs, + const RenderingIntent ri = RenderingIntent::kRelative) { + InternalRemoveICC(); + JXL_ASSERT(cs == ColorSpace::kGray || cs == ColorSpace::kRGB); + color_space_ = cs; + white_point = WhitePoint::kD65; + primaries = Primaries::kSRGB; + tf.SetTransferFunction(TransferFunction::kSRGB); + rendering_intent = ri; + return CreateICC(); + } + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + // Accessors ensure tf.nonserialized_color_space is updated at the same time. + ColorSpace GetColorSpace() const { return color_space_; } + void SetColorSpace(const ColorSpace cs) { + color_space_ = cs; + tf.nonserialized_color_space = cs; + } + + CIExy GetWhitePoint() const; + Status SetWhitePoint(const CIExy& xy); + + PrimariesCIExy GetPrimaries() const; + Status SetPrimaries(const PrimariesCIExy& xy); + + // Checks if the color spaces (including white point / primaries) are the + // same, but ignores the transfer function, rendering intent and ICC bytes. 
+ bool SameColorSpace(const ColorEncoding& other) const { + if (color_space_ != other.color_space_) return false; + + if (white_point != other.white_point) return false; + if (white_point == WhitePoint::kCustom) { + if (white_.x != other.white_.x || white_.y != other.white_.y) + return false; + } + + if (HasPrimaries() != other.HasPrimaries()) return false; + if (HasPrimaries()) { + if (primaries != other.primaries) return false; + if (primaries == Primaries::kCustom) { + if (red_.x != other.red_.x || red_.y != other.red_.y) return false; + if (green_.x != other.green_.x || green_.y != other.green_.y) + return false; + if (blue_.x != other.blue_.x || blue_.y != other.blue_.y) return false; + } + } + return true; + } + + // Checks if the color space and transfer function are the same, ignoring + // rendering intent and ICC bytes + bool SameColorEncoding(const ColorEncoding& other) const { + return SameColorSpace(other) && tf.IsSame(other.tf); + } + + mutable bool all_default; + + // Only valid if HaveFields() + WhitePoint white_point; + Primaries primaries; // Only valid if HasPrimaries() + CustomTransferFunction tf; + RenderingIntent rendering_intent; + + private: + // Returns true if all fields have been initialized (possibly to kUnknown). + // Returns false if the ICC profile is invalid or decoding it fails. + Status SetFieldsFromICC(const JxlCmsInterface& cms); + + // If true, the codestream contains an ICC profile and we do not serialize + // fields. Otherwise, fields are serialized and we create an ICC profile. + bool want_icc_; + + // When false, fields such as white_point and tf are invalid and must not be + // used. This occurs after setting a raw bytes-only ICC profile, only the + // ICC bytes may be used. The color_space_ field is still valid. + bool have_fields_ = true; + + PaddedBytes icc_; // Valid ICC profile + + ColorSpace color_space_; // Can be kUnknown + bool cmyk_ = false; + + // Only used if white_point == kCustom. 
+ Customxy white_; + + // Only used if primaries == kCustom. + Customxy red_; + Customxy green_; + Customxy blue_; +}; + +// Returns whether the two inputs are approximately equal. +static inline bool ApproxEq(const double a, const double b, + double max_l1 = 1E-3) { + // Threshold should be sufficient for ICC's 15-bit fixed-point numbers. + // We have seen differences of 7.1E-5 with lcms2 and 1E-3 with skcms. + return std::abs(a - b) <= max_l1; +} + +// Returns a representation of the ColorEncoding fields (not icc). +// Example description: "RGB_D65_SRG_Rel_Lin" +std::string Description(const ColorEncoding& c); +static inline std::ostream& operator<<(std::ostream& os, + const ColorEncoding& c) { + return os << Description(c); +} + +void ConvertInternalToExternalColorEncoding(const jxl::ColorEncoding& internal, + JxlColorEncoding* external); + +Status ConvertExternalToInternalColorEncoding(const JxlColorEncoding& external, + jxl::ColorEncoding* internal); + +Status PrimariesToXYZ(float rx, float ry, float gx, float gy, float bx, + float by, float wx, float wy, float matrix[9]); +Status PrimariesToXYZD50(float rx, float ry, float gx, float gy, float bx, + float by, float wx, float wy, float matrix[9]); +Status AdaptToXYZD50(float wx, float wy, float matrix[9]); + +} // namespace jxl + +#endif // LIB_JXL_COLOR_ENCODING_INTERNAL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal_test.cc b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal_test.cc new file mode 100644 index 0000000000..6ad47e1923 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/color_encoding_internal_test.cc @@ -0,0 +1,157 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/color_encoding_internal.h" + +#include + +#include "lib/jxl/encode_internal.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(ColorEncodingTest, RoundTripAll) { + for (const test::ColorEncodingDescriptor& cdesc : test::AllEncodings()) { + const ColorEncoding c_original = test::ColorEncodingFromDescriptor(cdesc); + // Verify Set(Get) yields the same white point/primaries/gamma. + { + ColorEncoding c; + EXPECT_TRUE(c.SetWhitePoint(c_original.GetWhitePoint())); + EXPECT_EQ(c_original.white_point, c.white_point); + } + { + ColorEncoding c; + EXPECT_TRUE(c.SetPrimaries(c_original.GetPrimaries())); + EXPECT_EQ(c_original.primaries, c.primaries); + } + if (c_original.tf.IsGamma()) { + ColorEncoding c; + EXPECT_TRUE(c.tf.SetGamma(c_original.tf.GetGamma())); + EXPECT_TRUE(c_original.tf.IsSame(c.tf)); + } + } +} + +TEST(ColorEncodingTest, CustomWhitePoint) { + ColorEncoding c; + // Nonsensical values + CIExy xy_in; + xy_in.x = 0.8; + xy_in.y = 0.01; + EXPECT_TRUE(c.SetWhitePoint(xy_in)); + const CIExy xy = c.GetWhitePoint(); + + ColorEncoding c2; + EXPECT_TRUE(c2.SetWhitePoint(xy)); + EXPECT_TRUE(c.SameColorSpace(c2)); +} + +TEST(ColorEncodingTest, CustomPrimaries) { + ColorEncoding c; + PrimariesCIExy xy_in; + // Nonsensical values + xy_in.r.x = -0.01; + xy_in.r.y = 0.2; + xy_in.g.x = 0.4; + xy_in.g.y = 0.401; + xy_in.b.x = 1.1; + xy_in.b.y = -1.2; + EXPECT_TRUE(c.SetPrimaries(xy_in)); + const PrimariesCIExy xy = c.GetPrimaries(); + + ColorEncoding c2; + EXPECT_TRUE(c2.SetPrimaries(xy)); + EXPECT_TRUE(c.SameColorSpace(c2)); +} + +TEST(ColorEncodingTest, CustomGamma) { + ColorEncoding c; +#ifndef JXL_CRASH_ON_ERROR + EXPECT_FALSE(c.tf.SetGamma(0.0)); + EXPECT_FALSE(c.tf.SetGamma(-1E-6)); + EXPECT_FALSE(c.tf.SetGamma(1.001)); +#endif + EXPECT_TRUE(c.tf.SetGamma(1.0)); + EXPECT_FALSE(c.tf.IsGamma()); + EXPECT_TRUE(c.tf.IsLinear()); + + EXPECT_TRUE(c.tf.SetGamma(0.123)); + EXPECT_TRUE(c.tf.IsGamma()); 
+ const double gamma = c.tf.GetGamma(); + + ColorEncoding c2; + EXPECT_TRUE(c2.tf.SetGamma(gamma)); + EXPECT_TRUE(c.SameColorEncoding(c2)); + EXPECT_TRUE(c2.tf.IsGamma()); +} + +TEST(ColorEncodingTest, InternalExternalConversion) { + ColorEncoding source_internal; + JxlColorEncoding external; + ColorEncoding destination_internal; + + for (int i = 0; i < 100; i++) { + source_internal.SetColorSpace(static_cast(rand() % 4)); + CIExy wp; + wp.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + wp.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + EXPECT_TRUE(source_internal.SetWhitePoint(wp)); + if (source_internal.HasPrimaries()) { + PrimariesCIExy primaries; + primaries.r.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + primaries.r.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + primaries.g.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + primaries.g.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + primaries.b.x = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + primaries.b.y = (float(rand()) / float((RAND_MAX)) * 0.5) + 0.25; + EXPECT_TRUE(source_internal.SetPrimaries(primaries)); + } + CustomTransferFunction tf; + EXPECT_TRUE(tf.SetGamma((float(rand()) / float((RAND_MAX)) * 0.5) + 0.25)); + source_internal.tf = tf; + source_internal.rendering_intent = static_cast(rand() % 4); + + ConvertInternalToExternalColorEncoding(source_internal, &external); + EXPECT_TRUE(ConvertExternalToInternalColorEncoding(external, + &destination_internal)); + + EXPECT_EQ(source_internal.GetColorSpace(), + destination_internal.GetColorSpace()); + EXPECT_EQ(source_internal.white_point, destination_internal.white_point); + EXPECT_EQ(source_internal.GetWhitePoint().x, + destination_internal.GetWhitePoint().x); + EXPECT_EQ(source_internal.GetWhitePoint().y, + destination_internal.GetWhitePoint().y); + if (source_internal.HasPrimaries()) { + EXPECT_EQ(source_internal.GetPrimaries().r.x, + destination_internal.GetPrimaries().r.x); + 
EXPECT_EQ(source_internal.GetPrimaries().r.y, + destination_internal.GetPrimaries().r.y); + EXPECT_EQ(source_internal.GetPrimaries().g.x, + destination_internal.GetPrimaries().g.x); + EXPECT_EQ(source_internal.GetPrimaries().g.y, + destination_internal.GetPrimaries().g.y); + EXPECT_EQ(source_internal.GetPrimaries().b.x, + destination_internal.GetPrimaries().b.x); + EXPECT_EQ(source_internal.GetPrimaries().b.y, + destination_internal.GetPrimaries().b.y); + } + EXPECT_EQ(source_internal.tf.IsGamma(), destination_internal.tf.IsGamma()); + if (source_internal.tf.IsGamma()) { + EXPECT_EQ(source_internal.tf.GetGamma(), + destination_internal.tf.GetGamma()); + } else { + EXPECT_EQ(source_internal.tf.GetTransferFunction(), + destination_internal.tf.GetTransferFunction()); + } + EXPECT_EQ(source_internal.rendering_intent, + destination_internal.rendering_intent); + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/color_management.cc b/third-party/libjxl/libjxl/lib/jxl/color_management.cc new file mode 100644 index 0000000000..9715d7f149 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/color_management.cc @@ -0,0 +1,877 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/color_management.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/color_management.cc" +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_tone_mapping-inl.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/matrix_ops.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/transfer_functions-inl.h" + +#ifndef JXL_ENABLE_3D_ICC_TONEMAPPING +#define JXL_ENABLE_3D_ICC_TONEMAPPING 1 +#endif + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +Status ToneMapPixel(const ColorEncoding& c, const float in[3], + uint8_t pcslab_out[3]) { + const PrimariesCIExy primaries = c.GetPrimaries(); + const CIExy white_point = c.GetWhitePoint(); + float primaries_XYZ[9]; + JXL_RETURN_IF_ERROR(PrimariesToXYZ( + primaries.r.x, primaries.r.y, primaries.g.x, primaries.g.y, primaries.b.x, + primaries.b.y, white_point.x, white_point.y, primaries_XYZ)); + const float luminances[3] = {primaries_XYZ[3], primaries_XYZ[4], + primaries_XYZ[5]}; + float linear[3]; + HWY_CAPPED(float, 1) d; + if (c.tf.IsPQ()) { + for (size_t i = 0; i < 3; ++i) { + linear[i] = TF_PQ().DisplayFromEncoded(in[i]); + } + } else { + for (size_t i = 0; i < 3; ++i) { + linear[i] = TF_HLG().DisplayFromEncoded(in[i]); + } + } + auto r = LoadU(d, &linear[0]), g = LoadU(d, &linear[1]), + b = LoadU(d, &linear[2]); + if (c.tf.IsPQ()) { + Rec2408ToneMapper tone_mapper({0, 10000}, {0, 250}, + luminances); + tone_mapper.ToneMap(&r, &g, &b); + } else { + HlgOOTF ootf(/*source_luminance=*/300, /*target_luminance=*/80, luminances); + ootf.Apply(&r, &g, &b); + } + GamutMap(&r, &g, &b, luminances, /*preserve_saturation=*/0.3f); + StoreU(r, d, &linear[0]); + StoreU(g, d, &linear[1]); + StoreU(b, d, &linear[2]); + + float chad[9]; + 
JXL_RETURN_IF_ERROR(AdaptToXYZD50(white_point.x, white_point.y, chad)); + float to_xyzd50[9]; + Mul3x3Matrix(chad, primaries_XYZ, to_xyzd50); + + float xyz[3] = {0, 0, 0}; + for (size_t xyz_c = 0; xyz_c < 3; ++xyz_c) { + for (size_t rgb_c = 0; rgb_c < 3; ++rgb_c) { + xyz[xyz_c] += linear[rgb_c] * to_xyzd50[3 * xyz_c + rgb_c]; + } + } + + const auto lab_f = [](const float x) { + static constexpr float kDelta = 6. / 29; + return x <= kDelta * kDelta * kDelta + ? x * (1 / (3 * kDelta * kDelta)) + 4.f / 29 + : std::cbrt(x); + }; + static constexpr float kXn = 0.964212; + static constexpr float kYn = 1; + static constexpr float kZn = 0.825188; + + const float f_x = lab_f(xyz[0] / kXn); + const float f_y = lab_f(xyz[1] / kYn); + const float f_z = lab_f(xyz[2] / kZn); + + pcslab_out[0] = + static_cast(.5f + 255.f * Clamp1(1.16f * f_y - .16f, 0.f, 1.f)); + pcslab_out[1] = static_cast( + .5f + 128.f + Clamp1(500 * (f_x - f_y), -128.f, 127.f)); + pcslab_out[2] = static_cast( + .5f + 128.f + Clamp1(200 * (f_y - f_z), -128.f, 127.f)); + + return true; +} + +std::vector CreateTableCurve(uint32_t N, const ExtraTF tf, + bool tone_map) { + // The generated PQ curve will make room for highlights up to this luminance. + // TODO(sboukortt): make this variable? + static constexpr float kPQIntensityTarget = 10000; + + JXL_ASSERT(N <= 4096); // ICC MFT2 only allows 4K entries + JXL_ASSERT(tf == ExtraTF::kPQ || tf == ExtraTF::kHLG); + + static constexpr float kLuminances[] = {1.f / 3, 1.f / 3, 1.f / 3}; + using D = HWY_CAPPED(float, 1); + Rec2408ToneMapper tone_mapper({0, kPQIntensityTarget}, + {0, kDefaultIntensityTarget}, kLuminances); + // No point using float - LCMS converts to 16-bit for A2B/MFT. + std::vector table(N); + for (uint32_t i = 0; i < N; ++i) { + const float x = static_cast(i) / (N - 1); // 1.0 at index N - 1. + const double dx = static_cast(x); + // LCMS requires EOTF (e.g. 2.4 exponent). + double y = (tf == ExtraTF::kHLG) ? 
TF_HLG().DisplayFromEncoded(dx) + : TF_PQ().DisplayFromEncoded(dx); + if (tone_map && tf == ExtraTF::kPQ && + kPQIntensityTarget > kDefaultIntensityTarget) { + D df; + auto r = Set(df, y * 10000 / kPQIntensityTarget), g = r, b = r; + tone_mapper.ToneMap(&r, &g, &b); + float fy; + StoreU(r, df, &fy); + y = fy; + } + JXL_ASSERT(y >= 0.0); + // Clamp to table range - necessary for HLG. + if (y > 1.0) y = 1.0; + // 1.0 corresponds to table value 0xFFFF. + table[i] = static_cast(roundf(y * 65535.0)); + } + return table; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +// Local functions. +HWY_EXPORT(ToneMapPixel); +HWY_EXPORT(CreateTableCurve); + +Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]) { + // Target Y = 1. + if (std::abs(xy.y) < 1e-12) return JXL_FAILURE("Y value is too small"); + const float factor = 1 / xy.y; + XYZ[0] = xy.x * factor; + XYZ[1] = 1; + XYZ[2] = (1 - xy.x - xy.y) * factor; + return true; +} + +namespace { + +constexpr bool kEnable3DToneMapping = JXL_ENABLE_3D_ICC_TONEMAPPING; + +bool CanToneMap(const ColorEncoding& encoding) { + // If the color space cannot be represented by a CICP tag in the ICC profile + // then the rest of the profile must unambiguously identify it; we have less + // freedom to do use it for tone mapping. + return encoding.GetColorSpace() == ColorSpace::kRGB && + encoding.HasPrimaries() && + (encoding.tf.IsPQ() || encoding.tf.IsHLG()) && + ((encoding.primaries == Primaries::kP3 && + (encoding.white_point == WhitePoint::kD65 || + encoding.white_point == WhitePoint::kDCI)) || + (encoding.primaries != Primaries::kCustom && + encoding.white_point == WhitePoint::kD65)); +} + +void ICCComputeMD5(const PaddedBytes& data, uint8_t sum[16]) + JXL_NO_SANITIZE("unsigned-integer-overflow") { + PaddedBytes data64 = data; + data64.push_back(128); + // Add bytes such that ((size + 8) & 63) == 0. 
+ size_t extra = ((64 - ((data64.size() + 8) & 63)) & 63); + data64.resize(data64.size() + extra, 0); + for (uint64_t i = 0; i < 64; i += 8) { + data64.push_back(static_cast(data.size() << 3u) >> i); + } + + static const uint32_t sineparts[64] = { + 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee, 0xf57c0faf, 0x4787c62a, + 0xa8304613, 0xfd469501, 0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be, + 0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821, 0xf61e2562, 0xc040b340, + 0x265e5a51, 0xe9b6c7aa, 0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8, + 0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed, 0xa9e3e905, 0xfcefa3f8, + 0x676f02d9, 0x8d2a4c8a, 0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c, + 0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70, 0x289b7ec6, 0xeaa127fa, + 0xd4ef3085, 0x04881d05, 0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665, + 0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039, 0x655b59c3, 0x8f0ccc92, + 0xffeff47d, 0x85845dd1, 0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1, + 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391, + }; + static const uint32_t shift[64] = { + 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, + 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, + 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, + 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, + }; + + uint32_t a0 = 0x67452301, b0 = 0xefcdab89, c0 = 0x98badcfe, d0 = 0x10325476; + + for (size_t i = 0; i < data64.size(); i += 64) { + uint32_t a = a0, b = b0, c = c0, d = d0, f, g; + for (size_t j = 0; j < 64; j++) { + if (j < 16) { + f = (b & c) | ((~b) & d); + g = j; + } else if (j < 32) { + f = (d & b) | ((~d) & c); + g = (5 * j + 1) & 0xf; + } else if (j < 48) { + f = b ^ c ^ d; + g = (3 * j + 5) & 0xf; + } else { + f = c ^ (b | (~d)); + g = (7 * j) & 0xf; + } + uint32_t dg0 = data64[i + g * 4 + 0], dg1 = data64[i + g * 4 + 1], + dg2 = data64[i + g * 4 + 2], dg3 = data64[i + g * 4 + 3]; + uint32_t u = dg0 | (dg1 << 8u) | (dg2 << 16u) | (dg3 << 24u); + f 
+= a + sineparts[j] + u; + a = d; + d = c; + c = b; + b += (f << shift[j]) | (f >> (32u - shift[j])); + } + a0 += a; + b0 += b; + c0 += c; + d0 += d; + } + sum[0] = a0; + sum[1] = a0 >> 8u; + sum[2] = a0 >> 16u; + sum[3] = a0 >> 24u; + sum[4] = b0; + sum[5] = b0 >> 8u; + sum[6] = b0 >> 16u; + sum[7] = b0 >> 24u; + sum[8] = c0; + sum[9] = c0 >> 8u; + sum[10] = c0 >> 16u; + sum[11] = c0 >> 24u; + sum[12] = d0; + sum[13] = d0 >> 8u; + sum[14] = d0 >> 16u; + sum[15] = d0 >> 24u; +} + +Status CreateICCChadMatrix(CIExy w, float result[9]) { + float m[9]; + if (w.y == 0) { // WhitePoint can not be pitch-black. + return JXL_FAILURE("Invalid WhitePoint"); + } + JXL_RETURN_IF_ERROR(AdaptToXYZD50(w.x, w.y, m)); + memcpy(result, m, sizeof(float) * 9); + return true; +} + +// Creates RGB to XYZ matrix given RGB primaries and whitepoint in xy. +Status CreateICCRGBMatrix(CIExy r, CIExy g, CIExy b, CIExy w, float result[9]) { + float m[9]; + JXL_RETURN_IF_ERROR( + PrimariesToXYZD50(r.x, r.y, g.x, g.y, b.x, b.y, w.x, w.y, m)); + memcpy(result, m, sizeof(float) * 9); + return true; +} + +void WriteICCUint32(uint32_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) { + if (icc->size() < pos + 4) icc->resize(pos + 4); + (*icc)[pos + 0] = (value >> 24u) & 255; + (*icc)[pos + 1] = (value >> 16u) & 255; + (*icc)[pos + 2] = (value >> 8u) & 255; + (*icc)[pos + 3] = value & 255; +} + +void WriteICCUint16(uint16_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) { + if (icc->size() < pos + 2) icc->resize(pos + 2); + (*icc)[pos + 0] = (value >> 8u) & 255; + (*icc)[pos + 1] = value & 255; +} + +void WriteICCUint8(uint8_t value, size_t pos, PaddedBytes* JXL_RESTRICT icc) { + if (icc->size() < pos + 1) icc->resize(pos + 1); + (*icc)[pos] = value; +} + +// Writes a 4-character tag +void WriteICCTag(const char* value, size_t pos, PaddedBytes* JXL_RESTRICT icc) { + if (icc->size() < pos + 4) icc->resize(pos + 4); + memcpy(icc->data() + pos, value, 4); +} + +Status WriteICCS15Fixed16(float value, 
size_t pos, + PaddedBytes* JXL_RESTRICT icc) { + // "nextafterf" for 32768.0f towards zero are: + // 32767.998046875, 32767.99609375, 32767.994140625 + // Even the first value works well,... + bool ok = (-32767.995f <= value) && (value <= 32767.995f); + if (!ok) return JXL_FAILURE("ICC value is out of range / NaN"); + int32_t i = value * 65536.0f + 0.5f; + // Use two's complement + uint32_t u = static_cast(i); + WriteICCUint32(u, pos, icc); + return true; +} + +Status CreateICCHeader(const ColorEncoding& c, + PaddedBytes* JXL_RESTRICT header) { + // TODO(lode): choose color management engine name, e.g. "skia" if + // integrated in skia. + static const char* kCmm = "jxl "; + + header->resize(128, 0); + + WriteICCUint32(0, 0, header); // size, correct value filled in at end + WriteICCTag(kCmm, 4, header); + WriteICCUint32(0x04400000u, 8, header); + const char* profile_type = + c.GetColorSpace() == ColorSpace::kXYB ? "scnr" : "mntr"; + WriteICCTag(profile_type, 12, header); + WriteICCTag(c.IsGray() ? "GRAY" : "RGB ", 16, header); + if (kEnable3DToneMapping && CanToneMap(c)) { + // We are going to use a 3D LUT for tone mapping, which will be more compact + // with an 8-bit LUT to CIELAB than with a 16-bit LUT to XYZ. 8-bit XYZ + // would not be viable due to XYZ being linear, whereas it is fine with + // CIELAB's ~cube root. + WriteICCTag("Lab ", 20, header); + } else { + WriteICCTag("XYZ ", 20, header); + } + + // Three uint32_t's date/time encoding. 
+ // TODO(lode): encode actual date and time, this is a placeholder + uint32_t year = 2019, month = 12, day = 1; + uint32_t hour = 0, minute = 0, second = 0; + WriteICCUint16(year, 24, header); + WriteICCUint16(month, 26, header); + WriteICCUint16(day, 28, header); + WriteICCUint16(hour, 30, header); + WriteICCUint16(minute, 32, header); + WriteICCUint16(second, 34, header); + + WriteICCTag("acsp", 36, header); + WriteICCTag("APPL", 40, header); + WriteICCUint32(0, 44, header); // flags + WriteICCUint32(0, 48, header); // device manufacturer + WriteICCUint32(0, 52, header); // device model + WriteICCUint32(0, 56, header); // device attributes + WriteICCUint32(0, 60, header); // device attributes + WriteICCUint32(static_cast(c.rendering_intent), 64, header); + + // Mandatory D50 white point of profile connection space + WriteICCUint32(0x0000f6d6, 68, header); + WriteICCUint32(0x00010000, 72, header); + WriteICCUint32(0x0000d32d, 76, header); + + WriteICCTag(kCmm, 80, header); + + return true; +} + +void AddToICCTagTable(const char* tag, size_t offset, size_t size, + PaddedBytes* JXL_RESTRICT tagtable, + std::vector* offsets) { + WriteICCTag(tag, tagtable->size(), tagtable); + // writing true offset deferred to later + WriteICCUint32(0, tagtable->size(), tagtable); + offsets->push_back(offset); + WriteICCUint32(size, tagtable->size(), tagtable); +} + +void FinalizeICCTag(PaddedBytes* JXL_RESTRICT tags, size_t* offset, + size_t* size) { + while ((tags->size() & 3) != 0) { + tags->push_back(0); + } + *offset += *size; + *size = tags->size() - *offset; +} + +// The input text must be ASCII, writing other characters to UTF-16 is not +// implemented. 
+void CreateICCMlucTag(const std::string& text, PaddedBytes* JXL_RESTRICT tags) { + WriteICCTag("mluc", tags->size(), tags); + WriteICCUint32(0, tags->size(), tags); + WriteICCUint32(1, tags->size(), tags); + WriteICCUint32(12, tags->size(), tags); + WriteICCTag("enUS", tags->size(), tags); + WriteICCUint32(text.size() * 2, tags->size(), tags); + WriteICCUint32(28, tags->size(), tags); + for (size_t i = 0; i < text.size(); i++) { + tags->push_back(0); // prepend 0 for UTF-16 + tags->push_back(text[i]); + } +} + +Status CreateICCXYZTag(float xyz[3], PaddedBytes* JXL_RESTRICT tags) { + WriteICCTag("XYZ ", tags->size(), tags); + WriteICCUint32(0, tags->size(), tags); + for (size_t i = 0; i < 3; ++i) { + JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(xyz[i], tags->size(), tags)); + } + return true; +} + +Status CreateICCChadTag(float chad[9], PaddedBytes* JXL_RESTRICT tags) { + WriteICCTag("sf32", tags->size(), tags); + WriteICCUint32(0, tags->size(), tags); + for (size_t i = 0; i < 9; i++) { + JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(chad[i], tags->size(), tags)); + } + return true; +} + +void MaybeCreateICCCICPTag(const ColorEncoding& c, + PaddedBytes* JXL_RESTRICT tags, size_t* offset, + size_t* size, PaddedBytes* JXL_RESTRICT tagtable, + std::vector* offsets) { + if (c.GetColorSpace() != ColorSpace::kRGB) { + return; + } + uint8_t primaries = 0; + if (c.primaries == Primaries::kP3) { + if (c.white_point == WhitePoint::kD65) { + primaries = 12; + } else if (c.white_point == WhitePoint::kDCI) { + primaries = 11; + } else { + return; + } + } else if (c.primaries != Primaries::kCustom && + c.white_point == WhitePoint::kD65) { + primaries = static_cast(c.primaries); + } else { + return; + } + if (c.tf.IsUnknown() || c.tf.IsGamma()) { + return; + } + WriteICCTag("cicp", tags->size(), tags); + WriteICCUint32(0, tags->size(), tags); + WriteICCUint8(primaries, tags->size(), tags); + WriteICCUint8(static_cast(c.tf.GetTransferFunction()), tags->size(), + tags); + // Matrix + 
WriteICCUint8(0, tags->size(), tags); + // Full range + WriteICCUint8(1, tags->size(), tags); + FinalizeICCTag(tags, offset, size); + AddToICCTagTable("cicp", *offset, *size, tagtable, offsets); +} + +void CreateICCCurvCurvTag(const std::vector& curve, + PaddedBytes* JXL_RESTRICT tags) { + size_t pos = tags->size(); + tags->resize(tags->size() + 12 + curve.size() * 2, 0); + WriteICCTag("curv", pos, tags); + WriteICCUint32(0, pos + 4, tags); + WriteICCUint32(curve.size(), pos + 8, tags); + for (size_t i = 0; i < curve.size(); i++) { + WriteICCUint16(curve[i], pos + 12 + i * 2, tags); + } +} + +// Writes 12 + 4*params.size() bytes +Status CreateICCCurvParaTag(std::vector params, size_t curve_type, + PaddedBytes* JXL_RESTRICT tags) { + WriteICCTag("para", tags->size(), tags); + WriteICCUint32(0, tags->size(), tags); + WriteICCUint16(curve_type, tags->size(), tags); + WriteICCUint16(0, tags->size(), tags); + for (size_t i = 0; i < params.size(); i++) { + JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(params[i], tags->size(), tags)); + } + return true; +} + +Status CreateICCLutAtoBTagForXYB(PaddedBytes* JXL_RESTRICT tags) { + WriteICCTag("mAB ", tags->size(), tags); + // 4 reserved bytes set to 0 + WriteICCUint32(0, tags->size(), tags); + // number of input channels + WriteICCUint8(3, tags->size(), tags); + // number of output channels + WriteICCUint8(3, tags->size(), tags); + // 2 reserved bytes for padding + WriteICCUint16(0, tags->size(), tags); + // offset to first B curve + WriteICCUint32(32, tags->size(), tags); + // offset to matrix + WriteICCUint32(244, tags->size(), tags); + // offset to first M curve + WriteICCUint32(148, tags->size(), tags); + // offset to CLUT + WriteICCUint32(80, tags->size(), tags); + // offset to first A curve + // (reuse linear B curves) + WriteICCUint32(32, tags->size(), tags); + + // offset = 32 + // no-op curves + JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags)); + JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags)); + 
JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags)); + // offset = 80 + // number of grid points for each input channel + for (int i = 0; i < 16; ++i) { + WriteICCUint8(i < 3 ? 2 : 0, tags->size(), tags); + } + // precision = 2 + WriteICCUint8(2, tags->size(), tags); + // 3 bytes of padding + WriteICCUint8(0, tags->size(), tags); + WriteICCUint16(0, tags->size(), tags); + const float kOffsets[3] = { + kScaledXYBOffset[0] + kScaledXYBOffset[1], + kScaledXYBOffset[1] - kScaledXYBOffset[0] + 1.0f / kScaledXYBScale[0], + kScaledXYBOffset[1] + kScaledXYBOffset[2]}; + const float kScaling[3] = { + 1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]), + 1.0f / (1.0f / kScaledXYBScale[0] + 1.0f / kScaledXYBScale[1]), + 1.0f / (1.0f / kScaledXYBScale[1] + 1.0f / kScaledXYBScale[2])}; + // 2*2*2*3 entries of 2 bytes each = 48 bytes + for (size_t ix = 0; ix < 2; ++ix) { + for (size_t iy = 0; iy < 2; ++iy) { + for (size_t ib = 0; ib < 2; ++ib) { + float in_f[3] = {ix * 1.0f, iy * 1.0f, ib * 1.0f}; + for (size_t c = 0; c < 3; ++c) { + in_f[c] /= kScaledXYBScale[c]; + in_f[c] -= kScaledXYBOffset[c]; + } + float out_f[3]; + out_f[0] = in_f[1] + in_f[0]; + out_f[1] = in_f[1] - in_f[0]; + out_f[2] = in_f[2] + in_f[1]; + for (int i = 0; i < 3; ++i) { + out_f[i] += kOffsets[i]; + out_f[i] *= kScaling[i]; + } + for (int i = 0; i < 3; ++i) { + JXL_RETURN_IF_ERROR(out_f[i] >= 0.f && out_f[i] <= 1.f); + uint16_t val = static_cast( + 0.5f + 65535 * std::max(0.f, std::min(1.f, out_f[i]))); + WriteICCUint16(val, tags->size(), tags); + } + } + } + } + // offset = 148 + // 3 curves with 5 parameters = 3 * (12 + 5 * 4) = 96 bytes + for (size_t i = 0; i < 3; ++i) { + const float b = + -kOffsets[i] - std::cbrt(jxl::kNegOpsinAbsorbanceBiasRGB[i]); + std::vector params = { + 3, + 1.0f / kScaling[i], + b, + 0, // unused + std::max(0.f, -b * kScaling[i]), // make skcms happy + }; + JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(params, 3, tags)); + } + // offset = 244 + const double 
matrix[] = {1.5170095, -1.1065225, 0.071623,
+                          -0.050022, 0.5683655, -0.018344,
+                          -1.387676, 1.1145555, 0.6857255};
+  // 12 * 4 = 48 bytes
+  for (size_t i = 0; i < 9; ++i) {
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(matrix[i], tags->size(), tags));
+  }
+  for (size_t i = 0; i < 3; ++i) {
+    float intercept = 0;
+    for (size_t j = 0; j < 3; ++j) {
+      intercept += matrix[i * 3 + j] * jxl::kNegOpsinAbsorbanceBiasRGB[j];
+    }
+    JXL_RETURN_IF_ERROR(WriteICCS15Fixed16(intercept, tags->size(), tags));
+  }
+  return true;
+}
+
+// Appends an "mft1" lookup-table tag to `tags` whose 9x9x9 CLUT is filled by
+// running ToneMapPixel on a regular grid of inputs in [0, 1]^3 for encoding
+// `c`. The matrix and the 256-entry input/output tables are written as
+// identities, so the whole transform lives in the CLUT.
+// Returns an error if a fixed-point write or ToneMapPixel fails.
+Status CreateICCLutAtoBTagForHDR(ColorEncoding c,
+                                 PaddedBytes* JXL_RESTRICT tags) {
+  static constexpr size_t k3DLutDim = 9;
+  WriteICCTag("mft1", tags->size(), tags);
+  // 4 reserved bytes set to 0
+  WriteICCUint32(0, tags->size(), tags);
+  // number of input channels
+  WriteICCUint8(3, tags->size(), tags);
+  // number of output channels
+  WriteICCUint8(3, tags->size(), tags);
+  // number of CLUT grid points
+  WriteICCUint8(k3DLutDim, tags->size(), tags);
+  // 1 reserved byte for padding
+  WriteICCUint8(0, tags->size(), tags);
+
+  // Matrix (per specification, must be identity if input is not XYZ)
+  for (size_t i = 0; i < 3; ++i) {
+    for (size_t j = 0; j < 3; ++j) {
+      JXL_RETURN_IF_ERROR(
+          WriteICCS15Fixed16(i == j ? 1.f : 0.f, tags->size(), tags));
+    }
+  }
+
+  // Input tables: identity ramp 0..255 for each of the 3 channels.
+  // Note: the loop index `c` shadows the ColorEncoding parameter `c` here.
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t i = 0; i < 256; ++i) {
+      WriteICCUint8(i, tags->size(), tags);
+    }
+  }
+
+  // CLUT: one 3-byte output per grid point; the last index varies fastest.
+  for (size_t ix = 0; ix < k3DLutDim; ++ix) {
+    for (size_t iy = 0; iy < k3DLutDim; ++iy) {
+      for (size_t ib = 0; ib < k3DLutDim; ++ib) {
+        float f[3] = {ix * (1.0f / (k3DLutDim - 1)),
+                      iy * (1.0f / (k3DLutDim - 1)),
+                      ib * (1.0f / (k3DLutDim - 1))};
+        uint8_t pcslab_out[3];
+        JXL_RETURN_IF_ERROR(
+            HWY_DYNAMIC_DISPATCH(ToneMapPixel)(c, f, pcslab_out));
+        for (uint8_t val : pcslab_out) {
+          WriteICCUint8(val, tags->size(), tags);
+        }
+      }
+    }
+  }
+
+  // Output tables: identity ramp 0..255 for each of the 3 channels.
+  for (size_t c = 0; c < 3; ++c) {
+    for (size_t i = 0; i < 256; ++i) {
+      WriteICCUint8(i, tags->size(), tags);
+    }
+  }
+
+  return true;
+}
+
+// Some software (Apple Safari, Preview) requires this.
+// Appends an "mBA " tag that contains only B curves: every other element
+// offset is written as 0, and the B-curve offset of 32 equals the number of
+// header bytes written before the curves (4 + 4 + 1 + 1 + 2 + 5 * 4).
+// The three curves are created with a single parameter of 1.0 and type 0
+// (presumably a gamma-1.0, i.e. identity, curve -- confirm against
+// CreateICCCurvParaTag).
+Status CreateICCNoOpBToATag(PaddedBytes* JXL_RESTRICT tags) {
+  WriteICCTag("mBA ", tags->size(), tags);
+  // 4 reserved bytes set to 0
+  WriteICCUint32(0, tags->size(), tags);
+  // number of input channels
+  WriteICCUint8(3, tags->size(), tags);
+  // number of output channels
+  WriteICCUint8(3, tags->size(), tags);
+  // 2 reserved bytes for padding
+  WriteICCUint16(0, tags->size(), tags);
+  // offset to first B curve
+  WriteICCUint32(32, tags->size(), tags);
+  // offset to matrix
+  WriteICCUint32(0, tags->size(), tags);
+  // offset to first M curve
+  WriteICCUint32(0, tags->size(), tags);
+  // offset to CLUT
+  WriteICCUint32(0, tags->size(), tags);
+  // offset to first A curve
+  WriteICCUint32(0, tags->size(), tags);
+
+  JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+  JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+  JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({1.0f}, 0, tags));
+
+  return true;
+}
+
+}  // namespace
+
+// Builds an ICC profile for the enum-described encoding `c` and stores it in
+// `*icc` as header + tag table + tag data.
+//
+// Returns false without raising an error when the color space or transfer
+// function is unknown; returns a failure for color spaces other than
+// RGB/Gray/XYB and for XYB with a non-perceptual rendering intent.
+//
+// Tags emitted, in order: desc, cprt, wtpt, chad (non-gray only), then
+//  - RGB: optional cicp (via MaybeCreateICCCICPTag) and rXYZ/gXYZ/bXYZ;
+//  - XYB, or tone-mappable encodings when kEnable3DToneMapping is set:
+//    A2B0 + B2A0 lookup tags;
+//  - otherwise: one tone curve shared by kTRC (gray) or rTRC/gTRC/bTRC.
+//
+// NOTE(review): `static_cast(` without a target type and `std::vector offsets`
+// without an element type are not valid C++; the template arguments appear to
+// have been stripped from this copy of the patch.
+Status MaybeCreateProfile(const ColorEncoding& c,
+                          PaddedBytes* JXL_RESTRICT icc) {
+  PaddedBytes header, tagtable, tags;
+
+  if (c.GetColorSpace() == ColorSpace::kUnknown || c.tf.IsUnknown()) {
+    return false;  // Not an error
+  }
+
+  switch (c.GetColorSpace()) {
+    case ColorSpace::kRGB:
+    case ColorSpace::kGray:
+    case ColorSpace::kXYB:
+      break;  // OK
+    default:
+      return JXL_FAILURE("Invalid CS %u",
+                         static_cast(c.GetColorSpace()));
+  }
+
+  if (c.GetColorSpace() == ColorSpace::kXYB &&
+      c.rendering_intent != RenderingIntent::kPerceptual) {
+    return JXL_FAILURE(
+        "Only perceptual rendering intent implemented for XYB "
+        "ICC profile.");
+  }
+
+  JXL_RETURN_IF_ERROR(CreateICCHeader(c, &header));
+
+  std::vector offsets;
+  // tag count, deferred to later
+  WriteICCUint32(0, tagtable.size(), &tagtable);
+
+  size_t tag_offset = 0, tag_size = 0;
+
+  CreateICCMlucTag(Description(c), &tags);
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("desc", tag_offset, tag_size, &tagtable, &offsets);
+
+  const std::string copyright = "CC0";
+  CreateICCMlucTag(copyright, &tags);
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("cprt", tag_offset, tag_size, &tagtable, &offsets);
+
+  // TODO(eustas): isn't it the other way round: gray image has d50 WhitePoint?
+  if (c.IsGray()) {
+    float wtpt[3];
+    JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(c.GetWhitePoint(), wtpt));
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(wtpt, &tags));
+  } else {
+    float d50[3] = {0.964203, 1.0, 0.824905};
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(d50, &tags));
+  }
+  FinalizeICCTag(&tags, &tag_offset, &tag_size);
+  AddToICCTagTable("wtpt", tag_offset, tag_size, &tagtable, &offsets);
+
+  if (!c.IsGray()) {
+    // Chromatic adaptation matrix
+    float chad[9];
+    JXL_RETURN_IF_ERROR(CreateICCChadMatrix(c.GetWhitePoint(), chad));
+
+    JXL_RETURN_IF_ERROR(CreateICCChadTag(chad, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("chad", tag_offset, tag_size, &tagtable, &offsets);
+  }
+
+  if (c.GetColorSpace() == ColorSpace::kRGB) {
+    MaybeCreateICCCICPTag(c, &tags, &tag_offset, &tag_size, &tagtable,
+                          &offsets);
+
+    const PrimariesCIExy primaries = c.GetPrimaries();
+    float m[9];
+    JXL_RETURN_IF_ERROR(CreateICCRGBMatrix(primaries.r, primaries.g,
+                                           primaries.b, c.GetWhitePoint(), m));
+    // Each colorant tag takes one column of the row-major 3x3 matrix `m`.
+    float r[3] = {m[0], m[3], m[6]};
+    float g[3] = {m[1], m[4], m[7]};
+    float b[3] = {m[2], m[5], m[8]};
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(r, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("rXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(g, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("gXYZ", tag_offset, tag_size, &tagtable, &offsets);
+
+    JXL_RETURN_IF_ERROR(CreateICCXYZTag(b, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("bXYZ", tag_offset, tag_size, &tagtable, &offsets);
+  }
+
+  if (c.GetColorSpace() == ColorSpace::kXYB) {
+    JXL_RETURN_IF_ERROR(CreateICCLutAtoBTagForXYB(&tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("A2B0", tag_offset, tag_size, &tagtable, &offsets);
+    JXL_RETURN_IF_ERROR(CreateICCNoOpBToATag(&tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("B2A0", tag_offset, tag_size, &tagtable, &offsets);
+  } else if (kEnable3DToneMapping && CanToneMap(c)) {
+    JXL_RETURN_IF_ERROR(CreateICCLutAtoBTagForHDR(c, &tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("A2B0", tag_offset, tag_size, &tagtable, &offsets);
+    JXL_RETURN_IF_ERROR(CreateICCNoOpBToATag(&tags));
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    AddToICCTagTable("B2A0", tag_offset, tag_size, &tagtable, &offsets);
+  } else {
+    if (c.tf.IsGamma()) {
+      float gamma = 1.0 / c.tf.GetGamma();
+      JXL_RETURN_IF_ERROR(CreateICCCurvParaTag({gamma}, 0, &tags));
+    } else if (c.GetColorSpace() != ColorSpace::kXYB) {
+      // The XYB case was handled by the first branch above, so this
+      // condition always holds when reached.
+      switch (c.tf.GetTransferFunction()) {
+        case TransferFunction::kHLG:
+          CreateICCCurvCurvTag(HWY_DYNAMIC_DISPATCH(CreateTableCurve)(
+                                   64, ExtraTF::kHLG, CanToneMap(c)),
+                               &tags);
+          break;
+        case TransferFunction::kPQ:
+          CreateICCCurvCurvTag(HWY_DYNAMIC_DISPATCH(CreateTableCurve)(
+                                   64, ExtraTF::kPQ, CanToneMap(c)),
+                               &tags);
+          break;
+        case TransferFunction::kSRGB:
+          JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+              {2.4, 1.0 / 1.055, 0.055 / 1.055, 1.0 / 12.92, 0.04045}, 3,
+              &tags));
+          break;
+        case TransferFunction::k709:
+          JXL_RETURN_IF_ERROR(CreateICCCurvParaTag(
+              {1.0 / 0.45, 1.0 / 1.099, 0.099 / 1.099, 1.0 / 4.5, 0.081}, 3,
+              &tags));
+          break;
+        case TransferFunction::kLinear:
+          JXL_RETURN_IF_ERROR(
+              CreateICCCurvParaTag({1.0, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+          break;
+        case TransferFunction::kDCI:
+          JXL_RETURN_IF_ERROR(
+              CreateICCCurvParaTag({2.6, 1.0, 0.0, 1.0, 0.0}, 3, &tags));
+          break;
+        default:
+          JXL_UNREACHABLE("Unknown TF %u", static_cast(
+                                               c.tf.GetTransferFunction()));
+      }
+    }
+    FinalizeICCTag(&tags, &tag_offset, &tag_size);
+    // The same curve data is referenced by every TRC entry.
+    if (c.IsGray()) {
+      AddToICCTagTable("kTRC", tag_offset, tag_size, &tagtable, &offsets);
+    } else {
+      AddToICCTagTable("rTRC", tag_offset, tag_size, &tagtable, &offsets);
+      AddToICCTagTable("gTRC", tag_offset, tag_size, &tagtable, &offsets);
+      AddToICCTagTable("bTRC", tag_offset, tag_size, &tagtable, &offsets);
+    }
+  }
+
+  // Tag count
+  WriteICCUint32(offsets.size(), 0, &tagtable);
+  // Rewrite each entry's offset so that it is relative to the start of the
+  // profile instead of the start of `tags`. Position 4 + 12 * i + 4 skips the
+  // 4-byte tag count, i earlier entries (presumably 12 bytes each: signature,
+  // offset, size) and the 4-byte signature of entry i.
+  for (size_t i = 0; i < offsets.size(); i++) {
+    WriteICCUint32(offsets[i] + header.size() + tagtable.size(), 4 + 12 * i + 4,
+                   &tagtable);
+  }
+
+  // ICC profile size
+  WriteICCUint32(header.size() + tagtable.size() + tags.size(), 0, &header);
+
+  *icc = header;
+  icc->append(tagtable);
+  icc->append(tags);
+
+  // The MD5 checksum must be computed on the profile with profile flags,
+  // rendering intent, and region of the checksum itself, set to 0.
+  // TODO(lode): manually verify with a reliable tool that this creates correct
+  // signature (profile id) for ICC profiles.
+  PaddedBytes icc_sum = *icc;
+  if (icc_sum.size() >= 64 + 4) {
+    memset(icc_sum.data() + 44, 0, 4);
+    memset(icc_sum.data() + 64, 0, 4);
+  }
+  uint8_t checksum[16];
+  ICCComputeMD5(icc_sum, checksum);
+
+  // The 16-byte checksum is stored at byte offset 84 of the final profile.
+  memcpy(icc->data() + 84, checksum, sizeof(checksum));
+
+  return true;
+}
+
+}  // namespace jxl
+#endif  // HWY_ONCE
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_management.h b/third-party/libjxl/libjxl/lib/jxl/color_management.h
new file mode 100644
index 0000000000..f623aa1c90
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_management.h
@@ -0,0 +1,40 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COLOR_MANAGEMENT_H_
+#define LIB_JXL_COLOR_MANAGEMENT_H_
+
+// ICC profiles and color space conversions.
+
+// NOTE(review): the targets of the three bare #include lines below are
+// missing in this copy of the patch (angle-bracket contents were stripped).
+#include
+#include
+
+#include
+
+#include "lib/jxl/base/padded_bytes.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/color_encoding_internal.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/image.h"
+
+namespace jxl {
+
+// Transfer-function selector passed to the table-curve generator when
+// building ICC tone curves (MaybeCreateProfile passes kPQ and kHLG).
+enum class ExtraTF {
+  kNone,
+  kPQ,
+  kHLG,
+  kSRGB,
+};
+
+// NOTE: for XYB colorspace, the created profile can be used to transform a
+// *scaled* XYB image (created by ScaleXYB()) to another colorspace.
+Status MaybeCreateProfile(const ColorEncoding& c,
+                          PaddedBytes* JXL_RESTRICT icc);
+
+// Presumably converts a CIE xy white point into CIE XYZ -- confirm against
+// the definition, which is not part of this header.
+Status CIEXYZFromWhiteCIExy(const CIExy& xy, float XYZ[3]);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_COLOR_MANAGEMENT_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/color_management_test.cc b/third-party/libjxl/libjxl/lib/jxl/color_management_test.cc
new file mode 100644
index 0000000000..69c9f83499
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/color_management_test.cc
@@ -0,0 +1,435 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/color_management.h"
+
+// NOTE(review): the targets of the bare #include lines below are missing in
+// this copy of the patch (angle-bracket contents were stripped).
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/enc_color_management.h"
+#include "lib/jxl/enc_xyb.h"
+#include "lib/jxl/image_test_utils.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+
+// Stream printer for CIExy; output has the form {x=..., y=...}.
+std::ostream& operator<<(std::ostream& os, const CIExy& xy) {
+  return os << "{x=" << xy.x << ", y=" << xy.y << "}";
+}
+
+// Stream printer for PrimariesCIExy; prints the r, g and b chromaticities
+// using the CIExy printer above.
+std::ostream& operator<<(std::ostream& os, const PrimariesCIExy& primaries) {
+  return os << "{r=" << primaries.r << ", g=" << primaries.g
+            << ", b=" << primaries.b << "}";
+}
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::FloatNear;
+
+// Small enough to be fast. If changed, must update Generate*.
+static constexpr size_t kWidth = 16;
+
+static constexpr size_t kNumThreads = 1;  // only have a single row.
+
+// Matcher that compares two ColorEncoding-like objects field by field
+// (rendering intent, color space, white point, primaries when the argument
+// has them, transfer function) and explains the first mismatch found.
+MATCHER_P(HasSameFieldsAs, expected, "") {
+  if (arg.rendering_intent != expected.rendering_intent) {
+    *result_listener << "which has a different rendering intent: "
+                     << ToString(arg.rendering_intent) << " instead of "
+                     << ToString(expected.rendering_intent);
+    return false;
+  }
+  if (arg.GetColorSpace() != expected.GetColorSpace()) {
+    *result_listener << "which has a different color space: "
+                     << ToString(arg.GetColorSpace()) << " instead of "
+                     << ToString(expected.GetColorSpace());
+    return false;
+  }
+  if (arg.white_point != expected.white_point) {
+    *result_listener << "which has a different white point: "
+                     << ToString(arg.white_point) << " instead of "
+                     << ToString(expected.white_point);
+    return false;
+  }
+  if (arg.HasPrimaries() && arg.primaries != expected.primaries) {
+    *result_listener << "which has different primaries: "
+                     << ToString(arg.primaries) << " instead of "
+                     << ToString(expected.primaries);
+    return false;
+  }
+  if (!arg.tf.IsSame(expected.tf)) {
+    // Gamma transfer functions are printed as "g<gamma>", others by enum.
+    static const auto tf_to_string = [](const CustomTransferFunction& tf) {
+      if (tf.IsGamma()) {
+        return "g" + ToString(tf.GetGamma());
+      }
+      return ToString(tf.GetTransferFunction());
+    };
+    *result_listener << "which has a different transfer function: "
+                     << tf_to_string(arg.tf) << " instead of "
+                     << tf_to_string(expected.tf);
+    return false;
+  }
+  return true;
+}
+
+// Lazily constructed singleton holding the shared one-row input/output images
+// and the native (linear sRGB) encodings used by the round-trip tests.
+struct Globals {
+  // TODO(deymo): Make this a const.
+  static Globals* GetInstance() {
+    static Globals ret;
+    return &ret;
+  }
+
+ private:
+  Globals() {
+    in_gray = GenerateGray();
+    in_color = GenerateColor();
+    out_gray = ImageF(kWidth, 1);
+    out_color = ImageF(kWidth * 3, 1);
+
+    c_native = ColorEncoding::LinearSRGB(/*is_gray=*/false);
+    c_gray = ColorEncoding::LinearSRGB(/*is_gray=*/true);
+  }
+
+  // One row of kWidth samples forming a linear ramp from 0 to 1.
+  static ImageF GenerateGray() {
+    ImageF gray(kWidth, 1);
+    float* JXL_RESTRICT row = gray.Row(0);
+    // Increasing left to right
+    for (uint32_t x = 0; x < kWidth; ++x) {
+      row[x] = x * 1.0f / (kWidth - 1);  // [0, 1]
+    }
+    return gray;
+  }
+
+  // One row of kWidth interleaved RGB pixels; the hard-coded pixel indices
+  // below cover exactly 16 pixels.
+  static ImageF GenerateColor() {
+    ImageF image(kWidth * 3, 1);
+    float* JXL_RESTRICT interleaved = image.Row(0);
+    std::fill(interleaved, interleaved + kWidth * 3, 0.0f);
+
+    // [0, 4): neutral
+    for (int32_t x = 0; x < 4; ++x) {
+      interleaved[3 * x + 0] = x * 1.0f / 3;  // [0, 1]
+      interleaved[3 * x + 2] = interleaved[3 * x + 1] = interleaved[3 * x + 0];
+    }
+
+    // [4, 13): pure RGB with low/medium/high saturation
+    for (int32_t c = 0; c < 3; ++c) {
+      interleaved[3 * (4 + c) + c] = 0.08f + c * 0.01f;
+      interleaved[3 * (7 + c) + c] = 0.75f + c * 0.01f;
+      interleaved[3 * (10 + c) + c] = 1.0f;
+    }
+
+    // [13, 16): impure, not quite saturated RGB
+    interleaved[3 * 13 + 0] = 0.86f;
+    interleaved[3 * 13 + 2] = interleaved[3 * 13 + 1] = 0.16f;
+    interleaved[3 * 14 + 1] = 0.87f;
+    interleaved[3 * 14 + 2] = interleaved[3 * 14 + 0] = 0.16f;
+    interleaved[3 * 15 + 2] = 0.88f;
+    interleaved[3 * 15 + 1] = interleaved[3 * 15 + 0] = 0.16f;
+
+    return image;
+  }
+
+ public:
+  // ImageF so we can use VerifyRelativeError; all are interleaved RGB.
+  ImageF in_gray;
+  ImageF in_color;
+  ImageF out_gray;
+  ImageF out_color;
+  ColorEncoding c_native;
+  ColorEncoding c_gray;
+};
+
+// NOTE(review): the template argument of TestWithParam (and of the
+// testing::Matcher uses further down) is missing in this copy of the patch.
+class ColorManagementTest
+    : public ::testing::TestWithParam {
+ public:
+  // "Same" pixels after converting g->c_native -> c -> g->c_native.
+  static void VerifyPixelRoundTrip(const ColorEncoding& c) {
+    Globals* g = Globals::GetInstance();
+    const ColorEncoding& c_native = c.IsGray() ? g->c_gray : g->c_native;
+    const JxlCmsInterface& cms = GetJxlCms();
+    ColorSpaceTransform xform_fwd(cms);
+    ColorSpaceTransform xform_rev(cms);
+    // HLG encodings are tested with a 1000-nit intensity target.
+    const float intensity_target =
+        c.tf.IsHLG() ? 1000 : kDefaultIntensityTarget;
+    ASSERT_TRUE(
+        xform_fwd.Init(c_native, c, intensity_target, kWidth, kNumThreads));
+    ASSERT_TRUE(
+        xform_rev.Init(c, c_native, intensity_target, kWidth, kNumThreads));
+
+    const size_t thread = 0;
+    const ImageF& in = c.IsGray() ? g->in_gray : g->in_color;
+    ImageF* JXL_RESTRICT out = c.IsGray() ? &g->out_gray : &g->out_color;
+    ASSERT_TRUE(xform_fwd.Run(thread, in.Row(0), xform_fwd.BufDst(thread)));
+    ASSERT_TRUE(xform_rev.Run(thread, xform_fwd.BufDst(thread), out->Row(0)));
+
+#if JPEGXL_ENABLE_SKCMS
+    double max_l1 = 7E-4;
+    double max_rel = 4E-7;
+#else
+    double max_l1 = 5E-5;
+    // Most are lower; reached 3E-7 with D60 AP0.
+    double max_rel = 4E-7;
+#endif
+    if (c.IsGray()) max_rel = 2E-5;
+    JXL_ASSERT_OK(VerifyRelativeError(in, *out, max_l1, max_rel, _));
+  }
+};
+JXL_GTEST_INSTANTIATE_TEST_SUITE_P(ColorManagementTestInstantiation,
+                                   ColorManagementTest,
+                                   ::testing::ValuesIn(test::AllEncodings()));
+
+// Exercises the ColorManagement interface for ALL ColorEncoding synthesizable
+// via enums.
+TEST_P(ColorManagementTest, VerifyAllProfiles) {
+  ColorEncoding c = ColorEncodingFromDescriptor(GetParam());
+  printf("%s\n", Description(c).c_str());
+
+  // Can create profile.
+  ASSERT_TRUE(c.CreateICC());
+
+  // Can set an equivalent ColorEncoding from the generated ICC profile.
+  ColorEncoding c3;
+  ASSERT_TRUE(c3.SetICC(PaddedBytes(c.ICC()), &GetJxlCms()));
+  EXPECT_THAT(c3, HasSameFieldsAs(c));
+
+  VerifyPixelRoundTrip(c);
+}
+
+// Matches a CIExy whose x and y are each within 1e-4 of the given values.
+testing::Matcher CIExyIs(const double x, const double y) {
+  static constexpr double kMaxError = 1e-4;
+  return testing::AllOf(
+      testing::Field(&CIExy::x, testing::DoubleNear(x, kMaxError)),
+      testing::Field(&CIExy::y, testing::DoubleNear(y, kMaxError)));
+}
+
+// Matches a PrimariesCIExy whose r, g and b fields satisfy the given matchers.
+testing::Matcher PrimariesAre(
+    const testing::Matcher& r, const testing::Matcher& g,
+    const testing::Matcher& b) {
+  return testing::AllOf(testing::Field(&PrimariesCIExy::r, r),
+                        testing::Field(&PrimariesCIExy::g, g),
+                        testing::Field(&PrimariesCIExy::b, b));
+}
+
+TEST_F(ColorManagementTest, sRGBChromaticity) {
+  const ColorEncoding sRGB = ColorEncoding::SRGB();
+  EXPECT_THAT(sRGB.GetWhitePoint(), CIExyIs(0.3127, 0.3290));
+  EXPECT_THAT(sRGB.GetPrimaries(),
+              PrimariesAre(CIExyIs(0.64, 0.33), CIExyIs(0.30, 0.60),
+                           CIExyIs(0.15, 0.06)));
+}
+
+TEST_F(ColorManagementTest, D2700Chromaticity) {
+  PaddedBytes icc =
+      jxl::test::ReadTestData("jxl/color_management/sRGB-D2700.icc");
+  ColorEncoding sRGB_D2700;
+  ASSERT_TRUE(sRGB_D2700.SetICC(std::move(icc), &GetJxlCms()));
+
+  EXPECT_THAT(sRGB_D2700.GetWhitePoint(), CIExyIs(0.45986, 0.41060));
+  // The illuminant-relative chromaticities of this profile's primaries are the
+  // same as for sRGB. It is the PCS-relative chromaticities that would be
+  // different.
+  EXPECT_THAT(sRGB_D2700.GetPrimaries(),
+              PrimariesAre(CIExyIs(0.64, 0.33), CIExyIs(0.30, 0.60),
+                           CIExyIs(0.15, 0.06)));
+}
+
+TEST_F(ColorManagementTest, D2700ToSRGB) {
+  const JxlCmsInterface& cms = GetJxlCms();
+  PaddedBytes icc =
+      jxl::test::ReadTestData("jxl/color_management/sRGB-D2700.icc");
+  ColorEncoding sRGB_D2700;
+  ASSERT_TRUE(sRGB_D2700.SetICC(std::move(icc), &cms));
+
+  ColorSpaceTransform transform(cms);
+  ASSERT_TRUE(transform.Init(sRGB_D2700, ColorEncoding::SRGB(),
+                             kDefaultIntensityTarget, 1, 1));
+  const float sRGB_D2700_values[3] = {0.863, 0.737, 0.490};
+  float sRGB_values[3];
+  ASSERT_TRUE(transform.Run(0, sRGB_D2700_values, sRGB_values));
+  EXPECT_THAT(sRGB_values,
+              ElementsAre(FloatNear(0.914, 1e-3), FloatNear(0.745, 1e-3),
+                          FloatNear(0.601, 1e-3)));
+}
+
+TEST_F(ColorManagementTest, P3HlgTo2020Hlg) {
+  ColorEncoding p3_hlg;
+  p3_hlg.SetColorSpace(ColorSpace::kRGB);
+  p3_hlg.white_point = WhitePoint::kD65;
+  p3_hlg.primaries = Primaries::kP3;
+  p3_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+  ASSERT_TRUE(p3_hlg.CreateICC());
+
+  ColorEncoding rec2020_hlg = p3_hlg;
+  rec2020_hlg.primaries = Primaries::k2100;
+  ASSERT_TRUE(rec2020_hlg.CreateICC());
+
+  ColorSpaceTransform transform(GetJxlCms());
+  ASSERT_TRUE(transform.Init(p3_hlg, rec2020_hlg, 1000, 1, 1));
+  const float p3_hlg_values[3] = {0., 0.75, 0.};
+  float rec2020_hlg_values[3];
+  ASSERT_TRUE(transform.Run(0, p3_hlg_values, rec2020_hlg_values));
+  EXPECT_THAT(rec2020_hlg_values,
+              ElementsAre(FloatNear(0.3973, 1e-4), FloatNear(0.7382, 1e-4),
+                          FloatNear(0.1183, 1e-4)));
+}
+
+TEST_F(ColorManagementTest, HlgOotf) {
+  ColorEncoding p3_hlg;
+  p3_hlg.SetColorSpace(ColorSpace::kRGB);
+  p3_hlg.white_point = WhitePoint::kD65;
+  p3_hlg.primaries = Primaries::kP3;
+  p3_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+  ASSERT_TRUE(p3_hlg.CreateICC());
+
+  ColorSpaceTransform transform_to_1000(GetJxlCms());
+  ASSERT_TRUE(
+      transform_to_1000.Init(p3_hlg, ColorEncoding::LinearSRGB(), 1000, 1, 1));
+  // HDR reference white: https://www.itu.int/pub/R-REP-BT.2408-4-2021
+  float p3_hlg_values[3] = {0.75, 0.75, 0.75};
+  float linear_srgb_values[3];
+  ASSERT_TRUE(transform_to_1000.Run(0, p3_hlg_values, linear_srgb_values));
+  // On a 1000-nit display, HDR reference white should be 203 cd/m² which is
+  // 0.203 times the maximum.
+  EXPECT_THAT(linear_srgb_values,
+              ElementsAre(FloatNear(0.203, 1e-3), FloatNear(0.203, 1e-3),
+                          FloatNear(0.203, 1e-3)));
+
+  ColorSpaceTransform transform_to_400(GetJxlCms());
+  ASSERT_TRUE(
+      transform_to_400.Init(p3_hlg, ColorEncoding::LinearSRGB(), 400, 1, 1));
+  ASSERT_TRUE(transform_to_400.Run(0, p3_hlg_values, linear_srgb_values));
+  // On a 400-nit display, it should be 100 cd/m².
+  EXPECT_THAT(linear_srgb_values,
+              ElementsAre(FloatNear(0.250, 1e-3), FloatNear(0.250, 1e-3),
+                          FloatNear(0.250, 1e-3)));
+
+  p3_hlg_values[2] = 0.50;
+  ASSERT_TRUE(transform_to_1000.Run(0, p3_hlg_values, linear_srgb_values));
+  EXPECT_THAT(linear_srgb_values,
+              ElementsAre(FloatNear(0.201, 1e-3), FloatNear(0.201, 1e-3),
+                          FloatNear(0.050, 1e-3)));
+
+  // Inverse direction: linear sRGB back to HLG with a 400-nit target.
+  ColorSpaceTransform transform_from_400(GetJxlCms());
+  ASSERT_TRUE(
+      transform_from_400.Init(ColorEncoding::LinearSRGB(), p3_hlg, 400, 1, 1));
+  linear_srgb_values[0] = linear_srgb_values[1] = linear_srgb_values[2] = 0.250;
+  ASSERT_TRUE(transform_from_400.Run(0, linear_srgb_values, p3_hlg_values));
+  EXPECT_THAT(p3_hlg_values,
+              ElementsAre(FloatNear(0.75, 1e-3), FloatNear(0.75, 1e-3),
+                          FloatNear(0.75, 1e-3)));
+
+  ColorEncoding grayscale_hlg;
+  grayscale_hlg.SetColorSpace(ColorSpace::kGray);
+  grayscale_hlg.white_point = WhitePoint::kD65;
+  grayscale_hlg.tf.SetTransferFunction(TransferFunction::kHLG);
+  ASSERT_TRUE(grayscale_hlg.CreateICC());
+
+  ColorSpaceTransform grayscale_transform(GetJxlCms());
+  ASSERT_TRUE(grayscale_transform.Init(
+      grayscale_hlg, ColorEncoding::LinearSRGB(/*is_gray=*/true), 1000, 1, 1));
+  const float grayscale_hlg_value = 0.75;
+  float linear_grayscale_value;
+  ASSERT_TRUE(grayscale_transform.Run(0, &grayscale_hlg_value,
+                                      &linear_grayscale_value));
+  EXPECT_THAT(linear_grayscale_value, FloatNear(0.203, 1e-3));
+}
+
+// Converts a 17x17x17 grid of linear sRGB colors to scaled XYB, maps them back
+// through the generated XYB ICC profile and checks the per-channel maximum
+// absolute error against kMaxError.
+TEST_F(ColorManagementTest, XYBProfile) {
+  ColorEncoding c_xyb;
+  c_xyb.SetColorSpace(ColorSpace::kXYB);
+  c_xyb.rendering_intent = RenderingIntent::kPerceptual;
+  ASSERT_TRUE(c_xyb.CreateICC());
+  ColorEncoding c_native = ColorEncoding::LinearSRGB(false);
+
+  static const size_t kGridDim = 17;
+  static const size_t kNumColors = kGridDim * kGridDim * kGridDim;
+  const JxlCmsInterface& cms = GetJxlCms();
+  ColorSpaceTransform xform(cms);
+  ASSERT_TRUE(
+      xform.Init(c_xyb, c_native, kDefaultIntensityTarget, kNumColors, 1));
+
+  ImageMetadata metadata;
+  metadata.color_encoding = c_native;
+  ImageBundle ib(&metadata);
+  Image3F native(kNumColors, 1);
+  float mul = 1.0f / (kGridDim - 1);
+  for (size_t ir = 0, x = 0; ir < kGridDim; ++ir) {
+    for (size_t ig = 0; ig < kGridDim; ++ig) {
+      // Note: the loop index `ib` shadows the ImageBundle `ib` in this scope.
+      for (size_t ib = 0; ib < kGridDim; ++ib, ++x) {
+        native.PlaneRow(0, 0)[x] = ir * mul;
+        native.PlaneRow(1, 0)[x] = ig * mul;
+        native.PlaneRow(2, 0)[x] = ib * mul;
+      }
+    }
+  }
+  ib.SetFromImage(std::move(native), c_native);
+  const Image3F& in = *ib.color();
+  Image3F opsin(kNumColors, 1);
+  ToXYB(ib, nullptr, &opsin, cms, nullptr);
+
+  Image3F opsin2(kNumColors, 1);
+  CopyImageTo(opsin, &opsin2);
+  ScaleXYB(&opsin2);
+
+  // Planar -> interleaved for the transform input.
+  float* src = xform.BufSrc(0);
+  for (size_t i = 0; i < kNumColors; ++i) {
+    for (size_t c = 0; c < 3; ++c) {
+      src[3 * i + c] = opsin2.PlaneRow(c, 0)[i];
+    }
+  }
+
+  float* dst = xform.BufDst(0);
+  ASSERT_TRUE(xform.Run(0, src, dst));
+
+  // Interleaved -> planar for comparison with the input.
+  Image3F out(kNumColors, 1);
+  for (size_t i = 0; i < kNumColors; ++i) {
+    for (size_t c = 0; c < 3; ++c) {
+      out.PlaneRow(c, 0)[i] = dst[3 * i + c];
+    }
+  }
+
+  auto debug_print_color = [&](size_t i) {
+    printf(
+        "(%f, %f, %f) -> (%9.6f, %f, %f) -> (%f, %f, %f) -> "
+        "(%9.6f, %9.6f, %9.6f)",
+        in.PlaneRow(0, 0)[i], in.PlaneRow(1, 0)[i], in.PlaneRow(2, 0)[i],
+        opsin.PlaneRow(0, 0)[i], opsin.PlaneRow(1, 0)[i],
+        opsin.PlaneRow(2, 0)[i], opsin2.PlaneRow(0, 0)[i],
+        opsin2.PlaneRow(1, 0)[i], opsin2.PlaneRow(2, 0)[i],
+        out.PlaneRow(0, 0)[i], out.PlaneRow(1, 0)[i], out.PlaneRow(2, 0)[i]);
+  };
+
+  float max_err[3] = {};
+  size_t max_err_i[3] = {};
+  for (size_t i = 0; i < kNumColors; ++i) {
+    for (size_t c = 0; c < 3; ++c) {
+      // debug_print_color(i); printf("\n");
+      float err = std::abs(in.PlaneRow(c, 0)[i] - out.PlaneRow(c, 0)[i]);
+      if (err > max_err[c]) {
+        max_err[c] = err;
+        max_err_i[c] = i;
+      }
+    }
+  }
+  static float kMaxError[3] = {9e-4, 4e-4, 5e-4};
+  printf("Maximum errors:\n");
+  for (size_t c = 0; c < 3; ++c) {
+    debug_print_color(max_err_i[c]);
+    printf(" %f\n", max_err[c]);
+    EXPECT_LT(max_err[c], kMaxError[c]);
+  }
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/common.h b/third-party/libjxl/libjxl/lib/jxl/common.h
new file mode 100644
index 0000000000..c2ebe029a8
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/common.h
@@ -0,0 +1,245 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_COMMON_H_
+#define LIB_JXL_COMMON_H_
+
+// Shared constants and helper functions.
+
+// NOTE(review): the targets of the bare #include lines below are missing in
+// this copy of the patch (angle-bracket contents were stripped).
+#include
+#include
+#include
+
+#include  // numeric_limits
+#include  // unique_ptr
+#include
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/padded_bytes.h"
+
+#ifndef JXL_HIGH_PRECISION
+#define JXL_HIGH_PRECISION 1
+#endif
+
+// Macro that defines whether support for decoding JXL files to JPEG is enabled.
+#ifndef JPEGXL_ENABLE_TRANSCODE_JPEG
+#define JPEGXL_ENABLE_TRANSCODE_JPEG 1
+#endif  // JPEGXL_ENABLE_TRANSCODE_JPEG
+
+// Macro that defines whether support for decoding boxes is enabled.
+#ifndef JPEGXL_ENABLE_BOXES
+#define JPEGXL_ENABLE_BOXES 1
+#endif  // JPEGXL_ENABLE_BOXES
+
+namespace jxl {
+// Some enums and typedefs used by more than one header file.
+
+constexpr size_t kBitsPerByte = 8;  // more clear than CHAR_BIT
+
+// Rounds `bits` up to the next multiple of 8.
+constexpr inline size_t RoundUpBitsToByteMultiple(size_t bits) {
+  return (bits + 7) & ~size_t(7);
+}
+
+// Rounds `dim` up to the next multiple of 8 (the value of kBlockDim below).
+constexpr inline size_t RoundUpToBlockDim(size_t dim) {
+  return (dim + 7) & ~size_t(7);
+}
+
+// Stores a + b (modulo 2^64) in `sum`; returns false if the addition wrapped.
+static inline bool JXL_MAYBE_UNUSED SafeAdd(const uint64_t a, const uint64_t b,
+                                            uint64_t& sum) {
+  sum = a + b;
+  return sum >= a;  // no need to check b - either sum >= both or < both.
+}
+
+// Integer division rounding up: (a + b - 1) / b, computed in the usual
+// arithmetic type of T1 and T2 and converted to T1. Intended for non-negative
+// `a` and positive `b`; a + b - 1 must not overflow.
+template <typename T1, typename T2>
+constexpr inline T1 DivCeil(T1 a, T2 b) {
+  return (a + b - 1) / b;
+}
+
+// Works for any `align`; if a power of two, compiler emits ADD+AND.
+constexpr inline size_t RoundUpTo(size_t what, size_t align) {
+  return DivCeil(what, align) * align;
+}
+
+constexpr double kPi = 3.14159265358979323846264338327950288;
+
+// Reasonable default for sRGB, matches common monitors. We map white to this
+// many nits (cd/m^2) by default. Butteraugli was tuned for 250 nits, which is
+// very close.
+static constexpr float kDefaultIntensityTarget = 255;
+
+// Returns multiplier * pi, evaluated in double and converted to T.
+template <typename T>
+constexpr T Pi(T multiplier) {
+  return static_cast<T>(multiplier * kPi);
+}
+
+// Block is the square grid of pixels to which an "energy compaction"
+// transformation (e.g. DCT) is applied. Each block has its own AC quantizer.
+constexpr size_t kBlockDim = 8;
+
+constexpr size_t kDCTBlockSize = kBlockDim * kBlockDim;
+
+constexpr size_t kGroupDim = 256;
+static_assert(kGroupDim % kBlockDim == 0,
+              "Group dim should be divisible by block dim");
+constexpr size_t kGroupDimInBlocks = kGroupDim / kBlockDim;
+
+// Maximum number of passes in an image.
+constexpr size_t kMaxNumPasses = 11;
+
+// Maximum number of reference frames.
+constexpr size_t kMaxNumReferenceFrames = 4;
+
+// Dimensions of a frame, in pixels, and other derived dimensions.
+// Computed from FrameHeader.
+// TODO(veluca): add extra channels. +struct FrameDimensions { + void Set(size_t xsize, size_t ysize, size_t group_size_shift, + size_t max_hshift, size_t max_vshift, bool modular_mode, + size_t upsampling) { + group_dim = (kGroupDim >> 1) << group_size_shift; + dc_group_dim = group_dim * kBlockDim; + xsize_upsampled = xsize; + ysize_upsampled = ysize; + this->xsize = DivCeil(xsize, upsampling); + this->ysize = DivCeil(ysize, upsampling); + xsize_blocks = DivCeil(this->xsize, kBlockDim << max_hshift) << max_hshift; + ysize_blocks = DivCeil(this->ysize, kBlockDim << max_vshift) << max_vshift; + xsize_padded = xsize_blocks * kBlockDim; + ysize_padded = ysize_blocks * kBlockDim; + if (modular_mode) { + // Modular mode doesn't have any padding. + xsize_padded = this->xsize; + ysize_padded = this->ysize; + } + xsize_upsampled_padded = xsize_padded * upsampling; + ysize_upsampled_padded = ysize_padded * upsampling; + xsize_groups = DivCeil(this->xsize, group_dim); + ysize_groups = DivCeil(this->ysize, group_dim); + xsize_dc_groups = DivCeil(xsize_blocks, group_dim); + ysize_dc_groups = DivCeil(ysize_blocks, group_dim); + num_groups = xsize_groups * ysize_groups; + num_dc_groups = xsize_dc_groups * ysize_dc_groups; + } + + // Image size without any upsampling, i.e. original_size / upsampling. + size_t xsize; + size_t ysize; + // Original image size. + size_t xsize_upsampled; + size_t ysize_upsampled; + // Image size after upsampling the padded image. + size_t xsize_upsampled_padded; + size_t ysize_upsampled_padded; + // Image size after padding to a multiple of kBlockDim (if VarDCT mode). + size_t xsize_padded; + size_t ysize_padded; + // Image size in kBlockDim blocks. + size_t xsize_blocks; + size_t ysize_blocks; + // Image size in number of groups. + size_t xsize_groups; + size_t ysize_groups; + // Image size in number of DC groups. + size_t xsize_dc_groups; + size_t ysize_dc_groups; + // Number of AC or DC groups. 
+ size_t num_groups; + size_t num_dc_groups; + // Size of a group. + size_t group_dim; + size_t dc_group_dim; +}; + +// Prior to C++14 (i.e. C++11): provide our own make_unique +#if __cplusplus < 201402L +template +std::unique_ptr make_unique(Args&&... args) { + return std::unique_ptr(new T(std::forward(args)...)); +} +#else +using std::make_unique; +#endif + +template +JXL_INLINE T Clamp1(T val, T low, T hi) { + return val < low ? low : val > hi ? hi : val; +} + +// Encodes non-negative (X) into (2 * X), negative (-X) into (2 * X - 1) +constexpr uint32_t PackSigned(int32_t value) + JXL_NO_SANITIZE("unsigned-integer-overflow") { + return (static_cast(value) << 1) ^ + ((static_cast(~value) >> 31) - 1); +} + +// Reverse to PackSigned, i.e. UnpackSigned(PackSigned(X)) == X. +// (((~value) & 1) - 1) is either 0 or 0xFF...FF and it will have an expected +// unsigned-integer-overflow. +constexpr intptr_t UnpackSigned(size_t value) + JXL_NO_SANITIZE("unsigned-integer-overflow") { + return static_cast((value >> 1) ^ (((~value) & 1) - 1)); +} + +// conversion from integer to string. +template +std::string ToString(T n) { + char data[32] = {}; + if (T(0.1) != T(0)) { + // float + snprintf(data, sizeof(data), "%g", static_cast(n)); + } else if (T(-1) > T(0)) { + // unsigned + snprintf(data, sizeof(data), "%llu", static_cast(n)); + } else { + // signed + snprintf(data, sizeof(data), "%lld", static_cast(n)); + } + return data; +} + +static inline JXL_MAYBE_UNUSED uint64_t DecodeVarInt(const uint8_t* input, + size_t inputSize, + size_t* pos) { + size_t i; + uint64_t ret = 0; + for (i = 0; *pos + i < inputSize && i < 10; ++i) { + ret |= uint64_t(input[*pos + i] & 127) << uint64_t(7 * i); + // If the next-byte flag is not set, stop + if ((input[*pos + i] & 128) == 0) break; + } + // TODO: Return a decoding error if i == 10. 
+ *pos += i + 1; + return ret; +} + +static inline JXL_MAYBE_UNUSED bool EncodeVarInt(uint64_t value, + size_t output_size, + size_t* output_pos, + uint8_t* output) { + // While more than 7 bits of data are left, + // store 7 bits and set the next byte flag + while (value > 127) { + if (*output_pos > output_size) return false; + // |128: Set the next byte flag + output[(*output_pos)++] = ((uint8_t)(value & 127)) | 128; + // Remove the seven bits we just wrote + value >>= 7; + } + if (*output_pos > output_size) return false; + output[(*output_pos)++] = ((uint8_t)value) & 127; + return true; +} + +static inline JXL_MAYBE_UNUSED void EncodeVarInt(uint64_t value, + PaddedBytes* data) { + size_t pos = data->size(); + data->resize(data->size() + 9); + JXL_CHECK(EncodeVarInt(value, data->size(), &pos, data->data())); + data->resize(pos); +} + +} // namespace jxl + +#endif // LIB_JXL_COMMON_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/compressed_dc.cc b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.cc new file mode 100644 index 0000000000..52438b9bf0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.cc @@ -0,0 +1,315 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/compressed_dc.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/compressed_dc.cc" +#include +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/image.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +using D = HWY_FULL(float); +using DScalar = HWY_CAPPED(float, 1); + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Vec; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +// TODO(veluca): optimize constants. 
+const float w1 = 0.20345139757231578f;  // each of the 4 edge-adjacent taps
+const float w2 = 0.0334829185968739f;   // each of the 4 diagonal taps
+const float w0 = 1.0f - 4.0f * (w1 + w2);  // center tap; 3x3 kernel sums to 1
+
+// Lane-wise maximum of `a` and `b`.
+template <class V>
+V MaxWorkaround(V a, V b) {
+#if (HWY_TARGET == HWY_AVX3) && HWY_COMPILER_CLANG <= 800
+  // Prevents "Do not know how to split the result of this operator" error
+  return IfThenElse(a > b, a, b);
+#else
+  return Max(a, b);
+#endif
+}
+
+// Processes one channel at column `x` for the lanes described by `d`.
+// Outputs:
+//   *mc  - the original center values (row[x..]).
+//   *sm  - the 3x3 weighted average around the center (w0 center, w1 for the
+//          four edge-adjacent neighbors, w2 for the four diagonal neighbors).
+//   *gap - max(*gap, |*mc - *sm| / dc_factor); the caller passes the same
+//          `gap` for all channels, so it accumulates the largest change.
+// Center-column loads use Load; the x - 1 and x + 1 loads use LoadU.
+template <typename D>
+JXL_INLINE void ComputePixelChannel(const D d, const float dc_factor,
+                                    const float* JXL_RESTRICT row_top,
+                                    const float* JXL_RESTRICT row,
+                                    const float* JXL_RESTRICT row_bottom,
+                                    Vec<D>* JXL_RESTRICT mc,
+                                    Vec<D>* JXL_RESTRICT sm,
+                                    Vec<D>* JXL_RESTRICT gap, size_t x) {
+  const auto tl = LoadU(d, row_top + x - 1);
+  const auto tc = Load(d, row_top + x);
+  const auto tr = LoadU(d, row_top + x + 1);
+
+  const auto ml = LoadU(d, row + x - 1);
+  *mc = Load(d, row + x);
+  const auto mr = LoadU(d, row + x + 1);
+
+  const auto bl = LoadU(d, row_bottom + x - 1);
+  const auto bc = Load(d, row_bottom + x);
+  const auto br = LoadU(d, row_bottom + x + 1);
+
+  const auto w_center = Set(d, w0);
+  const auto w_side = Set(d, w1);
+  const auto w_corner = Set(d, w2);
+
+  const auto corner = Add(Add(tl, tr), Add(bl, br));
+  const auto side = Add(Add(ml, mr), Add(tc, bc));
+  *sm = MulAdd(corner, w_corner, MulAdd(side, w_side, Mul(*mc, w_center)));
+
+  const auto dc_quant = Set(d, dc_factor);
+  *gap = MaxWorkaround(*gap, Abs(Div(Sub(*mc, *sm), dc_quant)));
+}
+
+// Smooths all three channels at column `x` and stores the result in
+// `out_rows`. `D` selects the vector width (the caller uses DScalar for
+// single pixels and the file-level D for full vectors).
+// The blend factor is max(0, 3 - 4 * gap) with gap starting at 0.5, so it is
+// 1 while no channel changes by more than 0.5 * dc_factor and drops to 0 once
+// any channel would change by 0.75 * dc_factor or more. The stored value is
+// mc + (sm - mc) * factor.
+template <typename D>
+JXL_INLINE void ComputePixel(
+    const float* JXL_RESTRICT dc_factors,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows_top,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows,
+    const float* JXL_RESTRICT* JXL_RESTRICT rows_bottom,
+    float* JXL_RESTRICT* JXL_RESTRICT out_rows, size_t x) {
+  const D d;
+  auto mc_x = Undefined(d);
+  auto mc_y = Undefined(d);
+  auto mc_b = Undefined(d);
+  auto sm_x = Undefined(d);
+  auto sm_y = Undefined(d);
+  auto sm_b = Undefined(d);
+  auto gap = Set(d, 0.5f);
+  ComputePixelChannel(d, dc_factors[0], rows_top[0], rows[0], rows_bottom[0],
+                      &mc_x, &sm_x, &gap, x);
+  ComputePixelChannel(d, dc_factors[1], rows_top[1], rows[1], rows_bottom[1],
+                      &mc_y, &sm_y, &gap, x);
+  ComputePixelChannel(d, dc_factors[2], rows_top[2], rows[2], rows_bottom[2],
+                      &mc_b, &sm_b, &gap, x);
+  auto factor = MulAdd(Set(d, -4.0f), gap, Set(d, 3.0f));
+  factor = ZeroIfNegative(factor);
+
+  auto out = MulAdd(Sub(sm_x, mc_x), factor, mc_x);
+  Store(out, d, out_rows[0] + x);
+  out = MulAdd(Sub(sm_y, mc_y), factor, mc_y);
+  Store(out, d, out_rows[1] + x);
+  out = MulAdd(Sub(sm_b, mc_b), factor, mc_b);
+  Store(out, d, out_rows[2] + x);
+}
+
+// Replaces `dc` with an adaptively smoothed copy (see ComputePixel).
+// Images with xsize <= 2 or ysize <= 2 are returned unchanged. The first and
+// last row and the first and last column are copied without smoothing; rows
+// 1 .. ysize - 2 are processed through RunOnPool on `pool`.
+void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc,
+                         ThreadPool* pool) {
+  const size_t xsize = dc->xsize();
+  const size_t ysize = dc->ysize();
+  if (ysize <= 2 || xsize <= 2) return;
+
+  // TODO(veluca): use tile-based processing?
+  // TODO(veluca): decide if changes to the y channel should be propagated to
+  // the x and b channels through color correlation.
+  JXL_ASSERT(w1 + w2 < 0.25f);
+
+  Image3F smoothed(xsize, ysize);
+  // Fill in borders that the loop below will not. First and last are unused.
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t y : {size_t(0), ysize - 1}) {
+      memcpy(smoothed.PlaneRow(c, y), dc->PlaneRow(c, y),
+             xsize * sizeof(float));
+    }
+  }
+  auto process_row = [&](const uint32_t y, size_t /*thread*/) {
+    const float* JXL_RESTRICT rows_top[3]{
+        dc->ConstPlaneRow(0, y - 1),
+        dc->ConstPlaneRow(1, y - 1),
+        dc->ConstPlaneRow(2, y - 1),
+    };
+    const float* JXL_RESTRICT rows[3] = {
+        dc->ConstPlaneRow(0, y),
+        dc->ConstPlaneRow(1, y),
+        dc->ConstPlaneRow(2, y),
+    };
+    const float* JXL_RESTRICT rows_bottom[3] = {
+        dc->ConstPlaneRow(0, y + 1),
+        dc->ConstPlaneRow(1, y + 1),
+        dc->ConstPlaneRow(2, y + 1),
+    };
+    float* JXL_RESTRICT rows_out[3] = {
+        smoothed.PlaneRow(0, y),
+        smoothed.PlaneRow(1, y),
+        smoothed.PlaneRow(2, y),
+    };
+    // Leftmost and rightmost columns are copied as-is.
+    for (size_t x : {size_t(0), xsize - 1}) {
+      for (size_t c = 0; c < 3; c++) {
+        rows_out[c][x] = rows[c][x];
+      }
+    }
+
+    size_t x = 1;
+    // First pixels
+    const size_t N = Lanes(D());
+    for (; x < std::min(N, xsize - 1); x++) {
+      ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+                            x);
+    }
+    // Full vectors.
+    for (; x + N <= xsize - 1; x += N) {
+      ComputePixel<D>(dc_factors, rows_top, rows, rows_bottom, rows_out, x);
+    }
+    // Last pixels.
+    for (; x < xsize - 1; x++) {
+      ComputePixel<DScalar>(dc_factors, rows_top, rows, rows_bottom, rows_out,
+                            x);
+    }
+  };
+  JXL_CHECK(RunOnPool(pool, 1, ysize - 1, ThreadPool::NoInit, process_row,
+                      "DCSmoothingRow"));
+  dc->Swap(smoothed);
+}
+
+// DC dequantization.
+void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in, + const float* dc_factors, float mul, const float* cfl_factors, + YCbCrChromaSubsampling chroma_subsampling, + const BlockCtxMap& bctx) { + const HWY_FULL(float) df; + const Rebind di; // assumes pixel_type <= float + if (chroma_subsampling.Is444()) { + const auto fac_x = Set(df, dc_factors[0] * mul); + const auto fac_y = Set(df, dc_factors[1] * mul); + const auto fac_b = Set(df, dc_factors[2] * mul); + const auto cfl_fac_x = Set(df, cfl_factors[0]); + const auto cfl_fac_b = Set(df, cfl_factors[2]); + for (size_t y = 0; y < r.ysize(); y++) { + float* dec_row_x = r.PlaneRow(dc, 0, y); + float* dec_row_y = r.PlaneRow(dc, 1, y); + float* dec_row_b = r.PlaneRow(dc, 2, y); + const int32_t* quant_row_x = in.channel[1].plane.Row(y); + const int32_t* quant_row_y = in.channel[0].plane.Row(y); + const int32_t* quant_row_b = in.channel[2].plane.Row(y); + for (size_t x = 0; x < r.xsize(); x += Lanes(di)) { + const auto in_q_x = Load(di, quant_row_x + x); + const auto in_q_y = Load(di, quant_row_y + x); + const auto in_q_b = Load(di, quant_row_b + x); + const auto in_x = Mul(ConvertTo(df, in_q_x), fac_x); + const auto in_y = Mul(ConvertTo(df, in_q_y), fac_y); + const auto in_b = Mul(ConvertTo(df, in_q_b), fac_b); + Store(in_y, df, dec_row_y + x); + Store(MulAdd(in_y, cfl_fac_x, in_x), df, dec_row_x + x); + Store(MulAdd(in_y, cfl_fac_b, in_b), df, dec_row_b + x); + } + } + } else { + for (size_t c : {1, 0, 2}) { + Rect rect(r.x0() >> chroma_subsampling.HShift(c), + r.y0() >> chroma_subsampling.VShift(c), + r.xsize() >> chroma_subsampling.HShift(c), + r.ysize() >> chroma_subsampling.VShift(c)); + const auto fac = Set(df, dc_factors[c] * mul); + const Channel& ch = in.channel[c < 2 ? 
c ^ 1 : c]; + for (size_t y = 0; y < rect.ysize(); y++) { + const int32_t* quant_row = ch.plane.Row(y); + float* row = rect.PlaneRow(dc, c, y); + for (size_t x = 0; x < rect.xsize(); x += Lanes(di)) { + const auto in_q = Load(di, quant_row + x); + const auto in = Mul(ConvertTo(df, in_q), fac); + Store(in, df, row + x); + } + } + } + } + if (bctx.num_dc_ctxs <= 1) { + for (size_t y = 0; y < r.ysize(); y++) { + uint8_t* qdc_row = r.Row(quant_dc, y); + memset(qdc_row, 0, sizeof(*qdc_row) * r.xsize()); + } + } else { + for (size_t y = 0; y < r.ysize(); y++) { + uint8_t* qdc_row_val = r.Row(quant_dc, y); + const int32_t* quant_row_x = + in.channel[1].plane.Row(y >> chroma_subsampling.VShift(0)); + const int32_t* quant_row_y = + in.channel[0].plane.Row(y >> chroma_subsampling.VShift(1)); + const int32_t* quant_row_b = + in.channel[2].plane.Row(y >> chroma_subsampling.VShift(2)); + for (size_t x = 0; x < r.xsize(); x++) { + int bucket_x = 0, bucket_y = 0, bucket_b = 0; + for (int t : bctx.dc_thresholds[0]) { + if (quant_row_x[x >> chroma_subsampling.HShift(0)] > t) bucket_x++; + } + for (int t : bctx.dc_thresholds[1]) { + if (quant_row_y[x >> chroma_subsampling.HShift(1)] > t) bucket_y++; + } + for (int t : bctx.dc_thresholds[2]) { + if (quant_row_b[x >> chroma_subsampling.HShift(2)] > t) bucket_b++; + } + int bucket = bucket_x; + bucket *= bctx.dc_thresholds[2].size() + 1; + bucket += bucket_b; + bucket *= bctx.dc_thresholds[1].size() + 1; + bucket += bucket_y; + qdc_row_val[x] = bucket; + } + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(DequantDC); +HWY_EXPORT(AdaptiveDCSmoothing); +void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc, + ThreadPool* pool) { + return HWY_DYNAMIC_DISPATCH(AdaptiveDCSmoothing)(dc_factors, dc, pool); +} + +void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in, + 
const float* dc_factors, float mul, const float* cfl_factors, + YCbCrChromaSubsampling chroma_subsampling, + const BlockCtxMap& bctx) { + return HWY_DYNAMIC_DISPATCH(DequantDC)(r, dc, quant_dc, in, dc_factors, mul, + cfl_factors, chroma_subsampling, bctx); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/compressed_dc.h b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.h new file mode 100644 index 0000000000..b06e5931f0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/compressed_dc.h @@ -0,0 +1,34 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_COMPRESSED_DC_H_ +#define LIB_JXL_COMPRESSED_DC_H_ + +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/modular/modular_image.h" + +// DC handling functions: encoding and decoding of DC to and from bitstream, and +// related function to initialize the per-group decoder cache. + +namespace jxl { + +// Smooth DC in already-smooth areas, to counteract banding. +void AdaptiveDCSmoothing(const float* dc_factors, Image3F* dc, + ThreadPool* pool); + +void DequantDC(const Rect& r, Image3F* dc, ImageB* quant_dc, const Image& in, + const float* dc_factors, float mul, const float* cfl_factors, + YCbCrChromaSubsampling chroma_subsampling, + const BlockCtxMap& bctx); + +} // namespace jxl + +#endif // LIB_JXL_COMPRESSED_DC_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve-inl.h b/third-party/libjxl/libjxl/lib/jxl/convolve-inl.h new file mode 100644 index 0000000000..cd79153a3a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/convolve-inl.h @@ -0,0 +1,295 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#if defined(LIB_JXL_CONVOLVE_INL_H_) == defined(HWY_TARGET_TOGGLE)
+#ifdef LIB_JXL_CONVOLVE_INL_H_
+#undef LIB_JXL_CONVOLVE_INL_H_
+#else
+#define LIB_JXL_CONVOLVE_INL_H_
+#endif
+
+#include <hwy/highway.h>
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/image_ops.h"
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+namespace {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::Broadcast;
+#if HWY_TARGET != HWY_SCALAR
+using hwy::HWY_NAMESPACE::CombineShiftRightBytes;
+#endif
+using hwy::HWY_NAMESPACE::TableLookupLanes;
+using hwy::HWY_NAMESPACE::Vec;
+
+// Synthesizes left/right neighbors from a vector of center pixels.
+// Each FirstLk(c) takes the vector `c` loaded at the start of a row and
+// returns the vector of left neighbors at distance k, with out-of-row lanes
+// filled by mirroring. In the diagrams below letters name lanes, lowest lane
+// rightmost (c = LKJI means lane 0 holds I).
+class Neighbors {
+ public:
+  using D = HWY_CAPPED(float, 16);
+  using V = Vec<D>;
+
+  // Returns l[i] == c[Mirror(i - 1)].
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL1(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {0, 0, 1, 2,  3,  4,  5,  6,
+                                             7, 8, 9, 10, 11, 12, 13, 14};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // ONML'KJII
+#elif HWY_TARGET == HWY_SCALAR
+    return c;  // Same (the first mirrored value is the last valid one)
+#else  // 128 bit
+    // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(2, 1, 0, 0))};  // KJII
+#else
+    const D d;
+    // TODO(deymo): Figure out if this can be optimized using a single vsri
+    // instruction to convert LKJI to KJII.
+    HWY_ALIGN constexpr int lanes[4] = {0, 0, 1, 2};  // KJII
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 2)].
+  // Not available on HWY_SCALAR (asserts and returns zero).
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL2(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {1, 0, 0, 1, 2,  3,  4,  5,
+                                             6, 7, 8, 9, 10, 11, 12, 13};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // NMLK'JIIJ
+#elif HWY_TARGET == HWY_SCALAR
+    const D d;
+    JXL_ASSERT(false);  // unsupported, avoid calling this.
+    return Zero(d);
+#else  // 128 bit
+    // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(1, 0, 0, 1))};  // JIIJ
+#else
+    const D d;
+    HWY_ALIGN constexpr int lanes[4] = {1, 0, 0, 1};  // JIIJ
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+
+  // Returns l[i] == c[Mirror(i - 3)].
+  // Not available on HWY_SCALAR (asserts and returns zero).
+  HWY_INLINE HWY_MAYBE_UNUSED static V FirstL3(const V c) {
+#if HWY_CAP_GE256
+    const D d;
+    HWY_ALIGN constexpr int32_t lanes[16] = {2, 1, 0, 0, 1, 2,  3,  4,
+                                             5, 6, 7, 8, 9, 10, 11, 12};
+    const auto indices = SetTableIndices(d, lanes);
+    // c = PONM'LKJI
+    return TableLookupLanes(c, indices);  // MLKJ'IIJK
+#elif HWY_TARGET == HWY_SCALAR
+    const D d;
+    JXL_ASSERT(false);  // unsupported, avoid calling this.
+    return Zero(d);
+#else  // 128 bit
+    // c = LKJI
+#if HWY_TARGET <= (1 << HWY_HIGHEST_TARGET_BIT_X86)
+    return V{_mm_shuffle_ps(c.raw, c.raw, _MM_SHUFFLE(0, 0, 1, 2))};  // IIJK
+#else
+    const D d;
+    HWY_ALIGN constexpr int lanes[4] = {2, 1, 0, 0};  // IIJK
+    const auto indices = SetTableIndices(d, lanes);
+    return TableLookupLanes(c, indices);
+#endif
+#endif
+  }
+};
+
+#if HWY_TARGET != HWY_SCALAR
+
+// Returns indices for SetTableIndices such that TableLookupLanes on the
+// rightmost unaligned vector (rightmost sample in its most-significant lane)
+// returns the mirrored values, with the mirror outside the last valid sample.
+static inline const int32_t* MirrorLanes(const size_t mod) { + const HWY_CAPPED(float, 16) d; + constexpr size_t kN = MaxLanes(d); + + // For mod = `image width mod 16` 0..15: + // last full vec mirrored (mem order) loadedVec mirrorVec idxVec + // 0123456789abcdef| fedcba9876543210 fed..210 012..def 012..def + // 0123456789abcdef|0 0fedcba98765432 0fe..321 234..f00 123..eff + // 0123456789abcdef|01 10fedcba987654 10f..432 456..110 234..ffe + // 0123456789abcdef|012 210fedcba9876 210..543 67..2210 34..ffed + // 0123456789abcdef|0123 3210fedcba98 321..654 8..33210 4..ffedc + // 0123456789abcdef|01234 43210fedcba + // 0123456789abcdef|012345 543210fedc + // 0123456789abcdef|0123456 6543210fe + // 0123456789abcdef|01234567 76543210 + // 0123456789abcdef|012345678 8765432 + // 0123456789abcdef|0123456789 987654 + // 0123456789abcdef|0123456789A A9876 + // 0123456789abcdef|0123456789AB BA98 + // 0123456789abcdef|0123456789ABC CBA + // 0123456789abcdef|0123456789ABCD DC + // 0123456789abcdef|0123456789ABCDE E EDC..10f EED..210 ffe..321 +#if HWY_CAP_GE512 + HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, // + 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; +#elif HWY_CAP_GE256 + HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = { + 1, 2, 3, 4, 5, 6, 7, 7, // + 6, 5, 4, 3, 2, 1, 0}; +#else // 128-bit + HWY_ALIGN static constexpr int32_t idx_lanes[2 * kN - 1] = {1, 2, 3, 3, // + 2, 1, 0}; +#endif + return idx_lanes + kN - 1 - mod; +} + +#endif // HWY_TARGET != HWY_SCALAR + +// Single entry point for convolution. +// "Strategy" (Direct*/Separable*) decides kernel size and how to evaluate it. +template +class ConvolveT { + static constexpr int64_t kRadius = Strategy::kRadius; + using Simd = HWY_CAPPED(float, 16); + + public: + static size_t MinWidth() { +#if HWY_TARGET == HWY_SCALAR + // First/Last use mirrored loads of up to +/- kRadius. 
+ return 2 * kRadius; +#else + return Lanes(Simd()) + kRadius; +#endif + } + + // "Image" is ImageF or Image3F. + template + static void Run(const Image& in, const Rect& rect, const Weights& weights, + ThreadPool* pool, Image* out) { + JXL_CHECK(SameSize(rect, *out)); + JXL_CHECK(rect.xsize() >= MinWidth()); + + static_assert(int64_t(kRadius) <= 3, + "Must handle [0, kRadius) and >= kRadius"); + switch (rect.xsize() % Lanes(Simd())) { + case 0: + return RunRows<0>(in, rect, weights, pool, out); + case 1: + return RunRows<1>(in, rect, weights, pool, out); + case 2: + return RunRows<2>(in, rect, weights, pool, out); + default: + return RunRows<3>(in, rect, weights, pool, out); + } + } + + private: + template + static JXL_INLINE void RunRow(const float* JXL_RESTRICT in, + const size_t xsize, const int64_t stride, + const WrapRow& wrap_row, const Weights& weights, + float* JXL_RESTRICT out) { + Strategy::template ConvolveRow(in, xsize, stride, wrap_row, + weights, out); + } + + template + static JXL_INLINE void RunBorderRows(const ImageF& in, const Rect& rect, + const int64_t ybegin, const int64_t yend, + const Weights& weights, ImageF* out) { + const int64_t stride = in.PixelsPerRow(); + const WrapRowMirror wrap_row(in, rect.ysize()); + for (int64_t y = ybegin; y < yend; ++y) { + RunRow(rect.ConstRow(in, y), rect.xsize(), stride, wrap_row, + weights, out->Row(y)); + } + } + + // Image3F. 
+ template + static JXL_INLINE void RunBorderRows(const Image3F& in, const Rect& rect, + const int64_t ybegin, const int64_t yend, + const Weights& weights, Image3F* out) { + const int64_t stride = in.PixelsPerRow(); + for (int64_t y = ybegin; y < yend; ++y) { + for (size_t c = 0; c < 3; ++c) { + const WrapRowMirror wrap_row(in.Plane(c), rect.ysize()); + RunRow(rect.ConstPlaneRow(in, c, y), rect.xsize(), stride, + wrap_row, weights, out->PlaneRow(c, y)); + } + } + } + + template + static JXL_INLINE void RunInteriorRows(const ImageF& in, const Rect& rect, + const int64_t ybegin, + const int64_t yend, + const Weights& weights, + ThreadPool* pool, ImageF* out) { + const int64_t stride = in.PixelsPerRow(); + JXL_CHECK(RunOnPool( + pool, ybegin, yend, ThreadPool::NoInit, + [&](const uint32_t y, size_t /*thread*/) HWY_ATTR { + RunRow(rect.ConstRow(in, y), rect.xsize(), stride, + WrapRowUnchanged(), weights, out->Row(y)); + }, + "Convolve")); + } + + // Image3F. + template + static JXL_INLINE void RunInteriorRows(const Image3F& in, const Rect& rect, + const int64_t ybegin, + const int64_t yend, + const Weights& weights, + ThreadPool* pool, Image3F* out) { + const int64_t stride = in.PixelsPerRow(); + JXL_CHECK(RunOnPool( + pool, ybegin, yend, ThreadPool::NoInit, + [&](const uint32_t y, size_t /*thread*/) HWY_ATTR { + for (size_t c = 0; c < 3; ++c) { + RunRow(rect.ConstPlaneRow(in, c, y), rect.xsize(), + stride, WrapRowUnchanged(), weights, + out->PlaneRow(c, y)); + } + }, + "Convolve3")); + } + + template + static JXL_INLINE void RunRows(const Image& in, const Rect& rect, + const Weights& weights, ThreadPool* pool, + Image* out) { + const int64_t ysize = rect.ysize(); + RunBorderRows(in, rect, 0, std::min(int64_t(kRadius), ysize), + weights, out); + if (ysize > 2 * int64_t(kRadius)) { + RunInteriorRows(in, rect, int64_t(kRadius), + ysize - int64_t(kRadius), weights, pool, out); + } + if (ysize > int64_t(kRadius)) { + RunBorderRows(in, rect, ysize - int64_t(kRadius), 
ysize, + weights, out); + } + } +}; + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_CONVOLVE_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve.h b/third-party/libjxl/libjxl/lib/jxl/convolve.h new file mode 100644 index 0000000000..2fcd2d0980 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/convolve.h @@ -0,0 +1,105 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_CONVOLVE_H_ +#define LIB_JXL_CONVOLVE_H_ + +// 2D convolution. + +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/image.h" + +namespace jxl { + +// No valid values outside [0, xsize), but the strategy may still safely load +// the preceding vector, and/or round xsize up to the vector lane count. This +// avoids needing PadImage. +// Requires xsize >= kConvolveLanes + kConvolveMaxRadius. +static constexpr size_t kConvolveMaxRadius = 3; + +// Weights must already be normalized. + +struct WeightsSymmetric3 { + // d r d (each replicated 4x) + // r c r + // d r d + float c[4]; + float r[4]; + float d[4]; +}; + +struct WeightsSymmetric5 { + // The lower-right quadrant is: c r R (each replicated 4x) + // r d L + // R L D + float c[4]; + float r[4]; + float R[4]; + float d[4]; + float D[4]; + float L[4]; +}; + +// Weights for separable 5x5 filters (typically but not necessarily the same +// values for horizontal and vertical directions). The kernel must already be +// normalized, but note that values for negative offsets are omitted, so the +// given values do not sum to 1. 
+struct WeightsSeparable5 {
+  // Horizontal 1D, distances 0..2 (each replicated 4x)
+  float horz[3 * 4];
+  // Vertical 1D, same layout: the weight for distance k starts at vert[k * 4].
+  float vert[3 * 4];
+};
+
+// Weights for separable 7x7 filters (typically but not necessarily the same
+// values for horizontal and vertical directions). The kernel must already be
+// normalized, but note that values for negative offsets are omitted, so the
+// given values do not sum to 1.
+//
+// NOTE: for >= 7x7 Gaussian kernels, it is faster to use FastGaussian instead,
+// at least when images exceed the L1 cache size.
+struct WeightsSeparable7 {
+  // Horizontal 1D, distances 0..3 (each replicated 4x)
+  float horz[4 * 4];
+  // Vertical 1D, same layout: the weight for distance k starts at vert[k * 4].
+  float vert[4 * 4];
+};
+
+const WeightsSymmetric3& WeightsSymmetric3Lowpass();
+const WeightsSeparable5& WeightsSeparable5Lowpass();
+const WeightsSymmetric5& WeightsSymmetric5Lowpass();
+
+// Slow* variants. Separable5 (convolve_separable5.cc) calls SlowSeparable5
+// when the rect is narrower than its SIMD path accepts; presumably the other
+// Slow* functions serve the same fallback role - TODO confirm.
+void SlowSymmetric3(const ImageF& in, const Rect& rect,
+                    const WeightsSymmetric3& weights, ThreadPool* pool,
+                    ImageF* JXL_RESTRICT out);
+
+void SlowSeparable5(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable5& weights, ThreadPool* pool,
+                    ImageF* out);
+
+void SlowSeparable7(const ImageF& in, const Rect& rect,
+                    const WeightsSeparable7& weights, ThreadPool* pool,
+                    ImageF* out);
+
+// Each function below convolves `rect` of `in` with `weights` into `out`.
+void Symmetric3(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric3& weights, ThreadPool* pool,
+                ImageF* out);
+
+void Symmetric5(const ImageF& in, const Rect& rect,
+                const WeightsSymmetric5& weights, ThreadPool* pool,
+                ImageF* JXL_RESTRICT out);
+
+void Separable5(const ImageF& in, const Rect& rect,
+                const WeightsSeparable5& weights, ThreadPool* pool,
+                ImageF* out);
+
+void Separable7(const ImageF& in, const Rect& rect,
+                const WeightsSeparable7& weights, ThreadPool* pool,
+                ImageF* out);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_CONVOLVE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_separable5.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_separable5.cc
new file mode 100644
index 0000000000..b26ff54bbc
--- /dev/null
+++
b/third-party/libjxl/libjxl/lib/jxl/convolve_separable5.cc @@ -0,0 +1,261 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/convolve.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/convolve_separable5.cc" +#include +#include + +#include "lib/jxl/convolve-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Vec; + +// 5x5 convolution by separable kernel with a single scan through the input. +// This is more cache-efficient than separate horizontal/vertical passes, and +// possibly faster (given enough registers) than tiling and/or transposing. +// +// Overview: imagine a 5x5 window around a central pixel. First convolve the +// rows by multiplying the pixels with the corresponding weights from +// WeightsSeparable5.horz[abs(x_offset) * 4]. Then multiply each of these +// intermediate results by the corresponding vertical weight, i.e. +// vert[abs(y_offset) * 4]. Finally, store the sum of these values as the +// convolution result at the position of the central pixel in the output. +// +// Each of these operations uses SIMD vectors. The central pixel and most +// importantly the output are aligned, so neighboring pixels (e.g. x_offset=1) +// require unaligned loads. Because weights are supplied in identical groups of +// 4, we can use LoadDup128 to load them (slightly faster). +// +// Uses mirrored boundary handling. Until x >= kRadius, the horizontal +// convolution uses Neighbors class to shuffle vectors as if each of its lanes +// had been loaded from the mirrored offset. Similarly, the last full vector to +// write uses mirroring.
In the case of scalar vectors, Neighbors is not usable +// and the value is loaded directly. Otherwise, the number of valid pixels +// modulo the vector size enables a small optimization: for smaller offsets, +// a non-mirrored load is sufficient. +class Separable5Strategy { + using D = HWY_CAPPED(float, 16); + using V = Vec; + + public: + static constexpr int64_t kRadius = 2; + + template + static JXL_MAYBE_INLINE void ConvolveRow( + const float* const JXL_RESTRICT row_m, const size_t xsize, + const int64_t stride, const WrapRow& wrap_row, + const WeightsSeparable5& weights, float* const JXL_RESTRICT row_out) { + const D d; + const int64_t neg_stride = -stride; // allows LEA addressing. + const float* const JXL_RESTRICT row_t2 = + wrap_row(row_m + 2 * neg_stride, stride); + const float* const JXL_RESTRICT row_t1 = + wrap_row(row_m + 1 * neg_stride, stride); + const float* const JXL_RESTRICT row_b1 = + wrap_row(row_m + 1 * stride, stride); + const float* const JXL_RESTRICT row_b2 = + wrap_row(row_m + 2 * stride, stride); + + const V wh0 = LoadDup128(d, weights.horz + 0 * 4); + const V wh1 = LoadDup128(d, weights.horz + 1 * 4); + const V wh2 = LoadDup128(d, weights.horz + 2 * 4); + const V wv0 = LoadDup128(d, weights.vert + 0 * 4); + const V wv1 = LoadDup128(d, weights.vert + 1 * 4); + const V wv2 = LoadDup128(d, weights.vert + 2 * 4); + + size_t x = 0; + + // More than one iteration for scalars. 
+ for (; x < kRadius; x += Lanes(d)) { + const V conv0 = + Mul(HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2), wv0); + + const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2); + const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2); + const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0); + + const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2); + const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2); + const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1); + Store(conv2, d, row_out + x); + } + + // Main loop: load inputs without padding + for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) { + const V conv0 = Mul(HorzConvolve(row_m + x, wh0, wh1, wh2), wv0); + + const V conv1t = HorzConvolve(row_t1 + x, wh0, wh1, wh2); + const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2); + const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0); + + const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2); + const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2); + const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1); + Store(conv2, d, row_out + x); + } + + // Last full vector to write (the above loop handled mod >= kRadius) +#if HWY_TARGET == HWY_SCALAR + while (x < xsize) { +#else + if (kSizeModN < kRadius) { +#endif + const V conv0 = + Mul(HorzConvolveLast(row_m, x, xsize, wh0, wh1, wh2), wv0); + + const V conv1t = + HorzConvolveLast(row_t1, x, xsize, wh0, wh1, wh2); + const V conv1b = + HorzConvolveLast(row_b1, x, xsize, wh0, wh1, wh2); + const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0); + + const V conv2t = + HorzConvolveLast(row_t2, x, xsize, wh0, wh1, wh2); + const V conv2b = + HorzConvolveLast(row_b2, x, xsize, wh0, wh1, wh2); + const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1); + Store(conv2, d, row_out + x); + x += Lanes(d); + } + + // If mod = 0, the above vector was the last. 
+ if (kSizeModN != 0) { + for (; x < xsize; ++x) { + float mul = 0.0f; + for (int64_t dy = -kRadius; dy <= kRadius; ++dy) { + const float wy = weights.vert[std::abs(dy) * 4]; + const float* clamped_row = wrap_row(row_m + dy * stride, stride); + for (int64_t dx = -kRadius; dx <= kRadius; ++dx) { + const float wx = weights.horz[std::abs(dx) * 4]; + const int64_t clamped_x = Mirror(x + dx, xsize); + mul += clamped_row[clamped_x] * wx * wy; + } + } + row_out[x] = mul; + } + } + } + + private: + // Same as HorzConvolve for the first/last vector in a row. + static JXL_MAYBE_INLINE V HorzConvolveFirst( + const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize, + const V wh0, const V wh1, const V wh2) { + const D d; + const V c = LoadU(d, row + x); + const V mul0 = Mul(c, wh0); + +#if HWY_TARGET == HWY_SCALAR + const V l1 = LoadU(d, row + Mirror(x - 1, xsize)); + const V l2 = LoadU(d, row + Mirror(x - 2, xsize)); +#else + (void)xsize; + const V l1 = Neighbors::FirstL1(c); + const V l2 = Neighbors::FirstL2(c); +#endif + + const V r1 = LoadU(d, row + x + 1); + const V r2 = LoadU(d, row + x + 2); + + const V mul1 = MulAdd(Add(l1, r1), wh1, mul0); + const V mul2 = MulAdd(Add(l2, r2), wh2, mul1); + return mul2; + } + + template + static JXL_MAYBE_INLINE V + HorzConvolveLast(const float* const JXL_RESTRICT row, const int64_t x, + const int64_t xsize, const V wh0, const V wh1, const V wh2) { + const D d; + const V c = LoadU(d, row + x); + const V mul0 = Mul(c, wh0); + + const V l1 = LoadU(d, row + x - 1); + const V l2 = LoadU(d, row + x - 2); + + V r1, r2; +#if HWY_TARGET == HWY_SCALAR + r1 = LoadU(d, row + Mirror(x + 1, xsize)); + r2 = LoadU(d, row + Mirror(x + 2, xsize)); +#else + const size_t N = Lanes(d); + if (kSizeModN == 0) { + r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2))); + r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1))); + } else { // == 1 + const auto last = LoadU(d, row + xsize - N); + r2 = TableLookupLanes(last, 
SetTableIndices(d, MirrorLanes(N - 1))); + r1 = last; + } +#endif + + // Sum of pixels with Manhattan distance i, multiplied by weights[i]. + const V sum1 = Add(l1, r1); + const V mul1 = MulAdd(sum1, wh1, mul0); + const V sum2 = Add(l2, r2); + const V mul2 = MulAdd(sum2, wh2, mul1); + return mul2; + } + + // Requires kRadius valid pixels before/after pos. + static JXL_MAYBE_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos, + const V wh0, const V wh1, + const V wh2) { + const D d; + const V c = LoadU(d, pos); + const V mul0 = Mul(c, wh0); + + // Loading anew is faster than combining vectors. + const V l1 = LoadU(d, pos - 1); + const V r1 = LoadU(d, pos + 1); + const V l2 = LoadU(d, pos - 2); + const V r2 = LoadU(d, pos + 2); + // Sum of pixels with Manhattan distance i, multiplied by weights[i]. + const V sum1 = Add(l1, r1); + const V mul1 = MulAdd(sum1, wh1, mul0); + const V sum2 = Add(l2, r2); + const V mul2 = MulAdd(sum2, wh2, mul1); + return mul2; + } +}; + +void Separable5(const ImageF& in, const Rect& rect, + const WeightsSeparable5& weights, ThreadPool* pool, + ImageF* out) { + using Conv = ConvolveT; + if (rect.xsize() >= Conv::MinWidth()) { + return Conv::Run(in, rect, weights, pool, out); + } + + return SlowSeparable5(in, rect, weights, pool, out); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(Separable5); +void Separable5(const ImageF& in, const Rect& rect, + const WeightsSeparable5& weights, ThreadPool* pool, + ImageF* out) { + return HWY_DYNAMIC_DISPATCH(Separable5)(in, rect, weights, pool, out); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_separable7.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_separable7.cc new file mode 100644 index 0000000000..086dfd22b5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/convolve_separable7.cc @@ -0,0 +1,285 @@ 
+// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/convolve.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/convolve_separable7.cc" +#include +#include + +#include "lib/jxl/convolve-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Vec; + +// 7x7 convolution by separable kernel with a single scan through the input. +// Extended version of Separable5, see documentation there. +class Separable7Strategy { + using D = HWY_CAPPED(float, 16); + using V = Vec; + + public: + static constexpr int64_t kRadius = 3; + + template + static JXL_MAYBE_INLINE void ConvolveRow( + const float* const JXL_RESTRICT row_m, const size_t xsize, + const int64_t stride, const WrapRow& wrap_row, + const WeightsSeparable7& weights, float* const JXL_RESTRICT row_out) { + const D d; + const int64_t neg_stride = -stride; // allows LEA addressing. 
+ const float* const JXL_RESTRICT row_t3 = + wrap_row(row_m + 3 * neg_stride, stride); + const float* const JXL_RESTRICT row_t2 = + wrap_row(row_m + 2 * neg_stride, stride); + const float* const JXL_RESTRICT row_t1 = + wrap_row(row_m + 1 * neg_stride, stride); + const float* const JXL_RESTRICT row_b1 = + wrap_row(row_m + 1 * stride, stride); + const float* const JXL_RESTRICT row_b2 = + wrap_row(row_m + 2 * stride, stride); + const float* const JXL_RESTRICT row_b3 = + wrap_row(row_m + 3 * stride, stride); + + const V wh0 = LoadDup128(d, weights.horz + 0 * 4); + const V wh1 = LoadDup128(d, weights.horz + 1 * 4); + const V wh2 = LoadDup128(d, weights.horz + 2 * 4); + const V wh3 = LoadDup128(d, weights.horz + 3 * 4); + const V wv0 = LoadDup128(d, weights.vert + 0 * 4); + const V wv1 = LoadDup128(d, weights.vert + 1 * 4); + const V wv2 = LoadDup128(d, weights.vert + 2 * 4); + const V wv3 = LoadDup128(d, weights.vert + 3 * 4); + + size_t x = 0; + + // More than one iteration for scalars. + for (; x < kRadius; x += Lanes(d)) { + const V conv0 = + Mul(HorzConvolveFirst(row_m, x, xsize, wh0, wh1, wh2, wh3), wv0); + + const V conv1t = HorzConvolveFirst(row_t1, x, xsize, wh0, wh1, wh2, wh3); + const V conv1b = HorzConvolveFirst(row_b1, x, xsize, wh0, wh1, wh2, wh3); + const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0); + + const V conv2t = HorzConvolveFirst(row_t2, x, xsize, wh0, wh1, wh2, wh3); + const V conv2b = HorzConvolveFirst(row_b2, x, xsize, wh0, wh1, wh2, wh3); + const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1); + + const V conv3t = HorzConvolveFirst(row_t3, x, xsize, wh0, wh1, wh2, wh3); + const V conv3b = HorzConvolveFirst(row_b3, x, xsize, wh0, wh1, wh2, wh3); + const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2); + + Store(conv3, d, row_out + x); + } + + // Main loop: load inputs without padding + for (; x + Lanes(d) + kRadius <= xsize; x += Lanes(d)) { + const V conv0 = Mul(HorzConvolve(row_m + x, wh0, wh1, wh2, wh3), wv0); + + const V conv1t = 
HorzConvolve(row_t1 + x, wh0, wh1, wh2, wh3); + const V conv1b = HorzConvolve(row_b1 + x, wh0, wh1, wh2, wh3); + const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0); + + const V conv2t = HorzConvolve(row_t2 + x, wh0, wh1, wh2, wh3); + const V conv2b = HorzConvolve(row_b2 + x, wh0, wh1, wh2, wh3); + const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1); + + const V conv3t = HorzConvolve(row_t3 + x, wh0, wh1, wh2, wh3); + const V conv3b = HorzConvolve(row_b3 + x, wh0, wh1, wh2, wh3); + const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2); + + Store(conv3, d, row_out + x); + } + + // Last full vector to write (the above loop handled mod >= kRadius) +#if HWY_TARGET == HWY_SCALAR + while (x < xsize) { +#else + if (kSizeModN < kRadius) { +#endif + const V conv0 = + Mul(HorzConvolveLast(row_m, x, xsize, wh0, wh1, wh2, wh3), + wv0); + + const V conv1t = + HorzConvolveLast(row_t1, x, xsize, wh0, wh1, wh2, wh3); + const V conv1b = + HorzConvolveLast(row_b1, x, xsize, wh0, wh1, wh2, wh3); + const V conv1 = MulAdd(Add(conv1t, conv1b), wv1, conv0); + + const V conv2t = + HorzConvolveLast(row_t2, x, xsize, wh0, wh1, wh2, wh3); + const V conv2b = + HorzConvolveLast(row_b2, x, xsize, wh0, wh1, wh2, wh3); + const V conv2 = MulAdd(Add(conv2t, conv2b), wv2, conv1); + + const V conv3t = + HorzConvolveLast(row_t3, x, xsize, wh0, wh1, wh2, wh3); + const V conv3b = + HorzConvolveLast(row_b3, x, xsize, wh0, wh1, wh2, wh3); + const V conv3 = MulAdd(Add(conv3t, conv3b), wv3, conv2); + + Store(conv3, d, row_out + x); + x += Lanes(d); + } + + // If mod = 0, the above vector was the last. 
+ if (kSizeModN != 0) { + for (; x < xsize; ++x) { + float mul = 0.0f; + for (int64_t dy = -kRadius; dy <= kRadius; ++dy) { + const float wy = weights.vert[std::abs(dy) * 4]; + const float* clamped_row = wrap_row(row_m + dy * stride, stride); + for (int64_t dx = -kRadius; dx <= kRadius; ++dx) { + const float wx = weights.horz[std::abs(dx) * 4]; + const int64_t clamped_x = Mirror(x + dx, xsize); + mul += clamped_row[clamped_x] * wx * wy; + } + } + row_out[x] = mul; + } + } + } + + private: + // Same as HorzConvolve for the first/last vector in a row. + static JXL_MAYBE_INLINE V HorzConvolveFirst( + const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize, + const V wh0, const V wh1, const V wh2, const V wh3) { + const D d; + const V c = LoadU(d, row + x); + const V mul0 = Mul(c, wh0); + +#if HWY_TARGET == HWY_SCALAR + const V l1 = LoadU(d, row + Mirror(x - 1, xsize)); + const V l2 = LoadU(d, row + Mirror(x - 2, xsize)); + const V l3 = LoadU(d, row + Mirror(x - 3, xsize)); +#else + (void)xsize; + const V l1 = Neighbors::FirstL1(c); + const V l2 = Neighbors::FirstL2(c); + const V l3 = Neighbors::FirstL3(c); +#endif + + const V r1 = LoadU(d, row + x + 1); + const V r2 = LoadU(d, row + x + 2); + const V r3 = LoadU(d, row + x + 3); + + const V mul1 = MulAdd(Add(l1, r1), wh1, mul0); + const V mul2 = MulAdd(Add(l2, r2), wh2, mul1); + const V mul3 = MulAdd(Add(l3, r3), wh3, mul2); + return mul3; + } + + template + static JXL_MAYBE_INLINE V HorzConvolveLast( + const float* const JXL_RESTRICT row, const int64_t x, const int64_t xsize, + const V wh0, const V wh1, const V wh2, const V wh3) { + const D d; + const V c = LoadU(d, row + x); + const V mul0 = Mul(c, wh0); + + const V l1 = LoadU(d, row + x - 1); + const V l2 = LoadU(d, row + x - 2); + const V l3 = LoadU(d, row + x - 3); + + V r1, r2, r3; +#if HWY_TARGET == HWY_SCALAR + r1 = LoadU(d, row + Mirror(x + 1, xsize)); + r2 = LoadU(d, row + Mirror(x + 2, xsize)); + r3 = LoadU(d, row + Mirror(x + 3, xsize)); 
+#else + const size_t N = Lanes(d); + if (kSizeModN == 0) { + r3 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 3))); + r2 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 2))); + r1 = TableLookupLanes(c, SetTableIndices(d, MirrorLanes(N - 1))); + } else if (kSizeModN == 1) { + const auto last = LoadU(d, row + xsize - N); + r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 2))); + r2 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1))); + r1 = last; + } else /* kSizeModN >= 2 */ { + const auto last = LoadU(d, row + xsize - N); + r3 = TableLookupLanes(last, SetTableIndices(d, MirrorLanes(N - 1))); + r2 = last; + r1 = LoadU(d, row + x + 1); + } +#endif + + // Sum of pixels with Manhattan distance i, multiplied by weights[i]. + const V sum1 = Add(l1, r1); + const V mul1 = MulAdd(sum1, wh1, mul0); + const V sum2 = Add(l2, r2); + const V mul2 = MulAdd(sum2, wh2, mul1); + const V sum3 = Add(l3, r3); + const V mul3 = MulAdd(sum3, wh3, mul2); + return mul3; + } + + // Returns one vector of horizontal convolution results; lane i is the result + // for pixel pos + i. This is the fast path for interior pixels, i.e. kRadius + // valid pixels before/after pos. + static JXL_MAYBE_INLINE V HorzConvolve(const float* const JXL_RESTRICT pos, + const V wh0, const V wh1, const V wh2, + const V wh3) { + const D d; + const V c = LoadU(d, pos); + const V mul0 = Mul(c, wh0); + + // TODO(janwas): better to Combine + const V l1 = LoadU(d, pos - 1); + const V r1 = LoadU(d, pos + 1); + const V l2 = LoadU(d, pos - 2); + const V r2 = LoadU(d, pos + 2); + const V l3 = LoadU(d, pos - 3); + const V r3 = LoadU(d, pos + 3); + // Sum of pixels with Manhattan distance i, multiplied by weights[i]. 
+ const V sum1 = Add(l1, r1); + const V mul1 = MulAdd(sum1, wh1, mul0); + const V sum2 = Add(l2, r2); + const V mul2 = MulAdd(sum2, wh2, mul1); + const V sum3 = Add(l3, r3); + const V mul3 = MulAdd(sum3, wh3, mul2); + return mul3; + } +}; + +void Separable7(const ImageF& in, const Rect& rect, + const WeightsSeparable7& weights, ThreadPool* pool, + ImageF* out) { + using Conv = ConvolveT; + if (rect.xsize() >= Conv::MinWidth()) { + return Conv::Run(in, rect, weights, pool, out); + } + + return SlowSeparable7(in, rect, weights, pool, out); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(Separable7); +void Separable7(const ImageF& in, const Rect& rect, + const WeightsSeparable7& weights, ThreadPool* pool, + ImageF* out) { + return HWY_DYNAMIC_DISPATCH(Separable7)(in, rect, weights, pool, out); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_slow.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_slow.cc new file mode 100644 index 0000000000..91e11dcfd5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/convolve_slow.cc @@ -0,0 +1,208 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/convolve.h" + +#include "lib/jxl/convolve-inl.h" + +namespace jxl { + +//------------------------------------------------------------------------------ +// Kernels + +// 4 instances of a given literal value, useful as input to LoadDup128. +#define JXL_REP4(literal) literal, literal, literal, literal + +// Concentrates energy in low-frequency components (e.g. for antialiasing). +const WeightsSymmetric3& WeightsSymmetric3Lowpass() { + // Computed by research/convolve_weights.py's cubic spline approximations of + // prolate spheroidal wave functions. 
+ constexpr float w0 = 0.36208932f; + constexpr float w1 = 0.12820096f; + constexpr float w2 = 0.03127668f; + static constexpr WeightsSymmetric3 weights = { + {JXL_REP4(w0)}, {JXL_REP4(w1)}, {JXL_REP4(w2)}}; + return weights; +} + +const WeightsSeparable5& WeightsSeparable5Lowpass() { + constexpr float w0 = 0.41714928f; + constexpr float w1 = 0.25539268f; + constexpr float w2 = 0.03603267f; + static constexpr WeightsSeparable5 weights = { + {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, + {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; + return weights; +} + +const WeightsSymmetric5& WeightsSymmetric5Lowpass() { + static constexpr WeightsSymmetric5 weights = { + {JXL_REP4(0.1740135f)}, {JXL_REP4(0.1065369f)}, {JXL_REP4(0.0150310f)}, + {JXL_REP4(0.0652254f)}, {JXL_REP4(0.0012984f)}, {JXL_REP4(0.0092025f)}}; + return weights; +} + +const WeightsSeparable5& WeightsSeparable5Gaussian1() { + constexpr float w0 = 0.38774f; + constexpr float w1 = 0.24477f; + constexpr float w2 = 0.06136f; + static constexpr WeightsSeparable5 weights = { + {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, + {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; + return weights; +} + +const WeightsSeparable5& WeightsSeparable5Gaussian2() { + constexpr float w0 = 0.250301f; + constexpr float w1 = 0.221461f; + constexpr float w2 = 0.153388f; + static constexpr WeightsSeparable5 weights = { + {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}, + {JXL_REP4(w0), JXL_REP4(w1), JXL_REP4(w2)}}; + return weights; +} + +#undef JXL_REP4 + +//------------------------------------------------------------------------------ +// Slow + +namespace { + +template +float SlowSymmetric3Pixel(const ImageF& in, const int64_t ix, const int64_t iy, + const int64_t xsize, const int64_t ysize, + const WeightsSymmetric3& weights) { + float sum = 0.0f; + + // ix: image; kx: kernel + for (int64_t ky = -1; ky <= 1; ky++) { + const int64_t y = WrapY()(iy + ky, ysize); + const float* JXL_RESTRICT row_in = in.ConstRow(static_cast(y)); + + const float wc 
= ky == 0 ? weights.c[0] : weights.r[0]; + const float wlr = ky == 0 ? weights.r[0] : weights.d[0]; + + const int64_t xm1 = WrapX()(ix - 1, xsize); + const int64_t xp1 = WrapX()(ix + 1, xsize); + sum += row_in[ix] * wc + (row_in[xm1] + row_in[xp1]) * wlr; + } + return sum; +} + +template +void SlowSymmetric3Row(const ImageF& in, const int64_t iy, const int64_t xsize, + const int64_t ysize, const WeightsSymmetric3& weights, + float* JXL_RESTRICT row_out) { + row_out[0] = + SlowSymmetric3Pixel(in, 0, iy, xsize, ysize, weights); + for (int64_t ix = 1; ix < xsize - 1; ix++) { + row_out[ix] = SlowSymmetric3Pixel(in, ix, iy, xsize, + ysize, weights); + } + { + const int64_t ix = xsize - 1; + row_out[ix] = SlowSymmetric3Pixel(in, ix, iy, xsize, + ysize, weights); + } +} + +} // namespace + +void SlowSymmetric3(const ImageF& in, const Rect& rect, + const WeightsSymmetric3& weights, ThreadPool* pool, + ImageF* JXL_RESTRICT out) { + const int64_t xsize = static_cast(rect.xsize()); + const int64_t ysize = static_cast(rect.ysize()); + const int64_t kRadius = 1; + + JXL_CHECK(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t iy = task; + float* JXL_RESTRICT out_row = out->Row(static_cast(iy)); + + if (iy < kRadius || iy >= ysize - kRadius) { + SlowSymmetric3Row(in, iy, xsize, ysize, weights, out_row); + } else { + SlowSymmetric3Row(in, iy, xsize, ysize, weights, + out_row); + } + }, + "SlowSymmetric3")); +} + +namespace { + +// Separable kernels, any radius. 
+float SlowSeparablePixel(const ImageF& in, const Rect& rect, const int64_t x, + const int64_t y, const int64_t radius, + const float* JXL_RESTRICT horz_weights, + const float* JXL_RESTRICT vert_weights) { + const size_t xsize = rect.xsize(); + const size_t ysize = rect.ysize(); + const WrapMirror wrap; + + float mul = 0.0f; + for (int dy = -radius; dy <= radius; ++dy) { + const float wy = vert_weights[std::abs(dy) * 4]; + const size_t sy = wrap(y + dy, ysize); + JXL_CHECK(sy < ysize); + const float* const JXL_RESTRICT row = rect.ConstRow(in, sy); + for (int dx = -radius; dx <= radius; ++dx) { + const float wx = horz_weights[std::abs(dx) * 4]; + const size_t sx = wrap(x + dx, xsize); + JXL_CHECK(sx < xsize); + mul += row[sx] * wx * wy; + } + } + return mul; +} + +} // namespace + +void SlowSeparable5(const ImageF& in, const Rect& rect, + const WeightsSeparable5& weights, ThreadPool* pool, + ImageF* out) { + const float* horz_weights = &weights.horz[0]; + const float* vert_weights = &weights.vert[0]; + + const size_t ysize = rect.ysize(); + JXL_CHECK(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + + float* const JXL_RESTRICT row_out = out->Row(y); + for (size_t x = 0; x < rect.xsize(); ++x) { + row_out[x] = SlowSeparablePixel(in, rect, x, y, /*radius=*/2, + horz_weights, vert_weights); + } + }, + "SlowSeparable5")); +} + +void SlowSeparable7(const ImageF& in, const Rect& rect, + const WeightsSeparable7& weights, ThreadPool* pool, + ImageF* out) { + const float* horz_weights = &weights.horz[0]; + const float* vert_weights = &weights.vert[0]; + + const size_t ysize = rect.ysize(); + JXL_CHECK(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + + float* const JXL_RESTRICT row_out = out->Row(y); + for (size_t x = 0; x < rect.xsize(); ++x) { + row_out[x] = SlowSeparablePixel(in, rect, x, y, 
/*radius=*/3, + horz_weights, vert_weights); + } + }, + "SlowSeparable7")); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric3.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric3.cc new file mode 100644 index 0000000000..06b59dfb60 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric3.cc @@ -0,0 +1,194 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/convolve.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric3.cc" +#include +#include + +#include "lib/jxl/convolve-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Vec; + +template +static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix, + const int64_t iy, const size_t ysize, const V wx0, + const V wx1, const V wx2) { + const HWY_FULL(float) d; + const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix; + const auto in_m2 = LoadU(d, center - 2); + const auto in_p2 = LoadU(d, center + 2); + const auto in_m1 = LoadU(d, center - 1); + const auto in_p1 = LoadU(d, center + 1); + const auto in_00 = Load(d, center); + const auto sum_2 = Mul(wx2, Add(in_m2, in_p2)); + const auto sum_1 = Mul(wx1, Add(in_m1, in_p1)); + const auto sum_0 = Mul(wx0, in_00); + return Add(sum_2, Add(sum_1, sum_0)); +} + +// 3x3 convolution by symmetric kernel with a single scan through the input. +class Symmetric3Strategy { + using D = HWY_CAPPED(float, 16); + using V = Vec; + + public: + static constexpr int64_t kRadius = 1; + + // Only accesses pixels in [0, xsize). 
+ template + static JXL_MAYBE_INLINE void ConvolveRow( + const float* const JXL_RESTRICT row_m, const size_t xsize, + const int64_t stride, const WrapRow& wrap_row, + const WeightsSymmetric3& weights, float* const JXL_RESTRICT row_out) { + const D d; + // t, m, b = top, middle, bottom row; + const float* const JXL_RESTRICT row_t = wrap_row(row_m - stride, stride); + const float* const JXL_RESTRICT row_b = wrap_row(row_m + stride, stride); + + // Must load in advance - compiler doesn't understand LoadDup128 and + // schedules them too late. + const V w0 = LoadDup128(d, weights.c); + const V w1 = LoadDup128(d, weights.r); + const V w2 = LoadDup128(d, weights.d); + + // l, c, r = left, center, right. Leftmost vector: need FirstL1. + { + const V tc = LoadU(d, row_t + 0); + const V mc = LoadU(d, row_m + 0); + const V bc = LoadU(d, row_b + 0); + const V tl = Neighbors::FirstL1(tc); + const V tr = LoadU(d, row_t + 0 + 1); + const V ml = Neighbors::FirstL1(mc); + const V mr = LoadU(d, row_m + 0 + 1); + const V bl = Neighbors::FirstL1(bc); + const V br = LoadU(d, row_b + 0 + 1); + const V conv = + WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2); + Store(conv, d, row_out + 0); + } + + // Loop as long as we can load enough new values: + const size_t N = Lanes(d); + size_t x = N; + for (; x + N + kRadius <= xsize; x += N) { + const auto conv = ConvolveValid(row_t, row_m, row_b, x, w0, w1, w2); + Store(conv, d, row_out + x); + } + + // For final (partial) vector: + const V tc = LoadU(d, row_t + x); + const V mc = LoadU(d, row_m + x); + const V bc = LoadU(d, row_b + x); + + V tr, mr, br; +#if HWY_TARGET == HWY_SCALAR + tr = tc; // Single-lane => mirrored right neighbor = center value. + mr = mc; + br = bc; +#else + if (kSizeModN == 0) { + // The above loop didn't handle the last vector because it needs an + // additional right neighbor (generated via mirroring). 
+ auto mirror = SetTableIndices(d, MirrorLanes(N - 1)); + tr = TableLookupLanes(tc, mirror); + mr = TableLookupLanes(mc, mirror); + br = TableLookupLanes(bc, mirror); + } else { + auto mirror = SetTableIndices(d, MirrorLanes((xsize % N) - 1)); + // Loads last valid value into uppermost lane and mirrors. + tr = TableLookupLanes(LoadU(d, row_t + xsize - N), mirror); + mr = TableLookupLanes(LoadU(d, row_m + xsize - N), mirror); + br = TableLookupLanes(LoadU(d, row_b + xsize - N), mirror); + } +#endif + + const V tl = LoadU(d, row_t + x - 1); + const V ml = LoadU(d, row_m + x - 1); + const V bl = LoadU(d, row_b + x - 1); + const V conv = WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2); + Store(conv, d, row_out + x); + } + + private: + // Returns sum{x_i * w_i}. + template + static JXL_MAYBE_INLINE V WeightedSum(const V tl, const V tc, const V tr, + const V ml, const V mc, const V mr, + const V bl, const V bc, const V br, + const V w0, const V w1, const V w2) { + const V sum_tb = Add(tc, bc); + + // Faster than 5 mul + 4 FMA. 
+ const V mul0 = Mul(mc, w0); + const V sum_lr = Add(ml, mr); + + const V x1 = Add(sum_tb, sum_lr); + const V mul1 = MulAdd(x1, w1, mul0); + + const V sum_t2 = Add(tl, tr); + const V sum_b2 = Add(bl, br); + const V x2 = Add(sum_t2, sum_b2); + const V mul2 = MulAdd(x2, w2, mul1); + return mul2; + } + + static JXL_MAYBE_INLINE V ConvolveValid(const float* JXL_RESTRICT row_t, + const float* JXL_RESTRICT row_m, + const float* JXL_RESTRICT row_b, + const int64_t x, const V w0, + const V w1, const V w2) { + const D d; + const V tc = LoadU(d, row_t + x); + const V mc = LoadU(d, row_m + x); + const V bc = LoadU(d, row_b + x); + const V tl = LoadU(d, row_t + x - 1); + const V tr = LoadU(d, row_t + x + 1); + const V ml = LoadU(d, row_m + x - 1); + const V mr = LoadU(d, row_m + x + 1); + const V bl = LoadU(d, row_b + x - 1); + const V br = LoadU(d, row_b + x + 1); + return WeightedSum(tl, tc, tr, ml, mc, mr, bl, bc, br, w0, w1, w2); + } +}; + +void Symmetric3(const ImageF& in, const Rect& rect, + const WeightsSymmetric3& weights, ThreadPool* pool, + ImageF* out) { + using Conv = ConvolveT; + if (rect.xsize() >= Conv::MinWidth()) { + return Conv::Run(in, rect, weights, pool, out); + } + + return SlowSymmetric3(in, rect, weights, pool, out); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(Symmetric3); +void Symmetric3(const ImageF& in, const Rect& rect, + const WeightsSymmetric3& weights, ThreadPool* pool, + ImageF* out) { + return HWY_DYNAMIC_DISPATCH(Symmetric3)(in, rect, weights, pool, out); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric5.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric5.cc new file mode 100644 index 0000000000..3c46024e72 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/convolve_symmetric5.cc @@ -0,0 +1,183 @@ +// Copyright (c) the JPEG XL Project 
Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/convolve.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/convolve_symmetric5.cc" +#include +#include + +#include "lib/jxl/common.h" // RoundUpTo +#include "lib/jxl/convolve-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Vec; + +// Weighted sum of 1x5 pixels around ix, iy with [wx2 wx1 wx0 wx1 wx2]. +template +static float WeightedSumBorder(const ImageF& in, const WrapY wrap_y, + const int64_t ix, const int64_t iy, + const size_t xsize, const size_t ysize, + const float wx0, const float wx1, + const float wx2) { + const WrapMirror wrap_x; + const float* JXL_RESTRICT row = in.ConstRow(wrap_y(iy, ysize)); + const float in_m2 = row[wrap_x(ix - 2, xsize)]; + const float in_p2 = row[wrap_x(ix + 2, xsize)]; + const float in_m1 = row[wrap_x(ix - 1, xsize)]; + const float in_p1 = row[wrap_x(ix + 1, xsize)]; + const float in_00 = row[ix]; + const float sum_2 = wx2 * (in_m2 + in_p2); + const float sum_1 = wx1 * (in_m1 + in_p1); + const float sum_0 = wx0 * in_00; + return sum_2 + sum_1 + sum_0; +} + +template +static V WeightedSum(const ImageF& in, const WrapY wrap_y, const size_t ix, + const int64_t iy, const size_t ysize, const V wx0, + const V wx1, const V wx2) { + const HWY_FULL(float) d; + const float* JXL_RESTRICT center = in.ConstRow(wrap_y(iy, ysize)) + ix; + const auto in_m2 = LoadU(d, center - 2); + const auto in_p2 = LoadU(d, center + 2); + const auto in_m1 = LoadU(d, center - 1); + const auto in_p1 = LoadU(d, center + 1); + const auto in_00 = Load(d, center); + const auto sum_2 = Mul(wx2, Add(in_m2, in_p2)); + const auto sum_1 = Mul(wx1, Add(in_m1, in_p1)); + const auto sum_0 = Mul(wx0, in_00); + return Add(sum_2, 
Add(sum_1, sum_0)); +} + +// Produces result for one pixel +template +float Symmetric5Border(const ImageF& in, const Rect& rect, const int64_t ix, + const int64_t iy, const WeightsSymmetric5& weights) { + const float w0 = weights.c[0]; + const float w1 = weights.r[0]; + const float w2 = weights.R[0]; + const float w4 = weights.d[0]; + const float w5 = weights.L[0]; + const float w8 = weights.D[0]; + + const size_t xsize = rect.xsize(); + const size_t ysize = rect.ysize(); + const WrapY wrap_y; + // Unrolled loop over all 5 rows of the kernel. + float sum0 = WeightedSumBorder(in, wrap_y, ix, iy, xsize, ysize, w0, w1, w2); + + sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 2, xsize, ysize, w2, w5, w8); + float sum1 = + WeightedSumBorder(in, wrap_y, ix, iy + 2, xsize, ysize, w2, w5, w8); + + sum0 += WeightedSumBorder(in, wrap_y, ix, iy - 1, xsize, ysize, w1, w4, w5); + sum1 += WeightedSumBorder(in, wrap_y, ix, iy + 1, xsize, ysize, w1, w4, w5); + + return sum0 + sum1; +} + +// Produces result for one vector's worth of pixels +template +static void Symmetric5Interior(const ImageF& in, const Rect& rect, + const int64_t ix, const int64_t iy, + const WeightsSymmetric5& weights, + float* JXL_RESTRICT row_out) { + const HWY_FULL(float) d; + + const auto w0 = LoadDup128(d, weights.c); + const auto w1 = LoadDup128(d, weights.r); + const auto w2 = LoadDup128(d, weights.R); + const auto w4 = LoadDup128(d, weights.d); + const auto w5 = LoadDup128(d, weights.L); + const auto w8 = LoadDup128(d, weights.D); + + const size_t ysize = rect.ysize(); + const WrapY wrap_y; + // Unrolled loop over all 5 rows of the kernel. 
+ auto sum0 = WeightedSum(in, wrap_y, ix, iy, ysize, w0, w1, w2); + + sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 2, ysize, w2, w5, w8)); + auto sum1 = WeightedSum(in, wrap_y, ix, iy + 2, ysize, w2, w5, w8); + + sum0 = Add(sum0, WeightedSum(in, wrap_y, ix, iy - 1, ysize, w1, w4, w5)); + sum1 = Add(sum1, WeightedSum(in, wrap_y, ix, iy + 1, ysize, w1, w4, w5)); + + Store(Add(sum0, sum1), d, row_out + ix); +} + +template +static void Symmetric5Row(const ImageF& in, const Rect& rect, const int64_t iy, + const WeightsSymmetric5& weights, + float* JXL_RESTRICT row_out) { + const int64_t kRadius = 2; + const size_t xsize = rect.xsize(); + + size_t ix = 0; + const HWY_FULL(float) d; + const size_t N = Lanes(d); + const size_t aligned_x = RoundUpTo(kRadius, N); + for (; ix < std::min(aligned_x, xsize); ++ix) { + row_out[ix] = Symmetric5Border(in, rect, ix, iy, weights); + } + for (; ix + N + kRadius <= xsize; ix += N) { + Symmetric5Interior(in, rect, ix, iy, weights, row_out); + } + for (; ix < xsize; ++ix) { + row_out[ix] = Symmetric5Border(in, rect, ix, iy, weights); + } +} + +static JXL_NOINLINE void Symmetric5BorderRow(const ImageF& in, const Rect& rect, + const int64_t iy, + const WeightsSymmetric5& weights, + float* JXL_RESTRICT row_out) { + return Symmetric5Row(in, rect, iy, weights, row_out); +} + +// Semi-vectorized (interior pixels only); called directly like slow::, unlike +// the fully vectorized strategies below. 
+void Symmetric5(const ImageF& in, const Rect& rect, + const WeightsSymmetric5& weights, ThreadPool* pool, + ImageF* JXL_RESTRICT out) { + const size_t ysize = rect.ysize(); + JXL_CHECK(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t iy = task; + + if (iy < 2 || iy >= static_cast(ysize) - 2) { + Symmetric5BorderRow(in, rect, iy, weights, out->Row(iy)); + } else { + Symmetric5Row(in, rect, iy, weights, out->Row(iy)); + } + }, + "Symmetric5x5Convolution")); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(Symmetric5); +void Symmetric5(const ImageF& in, const Rect& rect, + const WeightsSymmetric5& weights, ThreadPool* pool, + ImageF* JXL_RESTRICT out) { + return HWY_DYNAMIC_DISPATCH(Symmetric5)(in, rect, weights, pool, out); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/convolve_test.cc b/third-party/libjxl/libjxl/lib/jxl/convolve_test.cc new file mode 100644 index 0000000000..e86d637114 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/convolve_test.cc @@ -0,0 +1,252 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/convolve.h" + +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/convolve_test.cc" +#include +#include +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +#ifndef JXL_DEBUG_CONVOLVE +#define JXL_DEBUG_CONVOLVE 0 +#endif + +#include "lib/jxl/convolve-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +void TestNeighbors() { + const Neighbors::D d; + const Neighbors::V v = Iota(d, 0); + HWY_ALIGN float actual[hwy::kTestMaxVectorSize / sizeof(float)] = {0}; + + HWY_ALIGN float first_l1[hwy::kTestMaxVectorSize / sizeof(float)] = { + 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14}; + Store(Neighbors::FirstL1(v), d, actual); + const size_t N = Lanes(d); + EXPECT_EQ(std::vector(first_l1, first_l1 + N), + std::vector(actual, actual + N)); + +#if HWY_TARGET != HWY_SCALAR + HWY_ALIGN float first_l2[hwy::kTestMaxVectorSize / sizeof(float)] = { + 1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}; + Store(Neighbors::FirstL2(v), d, actual); + EXPECT_EQ(std::vector(first_l2, first_l2 + N), + std::vector(actual, actual + N)); + + HWY_ALIGN float first_l3[hwy::kTestMaxVectorSize / sizeof(float)] = { + 2, 1, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + Store(Neighbors::FirstL3(v), d, actual); + EXPECT_EQ(std::vector(first_l3, first_l3 + N), + std::vector(actual, actual + N)); +#endif // HWY_TARGET != HWY_SCALAR +} + +void VerifySymmetric3(const size_t xsize, const size_t ysize, ThreadPool* pool, + Rng* rng) { + const Rect rect(0, 0, xsize, ysize); + + ImageF in(xsize, ysize); + GenerateImage(*rng, &in, 0.0f, 1.0f); + + ImageF out_expected(xsize, ysize); + ImageF out_actual(xsize, ysize); + + const WeightsSymmetric3& weights = 
WeightsSymmetric3Lowpass(); + Symmetric3(in, rect, weights, pool, &out_expected); + SlowSymmetric3(in, rect, weights, pool, &out_actual); + + JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); +} + +// Ensures Symmetric and Separable give the same result. +void VerifySymmetric5(const size_t xsize, const size_t ysize, ThreadPool* pool, + Rng* rng) { + const Rect rect(0, 0, xsize, ysize); + + ImageF in(xsize, ysize); + GenerateImage(*rng, &in, 0.0f, 1.0f); + + ImageF out_expected(xsize, ysize); + ImageF out_actual(xsize, ysize); + + Separable5(in, Rect(in), WeightsSeparable5Lowpass(), pool, &out_expected); + Symmetric5(in, rect, WeightsSymmetric5Lowpass(), pool, &out_actual); + + JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); +} + +void VerifySeparable5(const size_t xsize, const size_t ysize, ThreadPool* pool, + Rng* rng) { + const Rect rect(0, 0, xsize, ysize); + + ImageF in(xsize, ysize); + GenerateImage(*rng, &in, 0.0f, 1.0f); + + ImageF out_expected(xsize, ysize); + ImageF out_actual(xsize, ysize); + + const WeightsSeparable5& weights = WeightsSeparable5Lowpass(); + Separable5(in, Rect(in), weights, pool, &out_expected); + SlowSeparable5(in, rect, weights, pool, &out_actual); + + JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); +} + +void VerifySeparable7(const size_t xsize, const size_t ysize, ThreadPool* pool, + Rng* rng) { + const Rect rect(0, 0, xsize, ysize); + + ImageF in(xsize, ysize); + GenerateImage(*rng, &in, 0.0f, 1.0f); + + ImageF out_expected(xsize, ysize); + ImageF out_actual(xsize, ysize); + + // Gaussian sigma 1.0 + const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f), + HWY_REP4(0.060626f), HWY_REP4(0.00598f)}, + {HWY_REP4(0.383103f), HWY_REP4(0.241843f), + HWY_REP4(0.060626f), HWY_REP4(0.00598f)}}; + + SlowSeparable7(in, rect, weights, pool, &out_expected); + Separable7(in, Rect(in), weights, pool, &out_actual); + + 
JXL_ASSERT_OK(VerifyRelativeError(out_expected, out_actual, 1E-5f, 1E-5f, _)); +} + +// For all xsize/ysize and kernels: +void TestConvolve() { + TestNeighbors(); + + test::ThreadPoolForTests pool(4); + EXPECT_EQ(true, + RunOnPool( + &pool, kConvolveMaxRadius, 40, ThreadPool::NoInit, + [](const uint32_t task, size_t /*thread*/) { + const size_t xsize = task; + Rng rng(129 + 13 * xsize); + + ThreadPool* null_pool = nullptr; + test::ThreadPoolForTests pool3(3); + for (size_t ysize = kConvolveMaxRadius; ysize < 16; ++ysize) { + JXL_DEBUG(JXL_DEBUG_CONVOLVE, + "%" PRIuS " x %" PRIuS " (target %" PRIx64 + ")===============================", + xsize, ysize, static_cast(HWY_TARGET)); + + JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym3------------------"); + VerifySymmetric3(xsize, ysize, null_pool, &rng); + VerifySymmetric3(xsize, ysize, &pool3, &rng); + + JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sym5------------------"); + VerifySymmetric5(xsize, ysize, null_pool, &rng); + VerifySymmetric5(xsize, ysize, &pool3, &rng); + + JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep5------------------"); + VerifySeparable5(xsize, ysize, null_pool, &rng); + VerifySeparable5(xsize, ysize, &pool3, &rng); + + JXL_DEBUG(JXL_DEBUG_CONVOLVE, "Sep7------------------"); + VerifySeparable7(xsize, ysize, null_pool, &rng); + VerifySeparable7(xsize, ysize, &pool3, &rng); + } + }, + "TestConvolve")); +} + +// Measures durations, verifies results, prints timings. `unpredictable1` +// must have value 1 (unknown to the compiler to prevent elision). 
+template +void BenchmarkConv(const char* caption, const Conv& conv, + const hwy::FuncInput unpredictable1) { + const size_t kNumInputs = 1; + const hwy::FuncInput inputs[kNumInputs] = {unpredictable1}; + hwy::Result results[kNumInputs]; + + const size_t kDim = 160; // in+out fit in L2 + ImageF in(kDim, kDim); + ZeroFillImage(&in); + in.Row(kDim / 2)[kDim / 2] = unpredictable1; + ImageF out(kDim, kDim); + + hwy::Params p; + p.verbose = false; + p.max_evals = 7; + p.target_rel_mad = 0.002; + const size_t num_results = MeasureClosure( + [&in, &conv, &out](const hwy::FuncInput input) { + conv(in, &out); + return out.Row(input)[0]; + }, + inputs, kNumInputs, results, p); + if (num_results != kNumInputs) { + fprintf(stderr, "MeasureClosure failed.\n"); + } + for (size_t i = 0; i < num_results; ++i) { + const double seconds = static_cast(results[i].ticks) / + hwy::platform::InvariantTicksPerSecond(); + printf("%12s: %7.2f MP/s (MAD=%4.2f%%)\n", caption, + kDim * kDim * 1E-6 / seconds, + static_cast(results[i].variability) * 100.0); + } +} + +struct ConvSymmetric3 { + void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const { + ThreadPool* null_pool = nullptr; + Symmetric3(in, Rect(in), WeightsSymmetric3Lowpass(), null_pool, out); + } +}; + +struct ConvSeparable5 { + void operator()(const ImageF& in, ImageF* JXL_RESTRICT out) const { + ThreadPool* null_pool = nullptr; + Separable5(in, Rect(in), WeightsSeparable5Lowpass(), null_pool, out); + } +}; + +void BenchmarkAll() { +#if 0 // disabled to avoid test timeouts, run manually on demand + const hwy::FuncInput unpredictable1 = time(nullptr) != 1234; + BenchmarkConv("Symmetric3", ConvSymmetric3(), unpredictable1); + BenchmarkConv("Separable5", ConvSeparable5(), unpredictable1); +#endif +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +class ConvolveTest : public hwy::TestWithParamTarget {}; 
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(ConvolveTest); + +HWY_EXPORT_AND_TEST_P(ConvolveTest, TestConvolve); + +HWY_EXPORT_AND_TEST_P(ConvolveTest, BenchmarkAll); + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/data_parallel_test.cc b/third-party/libjxl/libjxl/lib/jxl/data_parallel_test.cc new file mode 100644 index 0000000000..ee2a97f93a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/data_parallel_test.cc @@ -0,0 +1,87 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/base/data_parallel.h" + +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +class DataParallelTest : public ::testing::Test { + protected: + // A fake class to verify that DataParallel is properly calling the + // client-provided runner functions. + static int FakeRunner(void* runner_opaque, void* jpegxl_opaque, + JxlParallelRunInit init, JxlParallelRunFunction func, + uint32_t start_range, uint32_t end_range) { + DataParallelTest* self = static_cast(runner_opaque); + self->runner_called_++; + self->jpegxl_opaque_ = jpegxl_opaque; + self->init_ = init; + self->func_ = func; + self->start_range_ = start_range; + self->end_range_ = end_range; + return self->runner_return_; + } + + ThreadPool pool_{&DataParallelTest::FakeRunner, this}; + + // Number of times FakeRunner() was called. + int runner_called_ = 0; + + // Parameters passed to FakeRunner. + void* jpegxl_opaque_ = nullptr; + JxlParallelRunInit init_ = nullptr; + JxlParallelRunFunction func_ = nullptr; + uint32_t start_range_ = -1; + uint32_t end_range_ = -1; + + // Return value that FakeRunner will return. + int runner_return_ = 0; +}; + +// JxlParallelRunInit interface. 
+typedef int (*JxlParallelRunInit)(); + +} // namespace + +TEST_F(DataParallelTest, RunnerCalledParameters) { + EXPECT_TRUE(pool_.Run( + 1234, 5678, [](size_t /* num_threads */) { return true; }, + [](uint32_t /* task */, size_t /* thread */) { return; })); + EXPECT_EQ(1, runner_called_); + EXPECT_NE(nullptr, init_); + EXPECT_NE(nullptr, func_); + EXPECT_NE(nullptr, jpegxl_opaque_); + EXPECT_EQ(1234u, start_range_); + EXPECT_EQ(5678u, end_range_); +} + +TEST_F(DataParallelTest, RunnerFailurePropagates) { + runner_return_ = -1; // FakeRunner return value. + EXPECT_FALSE(pool_.Run( + 1234, 5678, [](size_t /* num_threads */) { return false; }, + [](uint32_t /* task */, size_t /* thread */) { return; })); + EXPECT_FALSE(RunOnPool( + nullptr, 1234, 5678, [](size_t /* num_threads */) { return false; }, + [](uint32_t /* task */, size_t /* thread */) { return; }, "Test")); +} + +TEST_F(DataParallelTest, RunnerNotCalledOnEmptyRange) { + runner_return_ = -1; // FakeRunner return value. + EXPECT_TRUE(pool_.Run( + 123, 123, [](size_t /* num_threads */) { return false; }, + [](uint32_t /* task */, size_t /* thread */) { return; })); + EXPECT_TRUE(RunOnPool( + nullptr, 123, 123, [](size_t /* num_threads */) { return false; }, + [](uint32_t /* task */, size_t /* thread */) { return; }, "Test")); + // We don't call the external runner when the range is empty. We don't even + // need to call the init function. + EXPECT_EQ(0, runner_called_); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dct-inl.h b/third-party/libjxl/libjxl/lib/jxl/dct-inl.h new file mode 100644 index 0000000000..532606075e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dct-inl.h @@ -0,0 +1,334 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Fast SIMD floating-point (I)DCT, any power of two. 
+ +#if defined(LIB_JXL_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_DCT_INL_H_ +#undef LIB_JXL_DCT_INL_H_ +#else +#define LIB_JXL_DCT_INL_H_ +#endif + +#include + +#include + +#include "lib/jxl/dct_block-inl.h" +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/transpose-inl.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::NegMulAdd; +using hwy::HWY_NAMESPACE::Sub; + +template +struct FVImpl { + using type = HWY_CAPPED(float, SZ); +}; + +template <> +struct FVImpl<0> { + using type = HWY_FULL(float); +}; + +template +using FV = typename FVImpl::type; + +// Implementation of Lowest Complexity Self Recursive Radix-2 DCT II/III +// Algorithms, by Siriani M. Perera and Jianhua Liu. + +template +struct CoeffBundle { + static void AddReverse(const float* JXL_RESTRICT ain1, + const float* JXL_RESTRICT ain2, + float* JXL_RESTRICT aout) { + for (size_t i = 0; i < N; i++) { + auto in1 = Load(FV(), ain1 + i * SZ); + auto in2 = Load(FV(), ain2 + (N - i - 1) * SZ); + Store(Add(in1, in2), FV(), aout + i * SZ); + } + } + static void SubReverse(const float* JXL_RESTRICT ain1, + const float* JXL_RESTRICT ain2, + float* JXL_RESTRICT aout) { + for (size_t i = 0; i < N; i++) { + auto in1 = Load(FV(), ain1 + i * SZ); + auto in2 = Load(FV(), ain2 + (N - i - 1) * SZ); + Store(Sub(in1, in2), FV(), aout + i * SZ); + } + } + static void B(float* JXL_RESTRICT coeff) { + auto sqrt2 = Set(FV(), kSqrt2); + auto in1 = Load(FV(), coeff); + auto in2 = Load(FV(), coeff + SZ); + Store(MulAdd(in1, sqrt2, in2), FV(), coeff); + for (size_t i = 1; i + 1 < N; i++) { + auto in1 = Load(FV(), coeff + i * SZ); + auto in2 = Load(FV(), coeff + (i + 1) * SZ); + Store(Add(in1, in2), FV(), coeff + i * SZ); + } + } + static void BTranspose(float* JXL_RESTRICT coeff) { + for (size_t i = N - 1; i > 
0; i--) { + auto in1 = Load(FV(), coeff + i * SZ); + auto in2 = Load(FV(), coeff + (i - 1) * SZ); + Store(Add(in1, in2), FV(), coeff + i * SZ); + } + auto sqrt2 = Set(FV(), kSqrt2); + auto in1 = Load(FV(), coeff); + Store(Mul(in1, sqrt2), FV(), coeff); + } + // Ideally optimized away by compiler (except the multiply). + static void InverseEvenOdd(const float* JXL_RESTRICT ain, + float* JXL_RESTRICT aout) { + for (size_t i = 0; i < N / 2; i++) { + auto in1 = Load(FV(), ain + i * SZ); + Store(in1, FV(), aout + 2 * i * SZ); + } + for (size_t i = N / 2; i < N; i++) { + auto in1 = Load(FV(), ain + i * SZ); + Store(in1, FV(), aout + (2 * (i - N / 2) + 1) * SZ); + } + } + // Ideally optimized away by compiler. + static void ForwardEvenOdd(const float* JXL_RESTRICT ain, size_t ain_stride, + float* JXL_RESTRICT aout) { + for (size_t i = 0; i < N / 2; i++) { + auto in1 = LoadU(FV(), ain + 2 * i * ain_stride); + Store(in1, FV(), aout + i * SZ); + } + for (size_t i = N / 2; i < N; i++) { + auto in1 = LoadU(FV(), ain + (2 * (i - N / 2) + 1) * ain_stride); + Store(in1, FV(), aout + i * SZ); + } + } + // Invoked on full vector. 
+ static void Multiply(float* JXL_RESTRICT coeff) { + for (size_t i = 0; i < N / 2; i++) { + auto in1 = Load(FV(), coeff + (N / 2 + i) * SZ); + auto mul = Set(FV(), WcMultipliers::kMultipliers[i]); + Store(Mul(in1, mul), FV(), coeff + (N / 2 + i) * SZ); + } + } + static void MultiplyAndAdd(const float* JXL_RESTRICT coeff, + float* JXL_RESTRICT out, size_t out_stride) { + for (size_t i = 0; i < N / 2; i++) { + auto mul = Set(FV(), WcMultipliers::kMultipliers[i]); + auto in1 = Load(FV(), coeff + i * SZ); + auto in2 = Load(FV(), coeff + (N / 2 + i) * SZ); + auto out1 = MulAdd(mul, in2, in1); + auto out2 = NegMulAdd(mul, in2, in1); + StoreU(out1, FV(), out + i * out_stride); + StoreU(out2, FV(), out + (N - i - 1) * out_stride); + } + } + template + static void LoadFromBlock(const Block& in, size_t off, + float* JXL_RESTRICT coeff) { + for (size_t i = 0; i < N; i++) { + Store(in.LoadPart(FV(), i, off), FV(), coeff + i * SZ); + } + } + template + static void StoreToBlockAndScale(const float* JXL_RESTRICT coeff, + const Block& out, size_t off) { + auto mul = Set(FV(), 1.0f / N); + for (size_t i = 0; i < N; i++) { + out.StorePart(FV(), Mul(mul, Load(FV(), coeff + i * SZ)), i, off); + } + } +}; + +template +struct DCT1DImpl; + +template +struct DCT1DImpl<1, SZ> { + JXL_INLINE void operator()(float* JXL_RESTRICT mem) {} +}; + +template +struct DCT1DImpl<2, SZ> { + JXL_INLINE void operator()(float* JXL_RESTRICT mem) { + auto in1 = Load(FV(), mem); + auto in2 = Load(FV(), mem + SZ); + Store(Add(in1, in2), FV(), mem); + Store(Sub(in1, in2), FV(), mem + SZ); + } +}; + +template +struct DCT1DImpl { + void operator()(float* JXL_RESTRICT mem) { + // This is relatively small (4kB with 64-DCT and AVX-512) + HWY_ALIGN float tmp[N * SZ]; + CoeffBundle::AddReverse(mem, mem + N / 2 * SZ, tmp); + DCT1DImpl()(tmp); + CoeffBundle::SubReverse(mem, mem + N / 2 * SZ, tmp + N / 2 * SZ); + CoeffBundle::Multiply(tmp); + DCT1DImpl()(tmp + N / 2 * SZ); + CoeffBundle::B(tmp + N / 2 * SZ); + 
CoeffBundle::InverseEvenOdd(tmp, mem); + } +}; + +template +struct IDCT1DImpl; + +template +struct IDCT1DImpl<1, SZ> { + JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + StoreU(LoadU(FV(), from), FV(), to); + } +}; + +template +struct IDCT1DImpl<2, SZ> { + JXL_INLINE void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + JXL_DASSERT(from_stride >= SZ); + JXL_DASSERT(to_stride >= SZ); + auto in1 = LoadU(FV(), from); + auto in2 = LoadU(FV(), from + from_stride); + StoreU(Add(in1, in2), FV(), to); + StoreU(Sub(in1, in2), FV(), to + to_stride); + } +}; + +template +struct IDCT1DImpl { + void operator()(const float* from, size_t from_stride, float* to, + size_t to_stride) { + JXL_DASSERT(from_stride >= SZ); + JXL_DASSERT(to_stride >= SZ); + // This is relatively small (4kB with 64-DCT and AVX-512) + HWY_ALIGN float tmp[N * SZ]; + CoeffBundle::ForwardEvenOdd(from, from_stride, tmp); + IDCT1DImpl()(tmp, SZ, tmp, SZ); + CoeffBundle::BTranspose(tmp + N / 2 * SZ); + IDCT1DImpl()(tmp + N / 2 * SZ, SZ, tmp + N / 2 * SZ, SZ); + CoeffBundle::MultiplyAndAdd(tmp, to, to_stride); + } +}; + +template +void DCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) { + size_t M = M_or_0 != 0 ? M_or_0 : Mp; + constexpr size_t SZ = MaxLanes(FV()); + HWY_ALIGN float tmp[N * SZ]; + for (size_t i = 0; i < M; i += Lanes(FV())) { + // TODO(veluca): consider removing the temporary memory here (as is done in + // IDCT), if it turns out that some compilers don't optimize away the loads + // and this is performance-critical. + CoeffBundle::LoadFromBlock(from, i, tmp); + DCT1DImpl()(tmp); + CoeffBundle::StoreToBlockAndScale(tmp, to, i); + } +} + +template +void IDCT1DWrapper(const FromBlock& from, const ToBlock& to, size_t Mp) { + size_t M = M_or_0 != 0 ? 
M_or_0 : Mp; + constexpr size_t SZ = MaxLanes(FV()); + for (size_t i = 0; i < M; i += Lanes(FV())) { + IDCT1DImpl()(from.Address(0, i), from.Stride(), to.Address(0, i), + to.Stride()); + } +} + +template +struct DCT1D { + template + void operator()(const FromBlock& from, const ToBlock& to) { + return DCT1DWrapper(from, to, M); + } +}; + +template +struct DCT1D MaxLanes(FV<0>()))>::type> { + template + void operator()(const FromBlock& from, const ToBlock& to) { + return NoInlineWrapper(DCT1DWrapper, from, to, M); + } +}; + +template +struct IDCT1D { + template + void operator()(const FromBlock& from, const ToBlock& to) { + return IDCT1DWrapper(from, to, M); + } +}; + +template +struct IDCT1D MaxLanes(FV<0>()))>::type> { + template + void operator()(const FromBlock& from, const ToBlock& to) { + return NoInlineWrapper(IDCT1DWrapper, from, to, + M); + } +}; + +// Computes the maybe-transposed, scaled DCT of a block, that needs to be +// HWY_ALIGN'ed. +template +struct ComputeScaledDCT { + // scratch_space must be aligned, and should have space for ROWS*COLS + // floats. + template + HWY_MAYBE_UNUSED void operator()(const From& from, float* to, + float* JXL_RESTRICT scratch_space) { + float* JXL_RESTRICT block = scratch_space; + if (ROWS < COLS) { + DCT1D()(from, DCTTo(block, COLS)); + Transpose::Run(DCTFrom(block, COLS), DCTTo(to, ROWS)); + DCT1D()(DCTFrom(to, ROWS), DCTTo(block, ROWS)); + Transpose::Run(DCTFrom(block, ROWS), DCTTo(to, COLS)); + } else { + DCT1D()(from, DCTTo(to, COLS)); + Transpose::Run(DCTFrom(to, COLS), DCTTo(block, ROWS)); + DCT1D()(DCTFrom(block, ROWS), DCTTo(to, ROWS)); + } + } +}; +// Computes the maybe-transposed, scaled IDCT of a block, that needs to be +// HWY_ALIGN'ed. +template +struct ComputeScaledIDCT { + // scratch_space must be aligned, and should have space for ROWS*COLS + // floats. 
+ template + HWY_MAYBE_UNUSED void operator()(float* JXL_RESTRICT from, const To& to, + float* JXL_RESTRICT scratch_space) { + float* JXL_RESTRICT block = scratch_space; + // Reverse the steps done in ComputeScaledDCT. + if (ROWS < COLS) { + Transpose::Run(DCTFrom(from, COLS), DCTTo(block, ROWS)); + IDCT1D()(DCTFrom(block, ROWS), DCTTo(from, ROWS)); + Transpose::Run(DCTFrom(from, ROWS), DCTTo(block, COLS)); + IDCT1D()(DCTFrom(block, COLS), to); + } else { + IDCT1D()(DCTFrom(from, ROWS), DCTTo(block, ROWS)); + Transpose::Run(DCTFrom(block, ROWS), DCTTo(from, COLS)); + IDCT1D()(DCTFrom(from, COLS), to); + } + } +}; + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); +#endif // LIB_JXL_DCT_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_block-inl.h b/third-party/libjxl/libjxl/lib/jxl/dct_block-inl.h new file mode 100644 index 0000000000..50646a737f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dct_block-inl.h @@ -0,0 +1,108 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Adapters for DCT input/output: from/to contiguous blocks or image rows. + +#if defined(LIB_JXL_DCT_BLOCK_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_DCT_BLOCK_INL_H_ +#undef LIB_JXL_DCT_BLOCK_INL_H_ +#else +#define LIB_JXL_DCT_BLOCK_INL_H_ +#endif + +#include + +#include + +#include "lib/jxl/base/status.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Vec; + +// Block: (x, y) <-> (N * y + x) +// Lines: (x, y) <-> (stride * y + x) +// +// I.e. Block is a specialization of Lines with fixed stride. +// +// FromXXX should implement Read and Load (Read vector). +// ToXXX should implement Write and Store (Write vector). 
+ +template +using BlockDesc = HWY_CAPPED(float, N); + +// Here and in the following, the SZ template parameter specifies the number of +// values to load/store. Needed because we want to handle 4x4 sub-blocks of +// 16x16 blocks. +class DCTFrom { + public: + DCTFrom(const float* data, size_t stride) : stride_(stride), data_(data) {} + + template + HWY_INLINE Vec LoadPart(D, const size_t row, size_t i) const { + JXL_DASSERT(Lanes(D()) <= stride_); + // Since these functions are used also for DC, no alignment at all is + // guaranteed in the case of floating blocks. + // TODO(veluca): consider using a different class for DC-to-LF and + // DC-from-LF, or copying DC values to/from a temporary aligned location. + return LoadU(D(), Address(row, i)); + } + + HWY_INLINE float Read(const size_t row, const size_t i) const { + return *Address(row, i); + } + + constexpr HWY_INLINE const float* Address(const size_t row, + const size_t i) const { + return data_ + row * stride_ + i; + } + + size_t Stride() const { return stride_; } + + private: + size_t stride_; + const float* JXL_RESTRICT data_; +}; + +class DCTTo { + public: + DCTTo(float* data, size_t stride) : stride_(stride), data_(data) {} + + template + HWY_INLINE void StorePart(D, const Vec& v, const size_t row, + size_t i) const { + JXL_DASSERT(Lanes(D()) <= stride_); + // Since these functions are used also for DC, no alignment at all is + // guaranteed in the case of floating blocks. + // TODO(veluca): consider using a different class for DC-to-LF and + // DC-from-LF, or copying DC values to/from a temporary aligned location. 
+ StoreU(v, D(), Address(row, i)); + } + + HWY_INLINE void Write(float v, const size_t row, const size_t i) const { + *Address(row, i) = v; + } + + constexpr HWY_INLINE float* Address(const size_t row, const size_t i) const { + return data_ + row * stride_ + i; + } + + size_t Stride() const { return stride_; } + + private: + size_t stride_; + float* JXL_RESTRICT data_; +}; + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_DCT_BLOCK_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_for_test.h b/third-party/libjxl/libjxl/lib/jxl/dct_for_test.h new file mode 100644 index 0000000000..8e32aa7eff --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dct_for_test.h @@ -0,0 +1,99 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DCT_FOR_TEST_H_ +#define LIB_JXL_DCT_FOR_TEST_H_ + +// Unoptimized DCT only for use in tests. + +#include // memcpy + +#include +#include + +#include "lib/jxl/common.h" // Pi + +namespace jxl { + +namespace test { +static inline double alpha(int u) { return u == 0 ? 0.7071067811865475 : 1.0; } + +// N-DCT on M columns, divided by sqrt(N). Matches the definition in the spec. +template +void DCT1D(double block[N * M], double out[N * M]) { + std::vector matrix(N * N); + const double scale = std::sqrt(2.0) / N; + for (size_t y = 0; y < N; y++) { + for (size_t u = 0; u < N; u++) { + matrix[N * u + y] = alpha(u) * cos((y + 0.5) * u * Pi(1.0 / N)) * scale; + } + } + for (size_t x = 0; x < M; x++) { + for (size_t u = 0; u < N; u++) { + out[M * u + x] = 0; + for (size_t y = 0; y < N; y++) { + out[M * u + x] += matrix[N * u + y] * block[M * y + x]; + } + } + } +} + +// N-IDCT on M columns, multiplied by sqrt(N). Matches the definition in the +// spec. 
+template +void IDCT1D(double block[N * M], double out[N * M]) { + std::vector matrix(N * N); + const double scale = std::sqrt(2.0); + for (size_t y = 0; y < N; y++) { + for (size_t u = 0; u < N; u++) { + // Transpose of DCT matrix. + matrix[N * y + u] = alpha(u) * cos((y + 0.5) * u * Pi(1.0 / N)) * scale; + } + } + for (size_t x = 0; x < M; x++) { + for (size_t u = 0; u < N; u++) { + out[M * u + x] = 0; + for (size_t y = 0; y < N; y++) { + out[M * u + x] += matrix[N * u + y] * block[M * y + x]; + } + } + } +} + +template +void TransposeBlock(double in[N * M], double out[M * N]) { + for (size_t x = 0; x < N; x++) { + for (size_t y = 0; y < M; y++) { + out[y * N + x] = in[x * M + y]; + } + } +} +} // namespace test + +// Untransposed DCT. +template +void DCTSlow(double block[N * N]) { + constexpr size_t kBlockSize = N * N; + std::vector g(kBlockSize); + test::DCT1D(block, g.data()); + test::TransposeBlock(g.data(), block); + test::DCT1D(block, g.data()); + test::TransposeBlock(g.data(), block); +} + +// Untransposed IDCT. +template +void IDCTSlow(double block[N * N]) { + constexpr size_t kBlockSize = N * N; + std::vector g(kBlockSize); + test::IDCT1D(block, g.data()); + test::TransposeBlock(g.data(), block); + test::IDCT1D(block, g.data()); + test::TransposeBlock(g.data(), block); +} + +} // namespace jxl + +#endif // LIB_JXL_DCT_FOR_TEST_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_scales.cc b/third-party/libjxl/libjxl/lib/jxl/dct_scales.cc new file mode 100644 index 0000000000..f9e89a6014 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dct_scales.cc @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dct_scales.h" + +namespace jxl { + +// Definition of constexpr arrays. 
+constexpr float DCTResampleScales<1, 8>::kScales[]; +constexpr float DCTResampleScales<2, 16>::kScales[]; +constexpr float DCTResampleScales<4, 32>::kScales[]; +constexpr float DCTResampleScales<8, 64>::kScales[]; +constexpr float DCTResampleScales<16, 128>::kScales[]; +constexpr float DCTResampleScales<32, 256>::kScales[]; +constexpr float DCTResampleScales<8, 1>::kScales[]; +constexpr float DCTResampleScales<16, 2>::kScales[]; +constexpr float DCTResampleScales<32, 4>::kScales[]; +constexpr float DCTResampleScales<64, 8>::kScales[]; +constexpr float DCTResampleScales<128, 16>::kScales[]; +constexpr float DCTResampleScales<256, 32>::kScales[]; +constexpr float WcMultipliers<4>::kMultipliers[]; +constexpr float WcMultipliers<8>::kMultipliers[]; +constexpr float WcMultipliers<16>::kMultipliers[]; +constexpr float WcMultipliers<32>::kMultipliers[]; +constexpr float WcMultipliers<64>::kMultipliers[]; +constexpr float WcMultipliers<128>::kMultipliers[]; +constexpr float WcMultipliers<256>::kMultipliers[]; + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_scales.h b/third-party/libjxl/libjxl/lib/jxl/dct_scales.h new file mode 100644 index 0000000000..23af03d60f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dct_scales.h @@ -0,0 +1,379 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DCT_SCALES_H_ +#define LIB_JXL_DCT_SCALES_H_ + +// Scaling factors. + +#include + +namespace jxl { + +static constexpr float kSqrt2 = 1.41421356237f; +static constexpr float kSqrt0_5 = 0.70710678118f; + +// For n != 0, the n-th basis function of a N-DCT, evaluated in pixel k, has a +// value of cos((k+1/2) n/(2N) pi). 
When downsampling by 2x, we average +// the values for pixel k and k+1 to get the value for pixel (k/2), thus we get +// +// [cos((k+1/2) n/N pi) + cos((k+3/2) n/N pi)]/2 = +// cos(n/(2N) pi) cos((k+1) n/N pi) = +// cos(n/(2N) pi) cos(((k/2)+1/2) n/(N/2) pi) +// +// which is exactly the same as the value of pixel k/2 of a N/2-sized DCT, +// except for the cos(n/(2N) pi) scaling factor (which does *not* +// depend on the pixel). Thus, when using the lower-frequency coefficients of a +// DCT-N to compute a DCT-(N/2), they should be scaled by this constant. Scaling +// factors for a DCT-(N/4) etc can then be obtained by successive +// multiplications. The structs below contain the above-mentioned scaling +// factors. +// +// Python code for the tables below: +// +// for i in range(N // 8): +// v = math.cos(i / (2 * N) * math.pi) +// v *= math.cos(i / (N) * math.pi) +// v *= math.cos(i / (N / 2) * math.pi) +// print(v, end=", ") + +template +struct DCTResampleScales; + +template <> +struct DCTResampleScales<8, 1> { + static constexpr float kScales[] = { + 1.000000000000000000, + }; +}; + +template <> +struct DCTResampleScales<16, 2> { + static constexpr float kScales[] = { + 1.000000000000000000, + 0.901764195028874394, + }; +}; + +template <> +struct DCTResampleScales<32, 4> { + static constexpr float kScales[] = { + 1.000000000000000000, + 0.974886821136879522, + 0.901764195028874394, + 0.787054918159101335, + }; +}; + +template <> +struct DCTResampleScales<64, 8> { + static constexpr float kScales[] = { + 1.0000000000000000, 0.9936866130906366, 0.9748868211368796, + 0.9440180941651672, 0.9017641950288744, 0.8490574973847023, + 0.7870549181591013, 0.7171081282466044, + }; +}; + +template <> +struct DCTResampleScales<128, 16> { + static constexpr float kScales[] = { + 1.0, + 0.9984194528776054, + 0.9936866130906366, + 0.9858278282666936, + 0.9748868211368796, + 0.9609244059440204, + 0.9440180941651672, + 0.9242615922757944, + 0.9017641950288744, + 0.8766500784429904, 
+ 0.8490574973847023, + 0.8191378932865928, + 0.7870549181591013, + 0.7529833816270532, + 0.7171081282466044, + 0.6796228528314651, + }; +}; + +template <> +struct DCTResampleScales<256, 32> { + static constexpr float kScales[] = { + 1.0, + 0.9996047255830407, + 0.9984194528776054, + 0.9964458326264695, + 0.9936866130906366, + 0.9901456355893141, + 0.9858278282666936, + 0.9807391980963174, + 0.9748868211368796, + 0.9682788310563117, + 0.9609244059440204, + 0.9528337534340876, + 0.9440180941651672, + 0.9344896436056892, + 0.9242615922757944, + 0.913348084400198, + 0.9017641950288744, + 0.8895259056651056, + 0.8766500784429904, + 0.8631544288990163, + 0.8490574973847023, + 0.8343786191696513, + 0.8191378932865928, + 0.8033561501721485, + 0.7870549181591013, + 0.7702563888779096, + 0.7529833816270532, + 0.7352593067735488, + 0.7171081282466044, + 0.6985543251889097, + 0.6796228528314651, + 0.6603391026591464, + }; +}; + +// Inverses of the above. +template <> +struct DCTResampleScales<1, 8> { + static constexpr float kScales[] = { + 1.000000000000000000, + }; +}; + +template <> +struct DCTResampleScales<2, 16> { + static constexpr float kScales[] = { + 1.000000000000000000, + 1.108937353592731823, + }; +}; + +template <> +struct DCTResampleScales<4, 32> { + static constexpr float kScales[] = { + 1.000000000000000000, + 1.025760096781116015, + 1.108937353592731823, + 1.270559368765487251, + }; +}; + +template <> +struct DCTResampleScales<8, 64> { + static constexpr float kScales[] = { + 1.0000000000000000, 1.0063534990068217, 1.0257600967811158, + 1.0593017296817173, 1.1089373535927318, 1.1777765381970435, + 1.2705593687654873, 1.3944898413647777, + }; +}; + +template <> +struct DCTResampleScales<16, 128> { + static constexpr float kScales[] = { + 1.0, + 1.0015830492062623, + 1.0063534990068217, + 1.0143759095928793, + 1.0257600967811158, + 1.0406645869480142, + 1.0593017296817173, + 1.0819447744633812, + 1.1089373535927318, + 1.1407059950032632, + 1.1777765381970435, 
+ 1.2207956782315876, + 1.2705593687654873, + 1.3280505578213306, + 1.3944898413647777, + 1.4714043176061107, + }; +}; + +template <> +struct DCTResampleScales<32, 256> { + static constexpr float kScales[] = { + 1.0, + 1.0003954307206069, + 1.0015830492062623, + 1.0035668445360069, + 1.0063534990068217, + 1.009952439375063, + 1.0143759095928793, + 1.0196390660647288, + 1.0257600967811158, + 1.0327603660498115, + 1.0406645869480142, + 1.049501024072585, + 1.0593017296817173, + 1.0701028169146336, + 1.0819447744633812, + 1.0948728278734026, + 1.1089373535927318, + 1.124194353004584, + 1.1407059950032632, + 1.158541237256391, + 1.1777765381970435, + 1.1984966740820495, + 1.2207956782315876, + 1.244777922949508, + 1.2705593687654873, + 1.2982690107339132, + 1.3280505578213306, + 1.3600643892400104, + 1.3944898413647777, + 1.4315278911623237, + 1.4714043176061107, + 1.5143734423314616, + }; +}; + +// Constants for DCT implementation. Generated by the following snippet: +// for i in range(N // 2): +// print(1.0 / (2 * math.cos((i + 0.5) * math.pi / N)), end=", ") +template +struct WcMultipliers; + +template <> +struct WcMultipliers<4> { + static constexpr float kMultipliers[] = { + 0.541196100146197, + 1.3065629648763764, + }; +}; + +template <> +struct WcMultipliers<8> { + static constexpr float kMultipliers[] = { + 0.5097955791041592, + 0.6013448869350453, + 0.8999762231364156, + 2.5629154477415055, + }; +}; + +template <> +struct WcMultipliers<16> { + static constexpr float kMultipliers[] = { + 0.5024192861881557, 0.5224986149396889, 0.5669440348163577, + 0.6468217833599901, 0.7881546234512502, 1.060677685990347, + 1.7224470982383342, 5.101148618689155, + }; +}; + +template <> +struct WcMultipliers<32> { + static constexpr float kMultipliers[] = { + 0.5006029982351963, 0.5054709598975436, 0.5154473099226246, + 0.5310425910897841, 0.5531038960344445, 0.5829349682061339, + 0.6225041230356648, 0.6748083414550057, 0.7445362710022986, + 0.8393496454155268, 
0.9725682378619608, 1.1694399334328847, + 1.4841646163141662, 2.057781009953411, 3.407608418468719, + 10.190008123548033, + }; +}; +template <> +struct WcMultipliers<64> { + static constexpr float kMultipliers[] = { + 0.500150636020651, 0.5013584524464084, 0.5037887256810443, + 0.5074711720725553, 0.5124514794082247, 0.5187927131053328, + 0.52657731515427, 0.535909816907992, 0.5469204379855088, + 0.5597698129470802, 0.57465518403266, 0.5918185358574165, + 0.6115573478825099, 0.6342389366884031, 0.6603198078137061, + 0.6903721282002123, 0.7251205223771985, 0.7654941649730891, + 0.8127020908144905, 0.8683447152233481, 0.9345835970364075, + 1.0144082649970547, 1.1120716205797176, 1.233832737976571, + 1.3892939586328277, 1.5939722833856311, 1.8746759800084078, + 2.282050068005162, 2.924628428158216, 4.084611078129248, + 6.796750711673633, 20.373878167231453, + }; +}; +template <> +struct WcMultipliers<128> { + static constexpr float kMultipliers[] = { + 0.5000376519155477, 0.5003390374428216, 0.5009427176380873, + 0.5018505174842379, 0.5030651913013697, 0.5045904432216454, + 0.5064309549285542, 0.5085924210498143, 0.5110815927066812, + 0.5139063298475396, 0.5170756631334912, 0.5205998663018917, + 0.524490540114724, 0.5287607092074876, 0.5334249333971333, + 0.538499435291984, 0.5440022463817783, 0.549953374183236, + 0.5563749934898856, 0.5632916653417023, 0.5707305880121454, + 0.5787218851348208, 0.5872989370937893, 0.5964987630244563, + 0.606362462272146, 0.6169357260050706, 0.6282694319707711, + 0.6404203382416639, 0.6534518953751283, 0.6674352009263413, + 0.6824501259764195, 0.6985866506472291, 0.7159464549705746, + 0.7346448236478627, 0.7548129391165311, 0.776600658233963, + 0.8001798956216941, 0.8257487738627852, 0.8535367510066064, + 0.8838110045596234, 0.9168844461846523, 0.9531258743921193, + 0.9929729612675466, 1.036949040910389, 1.0856850642580145, + 1.1399486751015042, 1.2006832557294167, 1.2690611716991191, + 1.346557628206286, 1.4350550884414341, 
1.5369941008524954, + 1.6555965242641195, 1.7952052190778898, 1.961817848571166, + 2.163957818751979, 2.4141600002500763, 2.7316450287739396, + 3.147462191781909, 3.7152427383269746, 4.5362909369693565, + 5.827688377844654, 8.153848602466814, 13.58429025728446, + 40.744688103351834, + }; +}; + +template <> +struct WcMultipliers<256> { + static constexpr float kMultipliers[128] = { + 0.5000094125358878, 0.500084723455784, 0.5002354020255269, + 0.5004615618093246, 0.5007633734146156, 0.5011410648064231, + 0.5015949217281668, 0.502125288230386, 0.5027325673091954, + 0.5034172216566842, 0.5041797745258774, 0.5050208107132756, + 0.5059409776624396, 0.5069409866925212, 0.5080216143561264, + 0.509183703931388, 0.5104281670536573, 0.5117559854927805, + 0.5131682130825206, 0.5146659778093218, 0.516250484068288, + 0.5179230150949777, 0.5196849355823947, 0.5215376944933958, + 0.5234828280796439, 0.52552196311921, 0.5276568203859896, + 0.5298892183652453, 0.5322210772308335, 0.5346544231010253, + 0.537191392591309, 0.5398342376841637, 0.5425853309375497, + 0.545447171055775, 0.5484223888484947, 0.551513753605893, + 0.554724179920619, 0.5580567349898085, 0.5615146464335654, + 0.5651013106696203, 0.5688203018875696, 0.5726753816701664, + 0.5766705093136241, 0.5808098529038624, 0.5850978012111273, + 0.58953897647151, 0.5941382481306648, 0.5989007476325463, + 0.6038318843443582, 0.6089373627182432, 0.614223200800649, + 0.6196957502119484, 0.6253617177319102, 0.6312281886412079, + 0.6373026519855411, 0.6435930279473415, 0.6501076975307724, + 0.6568555347890955, 0.6638459418498757, 0.6710888870233562, + 0.6785949463131795, 0.6863753486870501, 0.6944420255086364, + 0.7028076645818034, 0.7114857693151208, 0.7204907235796304, + 0.7298378629074134, 0.7395435527641373, 0.749625274727372, + 0.7601017215162176, 0.7709929019493761, 0.7823202570613161, + 0.7941067887834509, 0.8063772028037925, 0.8191580674598145, + 0.83247799080191, 0.8463678182968619, 0.860860854031955, + 
0.8759931087426972, 0.8918035785352535, 0.9083345588266809, + 0.9256319988042384, 0.9437459026371479, 0.962730784794803, + 0.9826461881778968, 1.0035572754078206, 1.0255355056139732, + 1.048659411496106, 1.0730154944316674, 1.0986992590905857, + 1.1258164135986009, 1.1544842669978943, 1.184833362908442, + 1.217009397314603, 1.2511754798461228, 1.287514812536712, + 1.326233878832723, 1.3675662599582539, 1.411777227500661, + 1.459169302866857, 1.5100890297227016, 1.5649352798258847, + 1.6241695131835794, 1.6883285509131505, 1.7580406092704062, + 1.8340456094306077, 1.9172211551275689, 2.0086161135167564, + 2.1094945286246385, 2.22139377701127, 2.346202662531156, + 2.486267909203593, 2.644541877144861, 2.824791402350551, + 3.0318994541759925, 3.2723115884254845, 3.5547153325075804, + 3.891107790700307, 4.298537526449054, 4.802076008665048, + 5.440166215091329, 6.274908408039339, 7.413566756422303, + 9.058751453879703, 11.644627325175037, 16.300023088031555, + 27.163977662448232, 81.48784219222516, + }; +}; + +// Apply the DCT algorithm-intrinsic constants to DCTResampleScale. +template +constexpr float DCTTotalResampleScale(size_t x) { + return DCTResampleScales::kScales[x]; +} + +} // namespace jxl + +#endif // LIB_JXL_DCT_SCALES_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_test.cc b/third-party/libjxl/libjxl/lib/jxl/dct_test.cc new file mode 100644 index 0000000000..9f5eff41e9 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dct_test.cc @@ -0,0 +1,389 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/dct_test.cc" +#include +#include +#include + +#include "lib/jxl/common.h" +#include "lib/jxl/dct-inl.h" +#include "lib/jxl/dct_for_test.h" +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/image.h" +#include "lib/jxl/test_utils.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// Computes the in-place NxN DCT of block. +// Requires that block is HWY_ALIGN'ed. +// +// Performs ComputeTransposedScaledDCT and then transposes and scales it to +// obtain "vanilla" DCT. +template +void ComputeDCT(float block[N * N]) { + HWY_ALIGN float tmp_block[N * N]; + HWY_ALIGN float scratch_space[N * N]; + ComputeScaledDCT()(DCTFrom(block, N), tmp_block, scratch_space); + + // Untranspose. + Transpose::Run(DCTFrom(tmp_block, N), DCTTo(block, N)); +} + +// Computes the in-place 8x8 iDCT of block. +// Requires that block is HWY_ALIGN'ed. +template +void ComputeIDCT(float block[N * N]) { + HWY_ALIGN float tmp_block[N * N]; + HWY_ALIGN float scratch_space[N * N]; + // Untranspose. 
+ Transpose::Run(DCTFrom(block, N), DCTTo(tmp_block, N)); + + ComputeScaledIDCT()(tmp_block, DCTTo(block, N), scratch_space); +} + +template +void TransposeTestT(float accuracy) { + constexpr size_t kBlockSize = N * N; + HWY_ALIGN float src[kBlockSize]; + DCTTo to_src(src, N); + for (size_t y = 0; y < N; ++y) { + for (size_t x = 0; x < N; ++x) { + to_src.Write(y * N + x, y, x); + } + } + HWY_ALIGN float dst[kBlockSize]; + Transpose::Run(DCTFrom(src, N), DCTTo(dst, N)); + DCTFrom from_dst(dst, N); + for (size_t y = 0; y < N; ++y) { + for (size_t x = 0; x < N; ++x) { + float expected = x * N + y; + float actual = from_dst.Read(y, x); + EXPECT_NEAR(expected, actual, accuracy) << "x = " << x << ", y = " << y; + } + } +} + +void TransposeTest() { + TransposeTestT<8>(1e-7f); + TransposeTestT<16>(1e-7f); + TransposeTestT<32>(1e-7f); +} + +template +void ColumnDctRoundtripT(float accuracy) { + constexpr size_t kBlockSize = N * N; + // Though we are only interested in single column result, dct.h has built-in + // limit on minimal number of columns processed. So, to be safe, we do + // regular 8x8 block transformation. On the bright side - we could check all + // 8 basis vectors at once. + HWY_ALIGN float block[kBlockSize]; + DCTTo to(block, N); + DCTFrom from(block, N); + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + to.Write((i == j) ? 1.0f : 0.0f, i, j); + } + } + + // Running (I)DCT on the same memory block seems to trigger a compiler bug on + // ARMv7 with clang6. + HWY_ALIGN float tmp[kBlockSize]; + DCTTo to_tmp(tmp, N); + DCTFrom from_tmp(tmp, N); + + DCT1D()(from, to_tmp); + IDCT1D()(from_tmp, to); + + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < N; ++j) { + float expected = (i == j) ? 
1.0f : 0.0f; + float actual = from.Read(i, j); + EXPECT_NEAR(expected, actual, accuracy) << " i=" << i << ", j=" << j; + } + } +} + +void ColumnDctRoundtrip() { + ColumnDctRoundtripT<8>(1e-6f); + ColumnDctRoundtripT<16>(1e-6f); + ColumnDctRoundtripT<32>(1e-6f); +} + +template +void TestDctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) { + constexpr size_t kBlockSize = N * N; + for (size_t i = start; i < end; i++) { + HWY_ALIGN float fast[kBlockSize] = {0.0f}; + double slow[kBlockSize] = {0.0}; + fast[i] = 1.0; + slow[i] = 1.0; + DCTSlow(slow); + ComputeDCT(fast); + for (size_t k = 0; k < kBlockSize; ++k) { + EXPECT_NEAR(fast[k], slow[k], accuracy / N) + << "i = " << i << ", k = " << k << ", N = " << N; + } + } +} + +template +void TestIdctAccuracy(float accuracy, size_t start = 0, size_t end = N * N) { + constexpr size_t kBlockSize = N * N; + for (size_t i = start; i < end; i++) { + HWY_ALIGN float fast[kBlockSize] = {0.0f}; + double slow[kBlockSize] = {0.0}; + fast[i] = 1.0; + slow[i] = 1.0; + IDCTSlow(slow); + ComputeIDCT(fast); + for (size_t k = 0; k < kBlockSize; ++k) { + EXPECT_NEAR(fast[k], slow[k], accuracy * N) + << "i = " << i << ", k = " << k << ", N = " << N; + } + } +} + +template +void TestInverseT(float accuracy) { + test::ThreadPoolForTests pool(N < 32 ? 0 : 8); + enum { kBlockSize = N * N }; + EXPECT_TRUE(RunOnPool( + &pool, 0, kBlockSize, ThreadPool::NoInit, + [accuracy](const uint32_t task, size_t /*thread*/) { + const size_t i = static_cast(task); + HWY_ALIGN float x[kBlockSize] = {0.0f}; + x[i] = 1.0; + + ComputeIDCT(x); + ComputeDCT(x); + + for (size_t k = 0; k < kBlockSize; ++k) { + EXPECT_NEAR(x[k], (k == i) ? 
1.0f : 0.0f, accuracy) + << "i = " << i << ", k = " << k; + } + }, + "TestInverse")); +} + +void InverseTest() { + TestInverseT<8>(1e-6f); + TestInverseT<16>(1e-6f); + TestInverseT<32>(3e-6f); +} + +template +void TestDctTranspose(float accuracy, size_t start = 0, size_t end = N * N) { + constexpr size_t kBlockSize = N * N; + for (size_t i = start; i < end; i++) { + for (size_t j = 0; j < kBlockSize; ++j) { + // We check that = . + // That means (Me_j)_i = (M^\dagger{}e_i)_j + + // x := Me_j + HWY_ALIGN float x[kBlockSize] = {0.0f}; + x[j] = 1.0; + ComputeIDCT(x); + // y := M^\dagger{}e_i + HWY_ALIGN float y[kBlockSize] = {0.0f}; + y[i] = 1.0; + ComputeDCT(y); + + EXPECT_NEAR(x[i] / N, y[j] * N, accuracy) << "i = " << i << ", j = " << j; + } + } +} + +template +void TestSlowInverse(float accuracy, size_t start = 0, size_t end = N * N) { + constexpr size_t kBlockSize = N * N; + for (size_t i = start; i < end; i++) { + double x[kBlockSize] = {0.0f}; + x[i] = 1.0; + + DCTSlow(x); + IDCTSlow(x); + + for (size_t k = 0; k < kBlockSize; ++k) { + EXPECT_NEAR(x[k], (k == i) ? 1.0f : 0.0f, accuracy) + << "i = " << i << ", k = " << k; + } + } +} + +template +void TestRectInverseT(float accuracy) { + constexpr size_t kBlockSize = ROWS * COLS; + for (size_t i = 0; i < kBlockSize; ++i) { + HWY_ALIGN float x[kBlockSize] = {0.0f}; + HWY_ALIGN float out[kBlockSize] = {0.0f}; + x[i] = 1.0; + HWY_ALIGN float coeffs[kBlockSize] = {0.0f}; + HWY_ALIGN float scratch_space[kBlockSize * 2]; + + ComputeScaledDCT()(DCTFrom(x, COLS), coeffs, scratch_space); + ComputeScaledIDCT()(coeffs, DCTTo(out, COLS), scratch_space); + + for (size_t k = 0; k < kBlockSize; ++k) { + EXPECT_NEAR(out[k], (k == i) ? 
1.0f : 0.0f, accuracy) + << "i = " << i << ", k = " << k << " ROWS = " << ROWS + << " COLS = " << COLS; + } + } +} + +void TestRectInverse() { + TestRectInverseT<16, 32>(1e-6f); + TestRectInverseT<8, 32>(1e-6f); + TestRectInverseT<8, 16>(1e-6f); + TestRectInverseT<4, 8>(1e-6f); + TestRectInverseT<2, 4>(1e-6f); + TestRectInverseT<1, 4>(1e-6f); + TestRectInverseT<1, 2>(1e-6f); + + TestRectInverseT<32, 16>(1e-6f); + TestRectInverseT<32, 8>(1e-6f); + TestRectInverseT<16, 8>(1e-6f); + TestRectInverseT<8, 4>(1e-6f); + TestRectInverseT<4, 2>(1e-6f); + TestRectInverseT<4, 1>(1e-6f); + TestRectInverseT<2, 1>(1e-6f); +} + +template +void TestRectTransposeT(float accuracy) { + constexpr size_t kBlockSize = ROWS * COLS; + HWY_ALIGN float scratch_space[kBlockSize * 2]; + for (size_t px = 0; px < COLS; ++px) { + for (size_t py = 0; py < ROWS; ++py) { + HWY_ALIGN float x1[kBlockSize] = {0.0f}; + HWY_ALIGN float x2[kBlockSize] = {0.0f}; + HWY_ALIGN float coeffs1[kBlockSize] = {0.0f}; + HWY_ALIGN float coeffs2[kBlockSize] = {0.0f}; + x1[py * COLS + px] = 1; + x2[px * ROWS + py] = 1; + + constexpr size_t OUT_ROWS = ROWS < COLS ? ROWS : COLS; + constexpr size_t OUT_COLS = ROWS < COLS ? 
COLS : ROWS; + + ComputeScaledDCT()(DCTFrom(x1, COLS), coeffs1, scratch_space); + ComputeScaledDCT()(DCTFrom(x2, ROWS), coeffs2, scratch_space); + + for (size_t x = 0; x < OUT_COLS; ++x) { + for (size_t y = 0; y < OUT_ROWS; ++y) { + EXPECT_NEAR(coeffs1[y * OUT_COLS + x], coeffs2[y * OUT_COLS + x], + accuracy) + << " px = " << px << ", py = " << py << ", x = " << x + << ", y = " << y; + } + } + } + } +} + +void TestRectTranspose() { + TestRectTransposeT<16, 32>(1e-6f); + TestRectTransposeT<8, 32>(1e-6f); + TestRectTransposeT<8, 16>(1e-6f); + TestRectTransposeT<4, 8>(1e-6f); + TestRectTransposeT<2, 4>(1e-6f); + TestRectTransposeT<1, 4>(1e-6f); + TestRectTransposeT<1, 2>(1e-6f); + + // Identical to 8, 16 + // TestRectTranspose<16, 8>(1e-6f); +} + +void TestDctAccuracyShard(size_t shard) { + if (shard == 0) { + TestDctAccuracy<1>(1.1E-7f); + TestDctAccuracy<2>(1.1E-7f); + TestDctAccuracy<4>(1.1E-7f); + TestDctAccuracy<8>(1.1E-7f); + TestDctAccuracy<16>(1.3E-7f); + } + TestDctAccuracy<32>(1.1E-7f, 32 * shard, 32 * (shard + 1)); +} + +void TestIdctAccuracyShard(size_t shard) { + if (shard == 0) { + TestIdctAccuracy<1>(1E-7f); + TestIdctAccuracy<2>(1E-7f); + TestIdctAccuracy<4>(1E-7f); + TestIdctAccuracy<8>(1E-7f); + TestIdctAccuracy<16>(1E-7f); + } + TestIdctAccuracy<32>(1E-7f, 32 * shard, 32 * (shard + 1)); +} + +void TestDctTransposeShard(size_t shard) { + if (shard == 0) { + TestDctTranspose<8>(1E-6f); + TestDctTranspose<16>(1E-6f); + } + TestDctTranspose<32>(3E-6f, 32 * shard, 32 * (shard + 1)); +} + +void TestSlowInverseShard(size_t shard) { + if (shard == 0) { + TestSlowInverse<1>(1E-5f); + TestSlowInverse<2>(1E-5f); + TestSlowInverse<4>(1E-5f); + TestSlowInverse<8>(1E-5f); + TestSlowInverse<16>(1E-5f); + } + TestSlowInverse<32>(1E-5f, 32 * shard, 32 * (shard + 1)); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +class TransposeTest : public 
hwy::TestWithParamTarget {}; + +HWY_TARGET_INSTANTIATE_TEST_SUITE_P(TransposeTest); + +HWY_EXPORT_AND_TEST_P(TransposeTest, TransposeTest); +HWY_EXPORT_AND_TEST_P(TransposeTest, InverseTest); +HWY_EXPORT_AND_TEST_P(TransposeTest, ColumnDctRoundtrip); +HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectInverse); +HWY_EXPORT_AND_TEST_P(TransposeTest, TestRectTranspose); + +// Tests in the DctShardedTest class are sharded for N=32. +class DctShardedTest : public ::hwy::TestWithParamTargetAndT {}; + +std::vector ShardRange(uint32_t n) { +#ifdef JXL_DISABLE_SLOW_TESTS + JXL_ASSERT(n > 6); + std::vector ret = {0, 1, 3, 5, n - 1}; +#else + std::vector ret(n); + std::iota(ret.begin(), ret.end(), 0); +#endif // JXL_DISABLE_SLOW_TESTS + return ret; +} + +HWY_TARGET_INSTANTIATE_TEST_SUITE_P_T(DctShardedTest, + ::testing::ValuesIn(ShardRange(32))); + +HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctAccuracyShard); +HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestIdctAccuracyShard); +HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestDctTransposeShard); +HWY_EXPORT_AND_TEST_P_T(DctShardedTest, TestSlowInverseShard); + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/dct_util.h b/third-party/libjxl/libjxl/lib/jxl/dct_util.h new file mode 100644 index 0000000000..fb6ce3b971 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dct_util.h @@ -0,0 +1,86 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_DCT_UTIL_H_ +#define LIB_JXL_DCT_UTIL_H_ + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { + +union ACPtr { + int32_t* ptr32; + int16_t* ptr16; + ACPtr() = default; + explicit ACPtr(int16_t* p) : ptr16(p) {} + explicit ACPtr(int32_t* p) : ptr32(p) {} +}; + +union ConstACPtr { + const int32_t* ptr32; + const int16_t* ptr16; + ConstACPtr() = default; + explicit ConstACPtr(const int16_t* p) : ptr16(p) {} + explicit ConstACPtr(const int32_t* p) : ptr32(p) {} +}; + +enum class ACType { k16 = 0, k32 = 1 }; + +class ACImage { + public: + virtual ~ACImage() = default; + virtual ACType Type() const = 0; + virtual ACPtr PlaneRow(size_t c, size_t y, size_t xbase) = 0; + virtual ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const = 0; + virtual size_t PixelsPerRow() const = 0; + virtual void ZeroFill() = 0; + virtual void ZeroFillPlane(size_t c) = 0; + virtual bool IsEmpty() const = 0; +}; + +template +class ACImageT final : public ACImage { + public: + ACImageT() = default; + ACImageT(size_t xsize, size_t ysize) { + static_assert( + std::is_same::value || std::is_same::value, + "ACImage must be either 32- or 16- bit"); + img_ = Image3(xsize, ysize); + } + ACType Type() const override { + return sizeof(T) == 2 ? 
ACType::k16 : ACType::k32; + } + ACPtr PlaneRow(size_t c, size_t y, size_t xbase) override { + return ACPtr(img_.PlaneRow(c, y) + xbase); + } + ConstACPtr PlaneRow(size_t c, size_t y, size_t xbase) const override { + return ConstACPtr(img_.PlaneRow(c, y) + xbase); + } + + size_t PixelsPerRow() const override { return img_.PixelsPerRow(); } + + void ZeroFill() override { ZeroFillImage(&img_); } + + void ZeroFillPlane(size_t c) override { ZeroFillImage(&img_.Plane(c)); } + + bool IsEmpty() const override { + return img_.xsize() == 0 || img_.ysize() == 0; + } + + private: + Image3 img_; +}; + +} // namespace jxl + +#endif // LIB_JXL_DCT_UTIL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_ans.cc b/third-party/libjxl/libjxl/lib/jxl/dec_ans.cc new file mode 100644 index 0000000000..13a57238f1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_ans.cc @@ -0,0 +1,372 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_ans.h" + +#include + +#include + +#include "lib/jxl/ans_common.h" +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_context_map.h" +#include "lib/jxl/fields.h" + +namespace jxl { +namespace { + +// Decodes a number in the range [0..255], by reading 1 - 11 bits. +inline int DecodeVarLenUint8(BitReader* input) { + if (input->ReadFixedBits<1>()) { + int nbits = static_cast(input->ReadFixedBits<3>()); + if (nbits == 0) { + return 1; + } else { + return static_cast(input->ReadBits(nbits)) + (1 << nbits); + } + } + return 0; +} + +// Decodes a number in the range [0..65535], by reading 1 - 21 bits. 
+inline int DecodeVarLenUint16(BitReader* input) { + if (input->ReadFixedBits<1>()) { + int nbits = static_cast(input->ReadFixedBits<4>()); + if (nbits == 0) { + return 1; + } else { + return static_cast(input->ReadBits(nbits)) + (1 << nbits); + } + } + return 0; +} + +Status ReadHistogram(int precision_bits, std::vector* counts, + BitReader* input) { + int simple_code = input->ReadBits(1); + if (simple_code == 1) { + int i; + int symbols[2] = {0}; + int max_symbol = 0; + const int num_symbols = input->ReadBits(1) + 1; + for (i = 0; i < num_symbols; ++i) { + symbols[i] = DecodeVarLenUint8(input); + if (symbols[i] > max_symbol) max_symbol = symbols[i]; + } + counts->resize(max_symbol + 1); + if (num_symbols == 1) { + (*counts)[symbols[0]] = 1 << precision_bits; + } else { + if (symbols[0] == symbols[1]) { // corrupt data + return false; + } + (*counts)[symbols[0]] = input->ReadBits(precision_bits); + (*counts)[symbols[1]] = (1 << precision_bits) - (*counts)[symbols[0]]; + } + } else { + int is_flat = input->ReadBits(1); + if (is_flat == 1) { + int alphabet_size = DecodeVarLenUint8(input) + 1; + *counts = CreateFlatHistogram(alphabet_size, 1 << precision_bits); + return true; + } + + uint32_t shift; + { + // TODO(veluca): speed up reading with table lookups. 
+ int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1); + int log = 0; + for (; log < upper_bound_log; log++) { + if (input->ReadFixedBits<1>() == 0) break; + } + shift = (input->ReadBits(log) | (1 << log)) - 1; + if (shift > ANS_LOG_TAB_SIZE + 1) { + return JXL_FAILURE("Invalid shift value"); + } + } + + int length = DecodeVarLenUint8(input) + 3; + counts->resize(length); + int total_count = 0; + + static const uint8_t huff[128][2] = { + {3, 10}, {7, 12}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + {3, 10}, {7, 13}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + {3, 10}, {6, 11}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + {3, 10}, {5, 0}, {3, 7}, {4, 3}, {3, 6}, {3, 8}, {3, 9}, {4, 5}, + {3, 10}, {4, 4}, {3, 7}, {4, 1}, {3, 6}, {3, 8}, {3, 9}, {4, 2}, + }; + + std::vector logcounts(counts->size()); + int omit_log = -1; + int omit_pos = -1; + // This array remembers which symbols have an RLE length. + std::vector same(counts->size(), 0); + for (size_t i = 0; i < logcounts.size(); ++i) { + input->Refill(); // for PeekFixedBits + Advance + int idx = input->PeekFixedBits<7>(); + input->Consume(huff[idx][0]); + logcounts[i] = huff[idx][1]; + // The RLE symbol. 
+ if (logcounts[i] == ANS_LOG_TAB_SIZE + 1) { + int rle_length = DecodeVarLenUint8(input); + same[i] = rle_length + 5; + i += rle_length + 3; + continue; + } + if (logcounts[i] > omit_log) { + omit_log = logcounts[i]; + omit_pos = i; + } + } + // Invalid input, e.g. due to invalid usage of RLE. + if (omit_pos < 0) return JXL_FAILURE("Invalid histogram."); + if (static_cast(omit_pos) + 1 < logcounts.size() && + logcounts[omit_pos + 1] == ANS_TAB_SIZE + 1) { + return JXL_FAILURE("Invalid histogram."); + } + int prev = 0; + int numsame = 0; + for (size_t i = 0; i < logcounts.size(); ++i) { + if (same[i]) { + // RLE sequence, let this loop output the same count for the next + // iterations. + numsame = same[i] - 1; + prev = i > 0 ? (*counts)[i - 1] : 0; + } + if (numsame > 0) { + (*counts)[i] = prev; + numsame--; + } else { + int code = logcounts[i]; + // omit_pos may not be negative at this point (checked before). + if (i == static_cast(omit_pos)) { + continue; + } else if (code == 0) { + continue; + } else if (code == 1) { + (*counts)[i] = 1; + } else { + int bitcount = GetPopulationCountPrecision(code - 1, shift); + (*counts)[i] = (1 << (code - 1)) + + (input->ReadBits(bitcount) << (code - 1 - bitcount)); + } + } + total_count += (*counts)[i]; + } + (*counts)[omit_pos] = (1 << precision_bits) - total_count; + if ((*counts)[omit_pos] <= 0) { + // The histogram we've read sums to more than total_count (including at + // least 1 for the omitted value). 
+ return JXL_FAILURE("Invalid histogram count."); + } + } + return true; +} + +} // namespace + +Status DecodeANSCodes(const size_t num_histograms, + const size_t max_alphabet_size, BitReader* in, + ANSCode* result) { + result->degenerate_symbols.resize(num_histograms, -1); + if (result->use_prefix_code) { + JXL_ASSERT(max_alphabet_size <= 1 << PREFIX_MAX_BITS); + result->huffman_data.resize(num_histograms); + std::vector alphabet_sizes(num_histograms); + for (size_t c = 0; c < num_histograms; c++) { + alphabet_sizes[c] = DecodeVarLenUint16(in) + 1; + if (alphabet_sizes[c] > max_alphabet_size) { + return JXL_FAILURE("Alphabet size is too long: %u", alphabet_sizes[c]); + } + } + for (size_t c = 0; c < num_histograms; c++) { + if (alphabet_sizes[c] > 1) { + if (!result->huffman_data[c].ReadFromBitStream(alphabet_sizes[c], in)) { + if (!in->AllReadsWithinBounds()) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for huffman code"); + } + return JXL_FAILURE("Invalid huffman tree number %" PRIuS + ", alphabet size %u", + c, alphabet_sizes[c]); + } + } else { + // 0-bit codes does not require extension tables. 
+ result->huffman_data[c].table_.clear(); + result->huffman_data[c].table_.resize(1u << kHuffmanTableBits); + } + for (const auto& h : result->huffman_data[c].table_) { + if (h.bits <= kHuffmanTableBits) { + result->UpdateMaxNumBits(c, h.value); + } + } + } + } else { + JXL_ASSERT(max_alphabet_size <= ANS_MAX_ALPHABET_SIZE); + result->alias_tables = + AllocateArray(num_histograms * (1 << result->log_alpha_size) * + sizeof(AliasTable::Entry)); + AliasTable::Entry* alias_tables = + reinterpret_cast(result->alias_tables.get()); + for (size_t c = 0; c < num_histograms; ++c) { + std::vector counts; + if (!ReadHistogram(ANS_LOG_TAB_SIZE, &counts, in)) { + return JXL_FAILURE("Invalid histogram bitstream."); + } + if (counts.size() > max_alphabet_size) { + return JXL_FAILURE("Alphabet size is too long: %" PRIuS, counts.size()); + } + while (!counts.empty() && counts.back() == 0) { + counts.pop_back(); + } + for (size_t s = 0; s < counts.size(); s++) { + if (counts[s] != 0) { + result->UpdateMaxNumBits(c, s); + } + } + // InitAliasTable "fixes" empty counts to contain degenerate "0" symbol. + int degenerate_symbol = counts.empty() ? 0 : (counts.size() - 1); + for (int s = 0; s < degenerate_symbol; ++s) { + if (counts[s] != 0) { + degenerate_symbol = -1; + break; + } + } + result->degenerate_symbols[c] = degenerate_symbol; + InitAliasTable(counts, ANS_TAB_SIZE, result->log_alpha_size, + alias_tables + c * (1 << result->log_alpha_size)); + } + } + return true; +} +Status DecodeUintConfig(size_t log_alpha_size, HybridUintConfig* uint_config, + BitReader* br) { + br->Refill(); + size_t split_exponent = br->ReadBits(CeilLog2Nonzero(log_alpha_size + 1)); + size_t msb_in_token = 0, lsb_in_token = 0; + if (split_exponent != log_alpha_size) { + // otherwise, msb/lsb don't matter. 
+ size_t nbits = CeilLog2Nonzero(split_exponent + 1); + msb_in_token = br->ReadBits(nbits); + if (msb_in_token > split_exponent) { + // This could be invalid here already and we need to check this before + // we use its value to read more bits. + return JXL_FAILURE("Invalid HybridUintConfig"); + } + nbits = CeilLog2Nonzero(split_exponent - msb_in_token + 1); + lsb_in_token = br->ReadBits(nbits); + } + if (lsb_in_token + msb_in_token > split_exponent) { + return JXL_FAILURE("Invalid HybridUintConfig"); + } + *uint_config = HybridUintConfig(split_exponent, msb_in_token, lsb_in_token); + return true; +} + +Status DecodeUintConfigs(size_t log_alpha_size, + std::vector* uint_config, + BitReader* br) { + // TODO(veluca): RLE? + for (size_t i = 0; i < uint_config->size(); i++) { + JXL_RETURN_IF_ERROR( + DecodeUintConfig(log_alpha_size, &(*uint_config)[i], br)); + } + return true; +} + +LZ77Params::LZ77Params() { Bundle::Init(this); } +Status LZ77Params::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &enabled)); + if (!visitor->Conditional(enabled)) return true; + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(224), Val(512), Val(4096), + BitsOffset(15, 8), 224, &min_symbol)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(3), Val(4), BitsOffset(2, 5), + BitsOffset(8, 9), 3, &min_length)); + return true; +} + +void ANSCode::UpdateMaxNumBits(size_t ctx, size_t symbol) { + HybridUintConfig* cfg = &uint_config[ctx]; + // LZ77 symbols use a different uint config. 
+ if (lz77.enabled && lz77.nonserialized_distance_context != ctx && + symbol >= lz77.min_symbol) { + symbol -= lz77.min_symbol; + cfg = &lz77.length_uint_config; + } + size_t split_token = cfg->split_token; + size_t msb_in_token = cfg->msb_in_token; + size_t lsb_in_token = cfg->lsb_in_token; + size_t split_exponent = cfg->split_exponent; + if (symbol < split_token) { + max_num_bits = std::max(max_num_bits, split_exponent); + return; + } + uint32_t n_extra_bits = + split_exponent - (msb_in_token + lsb_in_token) + + ((symbol - split_token) >> (msb_in_token + lsb_in_token)); + size_t total_bits = msb_in_token + lsb_in_token + n_extra_bits + 1; + max_num_bits = std::max(max_num_bits, total_bits); +} + +Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code, + std::vector* context_map, bool disallow_lz77) { + JXL_RETURN_IF_ERROR(Bundle::Read(br, &code->lz77)); + if (code->lz77.enabled) { + num_contexts++; + JXL_RETURN_IF_ERROR(DecodeUintConfig(/*log_alpha_size=*/8, + &code->lz77.length_uint_config, br)); + } + if (code->lz77.enabled && disallow_lz77) { + return JXL_FAILURE("Using LZ77 when explicitly disallowed"); + } + size_t num_histograms = 1; + context_map->resize(num_contexts); + if (num_contexts > 1) { + JXL_RETURN_IF_ERROR(DecodeContextMap(context_map, &num_histograms, br)); + } + code->lz77.nonserialized_distance_context = context_map->back(); + code->use_prefix_code = br->ReadFixedBits<1>(); + if (code->use_prefix_code) { + code->log_alpha_size = PREFIX_MAX_BITS; + } else { + code->log_alpha_size = br->ReadFixedBits<2>() + 5; + } + code->uint_config.resize(num_histograms); + JXL_RETURN_IF_ERROR( + DecodeUintConfigs(code->log_alpha_size, &code->uint_config, br)); + const size_t max_alphabet_size = 1 << code->log_alpha_size; + JXL_RETURN_IF_ERROR( + DecodeANSCodes(num_histograms, max_alphabet_size, br, code)); + // When using LZ77, flat codes might result in valid codestreams with + // histograms that potentially allow very large bit counts. 
+ // TODO(veluca): in principle, a valid codestream might contain a histogram + // that could allow very large numbers of bits that is never used during ANS + // decoding. There's no benefit to doing that, though. + if (!code->lz77.enabled && code->max_num_bits > 32) { + // Just emit a warning as there are many opportunities for false positives. + JXL_WARNING("Histogram can represent numbers that are too large: %" PRIuS + "\n", + code->max_num_bits); + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_ans.h b/third-party/libjxl/libjxl/lib/jxl/dec_ans.h new file mode 100644 index 0000000000..6986cf1b1f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_ans.h @@ -0,0 +1,505 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_ANS_H_ +#define LIB_JXL_DEC_ANS_H_ + +// Library to decode the ANS population counts from the bit-stream and build a +// decoding table from them. + +#include +#include + +#include +#include + +#include "lib/jxl/ans_common.h" +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/cache_aligned.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_huffman.h" +#include "lib/jxl/field_encodings.h" + +namespace jxl { + +class ANSSymbolReader; + +// Experiments show that best performance is typically achieved for a +// split-exponent of 3 or 4. Trend seems to be that '4' is better +// for large-ish pictures, and '3' better for rather small-ish pictures. +// This is plausible - the more special symbols we have, the better +// statistics we need to get a benefit out of them. 
+ +// Our hybrid-encoding scheme has dedicated tokens for the smallest +// (1 << split_exponents) numbers, and for the rest +// encodes (number of bits) + (msb_in_token sub-leading binary digits) + +// (lsb_in_token lowest binary digits) in the token, with the remaining bits +// then being encoded as data. +// +// Example with split_exponent = 4, msb_in_token = 2, lsb_in_token = 0. +// +// Numbers N in [0 .. 15]: +// These get represented as (token=N, bits=''). +// Numbers N >= 16: +// If n is such that 2**n <= N < 2**(n+1), +// and m = N - 2**n is the 'mantissa', +// these get represented as: +// (token=split_token + +// ((n - split_exponent) * 4) + +// (m >> (n - msb_in_token)), +// bits=m & (1 << (n - msb_in_token)) - 1) +// Specifically, we would get: +// N = 0 - 15: (token=N, nbits=0, bits='') +// N = 16 (10000): (token=16, nbits=2, bits='00') +// N = 17 (10001): (token=16, nbits=2, bits='01') +// N = 20 (10100): (token=17, nbits=2, bits='00') +// N = 24 (11000): (token=18, nbits=2, bits='00') +// N = 28 (11100): (token=19, nbits=2, bits='00') +// N = 32 (100000): (token=20, nbits=3, bits='000') +// N = 65535: (token=63, nbits=13, bits='1111111111111') +struct HybridUintConfig { + uint32_t split_exponent; + uint32_t split_token; + uint32_t msb_in_token; + uint32_t lsb_in_token; + JXL_INLINE void Encode(uint32_t value, uint32_t* JXL_RESTRICT token, + uint32_t* JXL_RESTRICT nbits, + uint32_t* JXL_RESTRICT bits) const { + if (value < split_token) { + *token = value; + *nbits = 0; + *bits = 0; + } else { + uint32_t n = FloorLog2Nonzero(value); + uint32_t m = value - (1 << n); + *token = split_token + + ((n - split_exponent) << (msb_in_token + lsb_in_token)) + + ((m >> (n - msb_in_token)) << lsb_in_token) + + (m & ((1 << lsb_in_token) - 1)); + *nbits = n - msb_in_token - lsb_in_token; + *bits = (value >> lsb_in_token) & ((1UL << *nbits) - 1); + } + } + + explicit HybridUintConfig(uint32_t split_exponent = 4, + uint32_t msb_in_token = 2, + uint32_t lsb_in_token = 0) 
+      : split_exponent(split_exponent),
+        split_token(1 << split_exponent),
+        msb_in_token(msb_in_token),
+        lsb_in_token(lsb_in_token) {
+    JXL_DASSERT(split_exponent >= msb_in_token + lsb_in_token);
+  }
+};
+
+// Bundle of LZ77 settings carried by an ANSCode. `enabled`, `min_symbol`,
+// `min_length` and `nonserialized_distance_context` are read by the
+// ANSSymbolReader constructor below.
+struct LZ77Params : public Fields {
+  LZ77Params();
+  JXL_FIELDS_NAME(LZ77Params)
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+  bool enabled;
+
+  // Symbols above min_symbol use a special hybrid uint encoding and
+  // represent a length, to be added to min_length.
+  uint32_t min_symbol;
+  uint32_t min_length;
+
+  // Not serialized by VisitFields.
+  HybridUintConfig length_uint_config{0, 0, 0};
+
+  size_t nonserialized_distance_context;
+};
+
+// Size (in entries) of the LZ77 window; a power of two so that positions can
+// be wrapped with `& kWindowMask`.
+static constexpr size_t kWindowSize = 1 << 20;
+static constexpr size_t kNumSpecialDistances = 120;
+// Table of special distance codes from WebP lossless.
+// Each entry is a pair; the reader combines it as
+// `entry[0] + distance_multiplier * entry[1]` (clamped to at least 1).
+static constexpr int8_t kSpecialDistances[kNumSpecialDistances][2] = {
+    {0, 1},  {1, 0},  {1, 1},  {-1, 1}, {0, 2},  {2, 0},  {1, 2},  {-1, 2},
+    {2, 1},  {-2, 1}, {2, 2},  {-2, 2}, {0, 3},  {3, 0},  {1, 3},  {-1, 3},
+    {3, 1},  {-3, 1}, {2, 3},  {-2, 3}, {3, 2},  {-3, 2}, {0, 4},  {4, 0},
+    {1, 4},  {-1, 4}, {4, 1},  {-4, 1}, {3, 3},  {-3, 3}, {2, 4},  {-2, 4},
+    {4, 2},  {-4, 2}, {0, 5},  {3, 4},  {-3, 4}, {4, 3},  {-4, 3}, {5, 0},
+    {1, 5},  {-1, 5}, {5, 1},  {-5, 1}, {2, 5},  {-2, 5}, {5, 2},  {-5, 2},
+    {4, 4},  {-4, 4}, {3, 5},  {-3, 5}, {5, 3},  {-5, 3}, {0, 6},  {6, 0},
+    {1, 6},  {-1, 6}, {6, 1},  {-6, 1}, {2, 6},  {-2, 6}, {6, 2},  {-6, 2},
+    {4, 5},  {-4, 5}, {5, 4},  {-5, 4}, {3, 6},  {-3, 6}, {6, 3},  {-6, 3},
+    {0, 7},  {7, 0},  {1, 7},  {-1, 7}, {5, 5},  {-5, 5}, {7, 1},  {-7, 1},
+    {4, 6},  {-4, 6}, {6, 4},  {-6, 4}, {2, 7},  {-2, 7}, {7, 2},  {-7, 2},
+    {3, 7},  {-3, 7}, {7, 3},  {-7, 3}, {5, 6},  {-5, 6}, {6, 5},  {-6, 5},
+    {8, 0},  {4, 7},  {-4, 7}, {7, 4},  {-7, 4}, {8, 1},  {8, 2},  {6, 6},
+    {-6, 6}, {8, 3},  {5, 7},  {-5, 7}, {7, 5},  {-7, 5}, {8, 4},  {6, 7},
+    {-6, 7}, {7, 6},  {-7, 6}, {8, 5},  {7, 7},  {-7, 7}, {8, 6},  {8, 7}};
+
+// NOTE(review): in this copy of the patch several `<...>` argument lists
+// appear to have been lost (e.g. `std::vector huffman_data`, `template`
+// with no parameter list, `static_cast(...)`). Confirm against the upstream
+// libjxl sources before relying on this text.
+//
+// Decoding tables shared by all symbol readers of one entropy-coded stream:
+// either alias tables (ANS) or Huffman data (prefix code), plus the per-context
+// hybrid-uint configurations and LZ77 parameters.
+struct ANSCode {
+  CacheAlignedUniquePtr alias_tables;
+  std::vector huffman_data;
+  std::vector uint_config;
+  std::vector degenerate_symbols;
+  bool use_prefix_code;
+  uint8_t log_alpha_size;  // for ANS.
+  LZ77Params lz77;
+  // Maximum number of bits necessary to represent the result of a
+  // ReadHybridUint call done with this ANSCode.
+  size_t max_num_bits = 0;
+  void UpdateMaxNumBits(size_t ctx, size_t symbol);
+};
+
+// Reads entropy-coded symbols (ANS or prefix code) from a BitReader and turns
+// them into integers via the hybrid-uint configuration of each context.
+// When LZ77 is enabled in the ANSCode it also keeps a window of the most recent
+// kWindowSize decoded values and replays copies from it.
+class ANSSymbolReader {
+ public:
+  // Invalid symbol reader, to be overwritten.
+  ANSSymbolReader() = default;
+  // Binds the reader to `code` (not owned). In ANS mode the initial 32-bit
+  // state is read from `br`; in prefix-code mode the state is set to the value
+  // that CheckANSFinalState() expects. If LZ77 is enabled, allocates the window
+  // and, for a non-zero `distance_multiplier`, precomputes the special
+  // distances.
+  ANSSymbolReader(const ANSCode* code, BitReader* JXL_RESTRICT br,
+                  size_t distance_multiplier = 0)
+      : alias_tables_(
+            reinterpret_cast(code->alias_tables.get())),
+        huffman_data_(code->huffman_data.data()),
+        use_prefix_code_(code->use_prefix_code),
+        configs(code->uint_config.data()) {
+    if (!use_prefix_code_) {
+      state_ = static_cast(br->ReadFixedBits<32>());
+      log_alpha_size_ = code->log_alpha_size;
+      log_entry_size_ = ANS_LOG_TAB_SIZE - code->log_alpha_size;
+      entry_size_minus_1_ = (1 << log_entry_size_) - 1;
+    } else {
+      state_ = (ANS_SIGNATURE << 16u);
+    }
+    if (!code->lz77.enabled) return;
+    // a std::vector incurs unacceptable decoding speed loss because of
+    // initialization.
+    lz77_window_storage_ = AllocateArray(kWindowSize * sizeof(uint32_t));
+    lz77_window_ = reinterpret_cast(lz77_window_storage_.get());
+    lz77_ctx_ = code->lz77.nonserialized_distance_context;
+    lz77_length_uint_ = code->lz77.length_uint_config;
+    lz77_threshold_ = code->lz77.min_symbol;
+    lz77_min_length_ = code->lz77.min_length;
+    num_special_distances_ =
+        distance_multiplier == 0 ? 0 : kNumSpecialDistances;
+    for (size_t i = 0; i < num_special_distances_; i++) {
+      int dist = kSpecialDistances[i][0];
+      dist += static_cast(distance_multiplier) * kSpecialDistances[i][1];
+      // Distances below 1 are clamped to 1.
+      if (dist < 1) dist = 1;
+      special_distances_[i] = dist;
+    }
+  }
+
+  // Decodes one ANS symbol for histogram `histo_idx`, updating `state_` and
+  // pulling 16 more bits from `br` when the state drops below 2^16. The caller
+  // must have refilled `br` beforehand.
+  JXL_INLINE size_t ReadSymbolANSWithoutRefill(const size_t histo_idx,
+                                               BitReader* JXL_RESTRICT br) {
+    const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);
+
+    const AliasTable::Entry* table =
+        &alias_tables_[histo_idx << log_alpha_size_];
+    const AliasTable::Symbol symbol =
+        AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+    state_ = symbol.freq * (state_ >> ANS_LOG_TAB_SIZE) + symbol.offset;
+
+#if 1
+    // Branchless version is about equally fast on SKX.
+    const uint32_t new_state =
+        (state_ << 16u) | static_cast(br->PeekFixedBits<16>());
+    const bool normalize = state_ < (1u << 16u);
+    state_ = normalize ? new_state : state_;
+    br->Consume(normalize ? 16 : 0);
+#else
+    if (JXL_UNLIKELY(state_ < (1u << 16u))) {
+      state_ = (state_ << 16u) | br->PeekFixedBits<16>();
+      br->Consume(16);
+    }
+#endif
+    const uint32_t next_res = state_ & (ANS_TAB_SIZE - 1u);
+    AliasTable::Prefetch(table, next_res, log_entry_size_);
+
+    return symbol.value;
+  }
+
+  // Decodes one prefix-coded symbol for histogram `histo_idx`.
+  JXL_INLINE size_t ReadSymbolHuffWithoutRefill(const size_t histo_idx,
+                                                BitReader* JXL_RESTRICT br) {
+    return huffman_data_[histo_idx].ReadSymbol(br);
+  }
+
+  // Dispatches to the prefix-code or ANS decoder depending on the code type.
+  JXL_INLINE size_t ReadSymbolWithoutRefill(const size_t histo_idx,
+                                            BitReader* JXL_RESTRICT br) {
+    // TODO(veluca): hoist if in hotter loops.
+    if (JXL_UNLIKELY(use_prefix_code_)) {
+      return ReadSymbolHuffWithoutRefill(histo_idx, br);
+    }
+    return ReadSymbolANSWithoutRefill(histo_idx, br);
+  }
+
+  // Refills `br`, then decodes one symbol.
+  JXL_INLINE size_t ReadSymbol(const size_t histo_idx,
+                               BitReader* JXL_RESTRICT br) {
+    br->Refill();
+    return ReadSymbolWithoutRefill(histo_idx, br);
+  }
+
+  // Returns whether the state equals the expected end-of-stream signature.
+  // Always true in fuzzing builds.
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  bool CheckANSFinalState() const { return true; }
+#else
+  bool CheckANSFinalState() const { return state_ == (ANS_SIGNATURE << 16u); }
+#endif
+
+  // Expands `token` into an integer according to `config`: tokens below
+  // `split_token` are returned as-is; larger tokens carry some MSBs/LSBs of the
+  // value and the number of extra bits to read from `br`. The caller must have
+  // refilled `br`.
+  template
+  static JXL_INLINE uint32_t ReadHybridUintConfig(
+      const HybridUintConfig& config, size_t token, BitReader* br) {
+    size_t split_token = config.split_token;
+    size_t msb_in_token = config.msb_in_token;
+    size_t lsb_in_token = config.lsb_in_token;
+    size_t split_exponent = config.split_exponent;
+    // Fast-track version of hybrid integer decoding.
+    if (token < split_token) return token;
+    uint32_t nbits = split_exponent - (msb_in_token + lsb_in_token) +
+                     ((token - split_token) >> (msb_in_token + lsb_in_token));
+    // Max amount of bits for ReadBits is 32 and max valid left shift is 29
+    // bits. However, for speed no error is propagated here, instead limit the
+    // nbits size. If nbits > 29, the code stream is invalid, but no error is
+    // returned.
+    // Note that in most cases we will emit an error if the histogram allows
+    // representing numbers that would cause invalid shifts, but we need to
+    // keep this check as when LZ77 is enabled it might make sense to have a
+    // histogram that could in principle cause invalid shifts.
+    nbits &= 31u;
+    uint32_t low = token & ((1 << lsb_in_token) - 1);
+    token >>= lsb_in_token;
+    const size_t bits = br->PeekBits(nbits);
+    br->Consume(nbits);
+    // Layout of the result, from most to least significant:
+    // implicit leading 1 | MSBs from token | `nbits` raw bits | LSBs from token
+    size_t ret = (((((1 << msb_in_token) | (token & ((1 << msb_in_token) - 1)))
+                    << nbits) |
+                   bits)
+                  << lsb_in_token) |
+                 low;
+    // TODO(eustas): mark BitReader as unhealthy if nbits > 29 or ret does not
+    //               fit uint32_t
+    return static_cast(ret);
+  }
+
+  // Takes a *clustered* idx. Can only use if HuffRleOnly() is true.
+  // Writes either `*run` (token at or above the LZ77 threshold) or `*value`;
+  // the other output is left untouched.
+  JXL_INLINE void ReadHybridUintClusteredHuffRleOnly(size_t ctx,
+                                                     BitReader* JXL_RESTRICT br,
+                                                     uint32_t* value,
+                                                     uint32_t* run) {
+    JXL_DASSERT(HuffRleOnly());
+    br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+    size_t token = ReadSymbolHuffWithoutRefill(ctx, br);
+    if (JXL_UNLIKELY(token >= lz77_threshold_)) {
+      *run =
+          ReadHybridUintConfig(lz77_length_uint_, token - lz77_threshold_, br) +
+          lz77_min_length_ - 1;
+      return;
+    }
+    *value = ReadHybridUintConfig(configs[ctx], token, br);
+  }
+  // True when LZ77 is active, a prefix code is used, every inspected entry of
+  // the distance context's Huffman table has zero bits and value 1, and that
+  // context's split_token is at most 1.
+  bool HuffRleOnly() {
+    if (lz77_window_ == nullptr) return false;
+    if (!use_prefix_code_) return false;
+    for (size_t i = 0; i < kHuffmanTableBits; i++) {
+      if (huffman_data_[lz77_ctx_].table_[i].bits) return false;
+      if (huffman_data_[lz77_ctx_].table_[i].value != 1) return false;
+    }
+    if (configs[lz77_ctx_].split_token > 1) return false;
+    return true;
+  }
+  // True when an LZ77 window was allocated by the constructor.
+  bool UsesLZ77() { return lz77_window_ != nullptr; }
+
+  // Takes a *clustered* idx. Inlined, for use in hot paths.
+  // With `uses_lz77`, first drains any pending copy from the window; otherwise
+  // decodes a token, which is either a literal (stored in the window) or a
+  // length that starts a new copy at a decoded distance.
+  template
+  JXL_INLINE size_t ReadHybridUintClusteredInlined(size_t ctx,
+                                                   BitReader* JXL_RESTRICT br) {
+    if (uses_lz77) {
+      if (JXL_UNLIKELY(num_to_copy_ > 0)) {
+        size_t ret = lz77_window_[(copy_pos_++) & kWindowMask];
+        num_to_copy_--;
+        lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+        return ret;
+      }
+    }
+
+    br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+    size_t token = ReadSymbolWithoutRefill(ctx, br);
+    if (uses_lz77) {
+      if (JXL_UNLIKELY(token >= lz77_threshold_)) {
+        num_to_copy_ = ReadHybridUintConfig(lz77_length_uint_,
+                                            token - lz77_threshold_, br) +
+                       lz77_min_length_;
+        br->Refill();  // covers ReadSymbolWithoutRefill + PeekBits
+        // Distance code.
+        size_t token = ReadSymbolWithoutRefill(lz77_ctx_, br);
+        size_t distance = ReadHybridUintConfig(configs[lz77_ctx_], token, br);
+        if (JXL_LIKELY(distance < num_special_distances_)) {
+          distance = special_distances_[distance];
+        } else {
+          distance = distance + 1 - num_special_distances_;
+        }
+        // Clamp the distance to what has been decoded and to the window size.
+        if (JXL_UNLIKELY(distance > num_decoded_)) {
+          distance = num_decoded_;
+        }
+        if (JXL_UNLIKELY(distance > kWindowSize)) {
+          distance = kWindowSize;
+        }
+        copy_pos_ = num_decoded_ - distance;
+        if (JXL_UNLIKELY(distance == 0)) {
+          JXL_DASSERT(lz77_window_ != nullptr);
+          // distance 0 -> num_decoded_ == copy_pos_ == 0
+          size_t to_fill = std::min(num_to_copy_, kWindowSize);
+          memset(lz77_window_, 0, to_fill * sizeof(lz77_window_[0]));
+        }
+        // TODO(eustas): overflow; mark BitReader as unhealthy
+        if (num_to_copy_ < lz77_min_length_) return 0;
+        // the code below is the same as doing this:
+        // return ReadHybridUintClustered(ctx, br);
+        // but gcc doesn't like recursive inlining
+
+        size_t ret = lz77_window_[(copy_pos_++) & kWindowMask];
+        num_to_copy_--;
+        lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+        return ret;
+      }
+    }
+    size_t ret = ReadHybridUintConfig(configs[ctx], token, br);
+    if (uses_lz77 && lz77_window_)
+      lz77_window_[(num_decoded_++) & kWindowMask] = ret;
+    return ret;
+  }
+
+  // same but not inlined
+  template
+  size_t ReadHybridUintClustered(size_t ctx, BitReader* JXL_RESTRICT br) {
+    return ReadHybridUintClusteredInlined(ctx, br);
+  }
+
+  // inlined only in the no-lz77 case
+  template
+  JXL_INLINE size_t
+  ReadHybridUintClusteredMaybeInlined(size_t ctx, BitReader* JXL_RESTRICT br) {
+    if (uses_lz77) {
+      return ReadHybridUintClustered(ctx, br);
+    } else {
+      return ReadHybridUintClusteredInlined(ctx, br);
+    }
+  }
+
+  // inlined, for use in hot paths
+  // `ctx` is an unclustered context; it is mapped through `context_map`.
+  template
+  JXL_INLINE size_t
+  ReadHybridUintInlined(size_t ctx, BitReader* JXL_RESTRICT br,
+                        const std::vector& context_map) {
+    return ReadHybridUintClustered(context_map[ctx], br);
+  }
+
+  // not inlined, for use in non-hot paths
+  size_t ReadHybridUint(size_t ctx, BitReader* JXL_RESTRICT br,
+                        const std::vector& context_map) {
+    return ReadHybridUintClustered(context_map[ctx], br);
+  }
+
+  // ctx is a *clustered* context!
+  // This function will modify the ANS state as if `count` symbols have been
+  // decoded.
+  // NOTE(review): in the code visible here only the LZ77 window and
+  // `num_decoded_` are updated; `state_` itself is not written - presumably
+  // because a symbol with full frequency leaves it unchanged. Confirm.
+  bool IsSingleValueAndAdvance(size_t ctx, uint32_t* value, size_t count) {
+    // TODO(veluca): No optimization for Huffman mode yet.
+    if (use_prefix_code_) return false;
+    // TODO(eustas): propagate "degenerate_symbol" to simplify this method.
+    const uint32_t res = state_ & (ANS_TAB_SIZE - 1u);
+    const AliasTable::Entry* table = &alias_tables_[ctx << log_alpha_size_];
+    AliasTable::Symbol symbol =
+        AliasTable::Lookup(table, res, log_entry_size_, entry_size_minus_1_);
+    if (symbol.freq != ANS_TAB_SIZE) return false;
+    if (configs[ctx].split_token <= symbol.value) return false;
+    if (symbol.value >= lz77_threshold_) return false;
+    *value = symbol.value;
+    if (lz77_window_) {
+      for (size_t i = 0; i < count; i++) {
+        lz77_window_[(num_decoded_++) & kWindowMask] = symbol.value;
+      }
+    }
+    return true;
+  }
+
+  // Snapshot of the reader state. `lz77_window` holds the
+  // kMaxCheckpointInterval window entries starting at the checkpointed
+  // `num_decoded` position.
+  static constexpr size_t kMaxCheckpointInterval = 512;
+  struct Checkpoint {
+    uint32_t state;
+    uint32_t num_to_copy;
+    uint32_t copy_pos;
+    uint32_t num_decoded;
+    uint32_t lz77_window[kMaxCheckpointInterval];
+  };
+  // Stores the current state into `checkpoint`, including (when LZ77 is in
+  // use) the next kMaxCheckpointInterval window slots, which decoding may
+  // overwrite before a Restore().
+  void Save(Checkpoint* checkpoint) {
+    checkpoint->state = state_;
+    checkpoint->num_decoded = num_decoded_;
+    checkpoint->num_to_copy = num_to_copy_;
+    checkpoint->copy_pos = copy_pos_;
+    if (lz77_window_) {
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {
+        // The range wraps around the end of the circular window: copy the tail
+        // first, then the part at the beginning.
+        memcpy(checkpoint->lz77_window, lz77_window_ + win_start,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(checkpoint->lz77_window + (kWindowSize - win_start),
+               lz77_window_, win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+  // Rolls the reader back to `checkpoint`. At most kMaxCheckpointInterval
+  // values may have been decoded since the matching Save() (debug-asserted).
+  void Restore(const Checkpoint& checkpoint) {
+    state_ = checkpoint.state;
+    JXL_DASSERT(num_decoded_ <=
+                checkpoint.num_decoded + kMaxCheckpointInterval);
+    num_decoded_ = checkpoint.num_decoded;
+    num_to_copy_ = checkpoint.num_to_copy;
+    copy_pos_ = checkpoint.copy_pos;
+    if (lz77_window_) {
+      size_t win_start = num_decoded_ & kWindowMask;
+      size_t win_end = (num_decoded_ + kMaxCheckpointInterval) & kWindowMask;
+      if (win_end > win_start) {
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (win_end - win_start) * sizeof(*lz77_window_));
+      } else {
+        // Wrapped range: mirror of the two-part copy done in Save().
+        memcpy(lz77_window_ + win_start, checkpoint.lz77_window,
+               (kWindowSize - win_start) * sizeof(*lz77_window_));
+        memcpy(lz77_window_, checkpoint.lz77_window + (kWindowSize - win_start),
+               win_end * sizeof(*lz77_window_));
+      }
+    }
+  }
+
+ private:
+  const AliasTable::Entry* JXL_RESTRICT alias_tables_;  // not owned
+  const HuffmanDecodingData* huffman_data_;
+  bool use_prefix_code_;
+  uint32_t state_ = ANS_SIGNATURE << 16u;
+  const HybridUintConfig* JXL_RESTRICT configs;
+  uint32_t log_alpha_size_{};
+  uint32_t log_entry_size_{};
+  uint32_t entry_size_minus_1_{};
+
+  // LZ77 structures and constants.
+  static constexpr size_t kWindowMask = kWindowSize - 1;
+  CacheAlignedUniquePtr lz77_window_storage_;
+  uint32_t* lz77_window_ = nullptr;
+  uint32_t num_decoded_ = 0;
+  uint32_t num_to_copy_ = 0;
+  uint32_t copy_pos_ = 0;
+  uint32_t lz77_ctx_ = 0;
+  uint32_t lz77_min_length_ = 0;
+  uint32_t lz77_threshold_ = 1 << 20;  // bigger than any symbol.
+  HybridUintConfig lz77_length_uint_;
+  uint32_t special_distances_[kNumSpecialDistances]{};
+  uint32_t num_special_distances_{};
+};
+
+Status DecodeHistograms(BitReader* br, size_t num_contexts, ANSCode* code,
+                        std::vector* context_map,
+                        bool disallow_lz77 = false);
+
+// Exposed for tests.
+Status DecodeUintConfigs(size_t log_alpha_size,
+                         std::vector* uint_config,
+                         BitReader* br);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_DEC_ANS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_bit_reader.h b/third-party/libjxl/libjxl/lib/jxl/dec_bit_reader.h
new file mode 100644
index 0000000000..aea44505a3
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_bit_reader.h
@@ -0,0 +1,352 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_BIT_READER_H_
+#define LIB_JXL_DEC_BIT_READER_H_
+
+// Bounds-checked bit reader; 64-bit buffer with support for deferred refills
+// and switching to reading byte-aligned words.
+
+// NOTE(review): the targets of the three system includes below (and of the
+// BMI2 include, plus several template argument lists further down) are
+// missing in this copy of the patch; restore them from upstream libjxl.
+#include
+#include
+#include  // memcpy
+
+#ifdef __BMI2__
+#include
+#endif
+
+#include "lib/jxl/base/byte_order.h"
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/span.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/common.h"
+
+namespace jxl {
+
+// Reads bits previously written to memory by BitWriter. Uses unaligned 8-byte
+// little-endian loads.
+//
+// Usage contract visible in this class: construct over a byte range, read via
+// Refill()/Peek*()/Consume() or the Read*() helpers, and call Close() exactly
+// once before destruction (the destructor asserts this).
+class BitReader {
+ public:
+  static constexpr size_t kMaxBitsPerCall = 56;
+
+  // Constructs an invalid BitReader, to be overwritten before usage.
+  BitReader()
+      : buf_(0),
+        bits_in_buf_(0),
+        next_byte_{nullptr},
+        end_minus_8_{nullptr},
+        first_byte_(nullptr) {}
+  BitReader(const BitReader&) = delete;
+
+  // bytes need not be aligned nor padded!
+  template
+  explicit BitReader(const ArrayLike& bytes)
+      : buf_(0),
+        bits_in_buf_(0),
+        next_byte_(bytes.data()),
+        // Assumes first_byte_ >= 8.
+        end_minus_8_(bytes.data() - 8 + bytes.size()),
+        first_byte_(bytes.data()) {
+    Refill();
+  }
+  ~BitReader() {
+    // Close() must be called before destroying an initialized bit reader.
+    // Invalid bit readers will have a nullptr in first_byte_.
+    JXL_ASSERT(close_called_ || !first_byte_);
+  }
+
+  // Move operator needs to invalidate the other BitReader such that it is
+  // irrelevant if we call Close() on it or not.
+  BitReader& operator=(BitReader&& other) noexcept {
+    // Ensure the current instance was already closed, before we overwrite it
+    // with other.
+    JXL_ASSERT(close_called_ || !first_byte_);
+
+    JXL_DASSERT(!other.close_called_);
+    buf_ = other.buf_;
+    bits_in_buf_ = other.bits_in_buf_;
+    next_byte_ = other.next_byte_;
+    end_minus_8_ = other.end_minus_8_;
+    first_byte_ = other.first_byte_;
+    overread_bytes_ = other.overread_bytes_;
+    close_called_ = other.close_called_;
+
+    // A null first_byte_ marks `other` as invalid, so its destructor's
+    // assertion passes without a Close().
+    other.first_byte_ = nullptr;
+    other.next_byte_ = nullptr;
+    return *this;
+  }
+  BitReader& operator=(const BitReader& other) = delete;
+
+  // For time-critical reads, refills can be shared by multiple reads.
+  // Based on variant 4 (plus bounds-checking), see
+  // fgiesen.wordpress.com/2018/02/20/reading-bits-in-far-too-many-ways-part-2/
+  JXL_INLINE void Refill() {
+    if (JXL_UNLIKELY(next_byte_ > end_minus_8_)) {
+      // Fewer than 8 bytes remain: use the slow byte-by-byte path.
+      BoundsCheckedRefill();
+    } else {
+      // It's safe to load 64 bits; insert valid (possibly nonzero) bits above
+      // bits_in_buf_. The shift requires bits_in_buf_ < 64.
+      buf_ |= LoadLE64(next_byte_) << bits_in_buf_;
+
+      // Advance by bytes fully absorbed into the buffer.
+      next_byte_ += (63 - bits_in_buf_) >> 3;
+
+      // We absorbed a multiple of 8 bits, so the lower 3 bits of bits_in_buf_
+      // must remain unchanged, otherwise the next refill's shifted bits will
+      // not align with buf_. Set the three upper bits so the result >= 56.
+      bits_in_buf_ |= 56;
+      JXL_DASSERT(56 <= bits_in_buf_ && bits_in_buf_ < 64);
+    }
+  }
+
+  // Returns the bits that would be returned by Read without calling Advance().
+  // It is legal to PEEK at more bits than present in the bitstream (required
+  // by Huffman), and those bits will be zero.
+  template
+  JXL_INLINE uint64_t PeekFixedBits() const {
+    static_assert(N <= kMaxBitsPerCall, "Reading too many bits in one call.");
+    JXL_DASSERT(!close_called_);
+    return buf_ & ((1ULL << N) - 1);
+  }
+
+  // Same as PeekFixedBits, with a run-time bit count (at most kMaxBitsPerCall).
+  JXL_INLINE uint64_t PeekBits(size_t nbits) const {
+    JXL_DASSERT(nbits <= kMaxBitsPerCall);
+    JXL_DASSERT(!close_called_);
+
+    // Slightly faster but requires BMI2. It is infeasible to make the many
+    // callers reside between begin/end_target, especially because only the
+    // callers in dec_ans are time-critical. Therefore only enabled if the
+    // entire binary is compiled for (and thus requires) BMI2.
+#if defined(__BMI2__) && defined(__x86_64__)
+    return _bzhi_u64(buf_, nbits);
+#else
+    const uint64_t mask = (1ULL << nbits) - 1;
+    return buf_ & mask;
+#endif
+  }
+
+  // Removes bits from the buffer. Need not match the previous Peek size, but
+  // the buffer must contain at least num_bits (this prevents consuming more
+  // than the total number of bits).
+  JXL_INLINE void Consume(size_t num_bits) {
+    JXL_DASSERT(!close_called_);
+    JXL_DASSERT(bits_in_buf_ >= num_bits);
+#ifdef JXL_CRASH_ON_ERROR
+    // When JXL_CRASH_ON_ERROR is defined, it is a fatal error to read more bits
+    // than available in the stream. A non-zero overread_bytes_ implies that
+    // next_byte_ is already at the end of the stream, so we don't need to
+    // check that.
+    JXL_ASSERT(bits_in_buf_ >= num_bits + overread_bytes_ * kBitsPerByte);
+#endif
+    bits_in_buf_ -= num_bits;
+    buf_ >>= num_bits;
+  }
+
+  // Refill + peek + consume of `nbits` bits.
+  JXL_INLINE uint64_t ReadBits(size_t nbits) {
+    JXL_DASSERT(!close_called_);
+    Refill();
+    const uint64_t bits = PeekBits(nbits);
+    Consume(nbits);
+    return bits;
+  }
+
+  // Refill + peek + consume of a compile-time number of bits.
+  template
+  JXL_INLINE uint64_t ReadFixedBits() {
+    JXL_DASSERT(!close_called_);
+    Refill();
+    const uint64_t bits = PeekFixedBits();
+    Consume(N);
+    return bits;
+  }
+
+  // Equivalent to calling ReadFixedBits(1) `skip` times, but much faster.
+  // `skip` is typically large.
+  void SkipBits(size_t skip) {
+    JXL_DASSERT(!close_called_);
+    // Buffer is large enough - don't zero buf_ below.
+    if (JXL_UNLIKELY(skip <= bits_in_buf_)) {
+      Consume(skip);
+      return;
+    }
+
+    // First deduct what we can satisfy from the buffer
+    skip -= bits_in_buf_;
+    bits_in_buf_ = 0;
+    // Not enough to call Advance - that may leave some bits in the buffer
+    // which were previously ABOVE bits_in_buf.
+    buf_ = 0;
+
+    // Skip whole bytes
+    const size_t whole_bytes = skip / kBitsPerByte;
+    skip %= kBitsPerByte;
+    if (JXL_UNLIKELY(whole_bytes >
+                     static_cast(end_minus_8_ + 8 - next_byte_))) {
+      // This is already an overflow condition (skipping past the end of the bit
+      // stream). However if we increase next_byte_ too much we risk overflowing
+      // that value and potentially making it valid again (next_byte_ < end).
+      // This will set next_byte_ to the end of the stream and still consume
+      // some bits in overread_bytes_, however the TotalBitsConsumed() will be
+      // incorrect (still larger than the TotalBytes()).
+      next_byte_ = end_minus_8_ + 8;
+      skip += kBitsPerByte;
+    } else {
+      next_byte_ += whole_bytes;
+    }
+
+    Refill();
+    Consume(skip);
+  }
+
+  // Number of bits consumed so far: bytes loaded (real plus over-read) minus
+  // the bits still waiting in the buffer.
+  size_t TotalBitsConsumed() const {
+    const size_t bytes_read = static_cast(next_byte_ - first_byte_);
+    return (bytes_read + overread_bytes_) * kBitsPerByte - bits_in_buf_;
+  }
+
+  // Skips to the next byte boundary; fails if any skipped padding bit is set.
+  Status JumpToByteBoundary() {
+    const size_t remainder = TotalBitsConsumed() % kBitsPerByte;
+    if (remainder == 0) return true;
+    if (JXL_UNLIKELY(ReadBits(kBitsPerByte - remainder) != 0)) {
+      return JXL_FAILURE("Non-zero padding bits");
+    }
+    return true;
+  }
+
+  // For interoperability with other bitreaders (for resuming at
+  // non-byte-aligned positions).
+  const uint8_t* FirstByte() const { return first_byte_; }
+  size_t TotalBytes() const {
+    return static_cast(end_minus_8_ + 8 - first_byte_);
+  }
+
+  // Returns span of the remaining (unconsumed) bytes, e.g. for passing to
+  // external decoders such as Brotli.
+  // Requires the reader to be positioned on a byte boundary (asserted).
+  Span GetSpan() const {
+    JXL_DASSERT(first_byte_ != nullptr);
+    JXL_ASSERT(TotalBitsConsumed() % kBitsPerByte == 0);
+    const size_t offset = TotalBitsConsumed() / kBitsPerByte;  // no remainder
+    JXL_ASSERT(offset <= TotalBytes());
+    return Span(first_byte_ + offset, TotalBytes() - offset);
+  }
+
+  // Returns whether all the bits read so far have been within the input bounds.
+  // When reading past the EOF, the Read*() and Consume() functions return zeros
+  // but flag a failure when calling Close() without checking this function.
+  Status AllReadsWithinBounds() {
+    // Mark up to which point the user checked the out of bounds condition. If
+    // the user handles the condition at higher level (e.g. fetch more bytes
+    // from network, return a custom JXL_FAILURE, ...), Close() should not
+    // output a debug error (which would break tests with JXL_CRASH_ON_ERROR
+    // even when legitimately handling the situation at higher level). This is
+    // used by Bundle::CanRead.
+    checked_out_of_bounds_bits_ = TotalBitsConsumed();
+    if (TotalBitsConsumed() > TotalBytes() * kBitsPerByte) {
+      return false;
+    }
+    return true;
+  }
+
+  // Close the bit reader and return whether all the previous reads were
+  // successful. Close must be called once.
+  Status Close() {
+    JXL_DASSERT(!close_called_);
+    close_called_ = true;
+    if (!first_byte_) return true;
+    // Only report an over-read that happened after the last point the caller
+    // verified with AllReadsWithinBounds().
+    if (TotalBitsConsumed() > checked_out_of_bounds_bits_ &&
+        TotalBitsConsumed() > TotalBytes() * kBitsPerByte) {
+      return JXL_FAILURE("Read more bits than available in the bit_reader");
+    }
+    return true;
+  }
+
+ private:
+  // Separate function avoids inlining this relatively cold code into callers.
+  JXL_NOINLINE void BoundsCheckedRefill() {
+    const uint8_t* end = end_minus_8_ + 8;
+
+    // Read whole bytes until we have [56, 64) bits (same as LoadLE64)
+    for (; bits_in_buf_ < 64 - kBitsPerByte; bits_in_buf_ += kBitsPerByte) {
+      if (next_byte_ >= end) break;
+      buf_ |= static_cast(*next_byte_++) << bits_in_buf_;
+    }
+    JXL_DASSERT(bits_in_buf_ < 64);
+
+    // Add extra bytes as 0 at the end of the stream in the bit_buffer_. If
+    // these bits are read, Close() will return a failure.
+    size_t extra_bytes = (63 - bits_in_buf_) / kBitsPerByte;
+    overread_bytes_ += extra_bytes;
+    bits_in_buf_ += extra_bytes * kBitsPerByte;
+
+    JXL_DASSERT(bits_in_buf_ < 64);
+    JXL_DASSERT(bits_in_buf_ >= 56);
+  }
+
+  // Reads a little-endian 16-bit word directly from the byte stream if two
+  // bytes remain; otherwise records a 2-byte over-read and returns 0.
+  JXL_NOINLINE uint32_t BoundsCheckedReadByteAlignedWord() {
+    if (next_byte_ + 1 < end_minus_8_ + 8) {
+      uint32_t ret = LoadLE16(next_byte_);
+      next_byte_ += 2;
+      return ret;
+    }
+    overread_bytes_ += 2;
+    return 0;
+  }
+
+  uint64_t buf_;
+  size_t bits_in_buf_;  // [0, 64)
+  const uint8_t* JXL_RESTRICT next_byte_;
+  const uint8_t* end_minus_8_;  // for refill bounds check
+  const uint8_t* first_byte_;   // for GetSpan
+
+  // Number of bytes past the end that were loaded into the buf_. These bytes
+  // are not read from memory, but instead assumed 0. It is an error (likely due
+  // to an invalid stream) to Consume() more bits than specified in the range
+  // passed to the constructor.
+  uint64_t overread_bytes_{0};
+  bool close_called_{false};
+
+  // TotalBitsConsumed() at the time of the last AllReadsWithinBounds() call.
+  uint64_t checked_out_of_bounds_bits_{0};
+};
+
+// Closes a BitReader when the BitReaderScopedCloser goes out of scope. When
+// closing the bit reader, if the status result was failure it sets this failure
+// to the passed variable pointer. Typical usage:
+//
+// Status ret = true;
+// {
+//   BitReader reader(...);
+//   BitReaderScopedCloser reader_closer(&reader, &ret);
+//
+//   // ... code that can return errors here ...
+// }
+// // ... more code that doesn't use the BitReader.
+// return ret; + +class BitReaderScopedCloser { + public: + BitReaderScopedCloser(BitReader* reader, Status* status) + : reader_(reader), status_(status) { + JXL_DASSERT(reader_ != nullptr); + JXL_DASSERT(status_ != nullptr); + } + ~BitReaderScopedCloser() { + if (reader_ != nullptr) { + Status close_ret = reader_->Close(); + if (!close_ret) *status_ = close_ret; + } + } + void CloseAndSuppressError() { + JXL_ASSERT(reader_ != nullptr); + (void)reader_->Close(); + reader_ = nullptr; + } + BitReaderScopedCloser(const BitReaderScopedCloser&) = delete; + + private: + BitReader* reader_; + Status* status_; +}; + +} // namespace jxl + +#endif // LIB_JXL_DEC_BIT_READER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_cache.cc b/third-party/libjxl/libjxl/lib/jxl/dec_cache.cc new file mode 100644 index 0000000000..5cf34ebbbd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_cache.cc @@ -0,0 +1,231 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+#include "lib/jxl/dec_cache.h"
+
+#include "lib/jxl/blending.h"
+#include "lib/jxl/render_pipeline/stage_blending.h"
+#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h"
+#include "lib/jxl/render_pipeline/stage_epf.h"
+#include "lib/jxl/render_pipeline/stage_from_linear.h"
+#include "lib/jxl/render_pipeline/stage_gaborish.h"
+#include "lib/jxl/render_pipeline/stage_noise.h"
+#include "lib/jxl/render_pipeline/stage_patches.h"
+#include "lib/jxl/render_pipeline/stage_splines.h"
+#include "lib/jxl/render_pipeline/stage_spot.h"
+#include "lib/jxl/render_pipeline/stage_to_linear.h"
+#include "lib/jxl/render_pipeline/stage_tone_mapping.h"
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+#include "lib/jxl/render_pipeline/stage_write.h"
+#include "lib/jxl/render_pipeline/stage_xyb.h"
+#include "lib/jxl/render_pipeline/stage_ycbcr.h"
+
+namespace jxl {
+
+// Builds `render_pipeline` for the current frame by appending stages in a
+// fixed order driven by the frame header and `options`: chroma upsampling,
+// Gaborish, EPF, extra-channel upsampling, patches, splines, frame
+// upsampling, noise, DC-frame / reference-frame writes, colour transform,
+// blending, spot colours, tone mapping and finally the output writer.
+// Returns a failure if tone mapping is requested on a colour space that cannot
+// be converted to linear; otherwise returns whether the finalized pipeline
+// reports itself as initialized.
+Status PassesDecoderState::PreparePipeline(ImageBundle* decoded,
+                                           PipelineOptions options) {
+  const FrameHeader& frame_header = shared->frame_header;
+  // 3 colour channels + extra channels (+ 3 noise channels if noise is on).
+  size_t num_c = 3 + frame_header.nonserialized_metadata->m.num_extra_channels;
+  if ((frame_header.flags & FrameHeader::kNoise) != 0) {
+    num_c += 3;
+  }
+
+  if (frame_header.CanBeReferenced()) {
+    // Necessary so that SetInputSizes() can allocate output buffers as needed.
+    frame_storage_for_referencing = ImageBundle(decoded->metadata());
+  }
+
+  RenderPipeline::Builder builder(num_c);
+
+  if (options.use_slow_render_pipeline) {
+    builder.UseSimpleImplementation();
+  }
+
+  if (!frame_header.chroma_subsampling.Is444()) {
+    for (size_t c = 0; c < 3; c++) {
+      if (frame_header.chroma_subsampling.HShift(c) != 0) {
+        builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/true));
+      }
+      if (frame_header.chroma_subsampling.VShift(c) != 0) {
+        builder.AddStage(GetChromaUpsamplingStage(c, /*horizontal=*/false));
+      }
+    }
+  }
+
+  if (frame_header.loop_filter.gab) {
+    builder.AddStage(GetGaborishStage(frame_header.loop_filter));
+  }
+
+  {
+    // EPF stage 0 is only added with 3 iterations, stage 1 with at least 1 and
+    // stage 2 with at least 2; they are appended in the order 0, 1, 2.
+    const LoopFilter& lf = frame_header.loop_filter;
+    if (lf.epf_iters >= 3) {
+      builder.AddStage(GetEPFStage(lf, sigma, 0));
+    }
+    if (lf.epf_iters >= 1) {
+      builder.AddStage(GetEPFStage(lf, sigma, 1));
+    }
+    if (lf.epf_iters >= 2) {
+      builder.AddStage(GetEPFStage(lf, sigma, 2));
+    }
+  }
+
+  // Extra channels are upsampled together with the colour channels ("late")
+  // only if the frame is upsampled and every extra channel uses the same
+  // factor as the frame.
+  bool late_ec_upsample = frame_header.upsampling != 1;
+  for (auto ecups : frame_header.extra_channel_upsampling) {
+    if (ecups != frame_header.upsampling) {
+      // If patches are applied, either frame_header.upsampling == 1 or
+      // late_ec_upsample is true.
+      late_ec_upsample = false;
+    }
+  }
+
+  if (!late_ec_upsample) {
+    for (size_t ec = 0; ec < frame_header.extra_channel_upsampling.size();
+         ec++) {
+      if (frame_header.extra_channel_upsampling[ec] != 1) {
+        builder.AddStage(GetUpsamplingStage(
+            frame_header.nonserialized_metadata->transform_data, 3 + ec,
+            CeilLog2Nonzero(frame_header.extra_channel_upsampling[ec])));
+      }
+    }
+  }
+
+  if ((frame_header.flags & FrameHeader::kPatches) != 0) {
+    builder.AddStage(
+        GetPatchesStage(&shared->image_features.patches,
+                        3 + shared->metadata->m.num_extra_channels));
+  }
+  if ((frame_header.flags & FrameHeader::kSplines) != 0) {
+    builder.AddStage(GetSplineStage(&shared->image_features.splines));
+  }
+
+  if (frame_header.upsampling != 1) {
+    size_t nb_channels =
+        3 +
+        (late_ec_upsample ? frame_header.extra_channel_upsampling.size() : 0);
+    for (size_t c = 0; c < nb_channels; c++) {
+      builder.AddStage(GetUpsamplingStage(
+          frame_header.nonserialized_metadata->transform_data, c,
+          CeilLog2Nonzero(frame_header.upsampling)));
+    }
+  }
+
+  if ((frame_header.flags & FrameHeader::kNoise) != 0) {
+    // The noise channels are the last three; `num_c - 3` is their first index.
+    builder.AddStage(GetConvolveNoiseStage(num_c - 3));
+    builder.AddStage(GetAddNoiseStage(shared->image_features.noise_params,
+                                      shared->cmap, num_c - 3));
+  }
+  if (frame_header.dc_level != 0) {
+    builder.AddStage(GetWriteToImage3FStage(
+        &shared_storage.dc_frames[frame_header.dc_level - 1]));
+  }
+
+  if (frame_header.CanBeReferenced() &&
+      frame_header.save_before_color_transform) {
+    builder.AddStage(GetWriteToImageBundleStage(
+        &frame_storage_for_referencing, output_encoding_info.color_encoding));
+  }
+
+  // Locate the first alpha extra channel, if any (pipeline index = 3 + i).
+  bool has_alpha = false;
+  size_t alpha_c = 0;
+  for (size_t i = 0; i < decoded->metadata()->extra_channel_info.size(); i++) {
+    if (decoded->metadata()->extra_channel_info[i].type ==
+        ExtraChannel::kAlpha) {
+      has_alpha = true;
+      alpha_c = 3 + i;
+      break;
+    }
+  }
+
+  if (fast_xyb_srgb8_conversion) {
+    // Fast path: a single stage converts XYB straight to 8-bit sRGB in the
+    // caller's buffer. Compiled out entirely when JXL_HIGH_PRECISION is set.
+#if !JXL_HIGH_PRECISION
+    JXL_ASSERT(!NeedsBlending(this));
+    JXL_ASSERT(!frame_header.CanBeReferenced() ||
+               frame_header.save_before_color_transform);
+    JXL_ASSERT(!options.render_spotcolors ||
+               !decoded->metadata()->Find(ExtraChannel::kSpotColor));
+    bool is_rgba = (main_output.format.num_channels == 4);
+    uint8_t* rgb_output = reinterpret_cast(main_output.buffer);
+    builder.AddStage(GetFastXYBTosRGB8Stage(rgb_output, main_output.stride,
+                                            width, height, is_rgba, has_alpha,
+                                            alpha_c));
+#endif
+  } else {
+    // `linear` tracks whether the pixel data is currently in linear light, so
+    // that a FromLinear stage is inserted before any stage that needs the
+    // output encoding.
+    bool linear = false;
+    if (frame_header.color_transform == ColorTransform::kYCbCr) {
+      builder.AddStage(GetYCbCrStage());
+    } else if (frame_header.color_transform == ColorTransform::kXYB) {
+      builder.AddStage(GetXYBStage(output_encoding_info));
+      if (output_encoding_info.color_encoding.GetColorSpace() !=
+          ColorSpace::kXYB) {
+        linear = true;
+      }
+    }  // Nothing to do for kNone.
+
+    if (options.coalescing && NeedsBlending(this)) {
+      if (linear) {
+        builder.AddStage(GetFromLinearStage(output_encoding_info));
+        linear = false;
+      }
+      builder.AddStage(
+          GetBlendingStage(this, output_encoding_info.color_encoding));
+    }
+
+    if (options.coalescing && frame_header.CanBeReferenced() &&
+        !frame_header.save_before_color_transform) {
+      if (linear) {
+        builder.AddStage(GetFromLinearStage(output_encoding_info));
+        linear = false;
+      }
+      builder.AddStage(GetWriteToImageBundleStage(
+          &frame_storage_for_referencing, output_encoding_info.color_encoding));
+    }
+
+    if (options.render_spotcolors &&
+        frame_header.nonserialized_metadata->m.Find(ExtraChannel::kSpotColor)) {
+      for (size_t i = 0; i < decoded->metadata()->extra_channel_info.size();
+           i++) {
+        // Don't use Find() because there may be multiple spot color channels.
+        const ExtraChannelInfo& eci =
+            decoded->metadata()->extra_channel_info[i];
+        if (eci.type == ExtraChannel::kSpotColor) {
+          builder.AddStage(GetSpotColorStage(3 + i, eci.spot_color));
+        }
+      }
+    }
+
+    // Tone mapping operates on linear data: convert to linear first if needed.
+    auto tone_mapping_stage = GetToneMappingStage(output_encoding_info);
+    if (tone_mapping_stage) {
+      if (!linear) {
+        auto to_linear_stage = GetToLinearStage(output_encoding_info);
+        if (!to_linear_stage) {
+          return JXL_FAILURE(
+              "attempting to perform tone mapping on colorspace not "
+              "convertible to linear");
+        }
+        builder.AddStage(std::move(to_linear_stage));
+        linear = true;
+      }
+      builder.AddStage(std::move(tone_mapping_stage));
+    }
+
+    if (linear) {
+      builder.AddStage(GetFromLinearStage(output_encoding_info));
+      linear = false;
+    }
+
+    // Final sink: the user-provided callback/buffer if present, otherwise the
+    // `decoded` image bundle.
+    if (main_output.callback.IsPresent() || main_output.buffer) {
+      builder.AddStage(GetWriteToOutputStage(main_output, width, height,
+                                             has_alpha, unpremul_alpha, alpha_c,
+                                             undo_orientation, extra_output));
+    } else {
+      builder.AddStage(GetWriteToImageBundleStage(
+          decoded, output_encoding_info.color_encoding));
+    }
+  }
+  render_pipeline = std::move(builder).Finalize(shared->frame_dim);
+  return render_pipeline->IsInitialized();
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_cache.h b/third-party/libjxl/libjxl/lib/jxl/dec_cache.h
new file mode 100644
index 0000000000..051638a2cb
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/dec_cache.h
@@ -0,0 +1,258 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_DEC_CACHE_H_
+#define LIB_JXL_DEC_CACHE_H_
+
+// NOTE(review): the targets of the four angle-bracket includes below are
+// missing in this copy of the patch; restore them from upstream libjxl.
+#include
+#include
+
+#include
+#include  // HWY_ALIGN_MAX
+
+#include "lib/jxl/ac_strategy.h"
+#include "lib/jxl/coeff_order.h"
+#include "lib/jxl/common.h"
+#include "lib/jxl/convolve.h"
+#include "lib/jxl/dec_group_border.h"
+#include "lib/jxl/dec_noise.h"
+#include "lib/jxl/image.h"
+#include "lib/jxl/passes_state.h"
+#include "lib/jxl/quant_weights.h"
+#include "lib/jxl/render_pipeline/render_pipeline.h"
+#include "lib/jxl/render_pipeline/stage_upsampling.h"
+#include "lib/jxl/sanitizers.h"
+
+namespace jxl {
+
+constexpr size_t kSigmaBorder = 1;
+// Number of extra blocks added on each side of the EPF sigma image (see the
+// `sigma` allocation in PassesDecoderState::Init).
+constexpr size_t kSigmaPadding = 2;
+
+// Bundle of the three user callbacks (init / run / destroy) used for
+// line-by-line pixel output, plus the opaque pointer handed to `init`.
+// The callbacks must be either all set or all null (asserted when asserts are
+// enabled).
+struct PixelCallback {
+  PixelCallback() = default;
+  PixelCallback(JxlImageOutInitCallback init, JxlImageOutRunCallback run,
+                JxlImageOutDestroyCallback destroy, void* init_opaque)
+      : init(init), run(run), destroy(destroy), init_opaque(init_opaque) {
+#if JXL_ENABLE_ASSERT
+    const bool has_init = init != nullptr;
+    const bool has_run = run != nullptr;
+    const bool has_destroy = destroy != nullptr;
+    JXL_ASSERT(has_init == has_run && has_run == has_destroy);
+#endif
+  }
+
+  // A callback is considered present when `run` is set.
+  bool IsPresent() const { return run != nullptr; }
+
+  // Forwards to the user's init callback; must only be called when present.
+  void* Init(size_t num_threads, size_t num_pixels) const {
+    return init(init_opaque, num_threads, num_pixels);
+  }
+
+  JxlImageOutInitCallback init = nullptr;
+  JxlImageOutRunCallback run = nullptr;
+  JxlImageOutDestroyCallback destroy = nullptr;
+  void* init_opaque = nullptr;
+};
+
+// Describes one output target of the decoder: either a callback or a pixel
+// buffer, together with the pixel format to produce.
+struct ImageOutput {
+  // Pixel format of the output pixels, used for buffer and callback output.
+  JxlPixelFormat format;
+  // Output bit depth for unsigned data types, used for float to int conversion.
+  size_t bits_per_sample;
+  // Callback for line-by-line output.
+  PixelCallback callback;
+  // Pixel buffer for image output.
+  void* buffer;
+  size_t buffer_size;
+  // Length of a row of image_buffer in bytes (based on oriented width).
+  size_t stride;
+};
+
+// Per-frame decoder state.
All the images here should be accessed through a +// group rect (either with block units or pixel units). +struct PassesDecoderState { + PassesSharedState shared_storage; + // Allows avoiding copies for encoder loop. + const PassesSharedState* JXL_RESTRICT shared = &shared_storage; + + // 8x upsampling stage for DC. + std::unique_ptr upsampler8x; + + // For ANS decoding. + std::vector code; + std::vector> context_map; + + // Multiplier to be applied to the quant matrices of the x channel. + float x_dm_multiplier; + float b_dm_multiplier; + + // Sigma values for EPF. + ImageF sigma; + + // Image dimensions before applying undo_orientation. + size_t width; + size_t height; + ImageOutput main_output; + std::vector extra_output; + + // Whether to use int16 float-XYB-to-uint8-srgb conversion. + bool fast_xyb_srgb8_conversion; + + // If true, the RGBA output will be unpremultiplied before writing to the + // output. + bool unpremul_alpha; + + // The render pipeline will apply this orientation to bring the image to the + // intended display orientation. + Orientation undo_orientation; + + // Used for seeding noise. + size_t visible_frame_index = 0; + size_t nonvisible_frame_index = 0; + + // Keep track of the transform types used. + std::atomic used_acs{0}; + + // Storage for coefficients if in "accumulate" mode. + std::unique_ptr coefficients = make_unique>(0, 0); + + // Rendering pipeline. + std::unique_ptr render_pipeline; + + // Storage for the current frame if it can be referenced by future frames. + ImageBundle frame_storage_for_referencing; + + struct PipelineOptions { + bool use_slow_render_pipeline; + bool coalescing; + bool render_spotcolors; + }; + + Status PreparePipeline(ImageBundle* decoded, PipelineOptions options); + + // Information for colour conversions. + OutputEncodingInfo output_encoding_info; + + // Initializes decoder-specific structures using information from *shared. 
+ Status Init() { + x_dm_multiplier = + std::pow(1 / (1.25f), shared->frame_header.x_qm_scale - 2.0f); + b_dm_multiplier = + std::pow(1 / (1.25f), shared->frame_header.b_qm_scale - 2.0f); + + main_output.callback = PixelCallback(); + main_output.buffer = nullptr; + extra_output.clear(); + + fast_xyb_srgb8_conversion = false; + unpremul_alpha = false; + undo_orientation = Orientation::kIdentity; + + used_acs = 0; + + upsampler8x = GetUpsamplingStage(shared->metadata->transform_data, 0, 3); + if (shared->frame_header.loop_filter.epf_iters > 0) { + sigma = ImageF(shared->frame_dim.xsize_blocks + 2 * kSigmaPadding, + shared->frame_dim.ysize_blocks + 2 * kSigmaPadding); + } + return true; + } + + // Initialize the decoder state after all of DC is decoded. + Status InitForAC(ThreadPool* pool) { + shared_storage.coeff_order_size = 0; + for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { + if (((1 << o) & used_acs) == 0) continue; + uint8_t ord = kStrategyOrder[o]; + shared_storage.coeff_order_size = + std::max(kCoeffOrderOffset[3 * (ord + 1)] * kDCTBlockSize, + shared_storage.coeff_order_size); + } + size_t sz = shared_storage.frame_header.passes.num_passes * + shared_storage.coeff_order_size; + if (sz > shared_storage.coeff_orders.size()) { + shared_storage.coeff_orders.resize(sz); + } + return true; + } + + // Fills the `state->filter_weights.sigma` image with the precomputed sigma + // values in the area inside `block_rect`. Accesses the AC strategy, quant + // field and epf_sharpness fields in the corresponding positions. + void ComputeSigma(const Rect& block_rect, PassesDecoderState* state); +}; + +// Temp images required for decoding a single group. Reduces memory allocations +// for large images because we only initialize min(#threads, #groups) instances. 
+struct GroupDecCache { + void InitOnce(size_t num_passes, size_t used_acs) { + for (size_t i = 0; i < num_passes; i++) { + if (num_nzeroes[i].xsize() == 0) { + // Allocate enough for a whole group - partial groups on the + // right/bottom border just use a subset. The valid size is passed via + // Rect. + + num_nzeroes[i] = Image3I(kGroupDimInBlocks, kGroupDimInBlocks); + } + } + size_t max_block_area = 0; + + for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { + AcStrategy acs = AcStrategy::FromRawStrategy(o); + if ((used_acs & (1 << o)) == 0) continue; + size_t area = + acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize; + max_block_area = std::max(area, max_block_area); + } + + if (max_block_area > max_block_area_) { + max_block_area_ = max_block_area; + // We need 3x float blocks for dequantized coefficients and 1x for scratch + // space for transforms. + float_memory_ = hwy::AllocateAligned(max_block_area_ * 4); + // We need 3x int32 or int16 blocks for quantized coefficients. + int32_memory_ = hwy::AllocateAligned(max_block_area_ * 3); + int16_memory_ = hwy::AllocateAligned(max_block_area_ * 3); + } + + dec_group_block = float_memory_.get(); + scratch_space = dec_group_block + max_block_area_ * 3; + dec_group_qblock = int32_memory_.get(); + dec_group_qblock16 = int16_memory_.get(); + } + + void InitDCBufferOnce() { + if (dc_buffer.xsize() == 0) { + dc_buffer = ImageF(kGroupDimInBlocks + kRenderPipelineXOffset * 2, + kGroupDimInBlocks + 4); + } + } + + // Scratch space used by DecGroupImpl(). + float* dec_group_block; + int32_t* dec_group_qblock; + int16_t* dec_group_qblock16; + + // For TransformToPixels. + float* scratch_space; + // Note that scratch_space is never used at the same time as dec_group_qblock. + // Moreover, only one of dec_group_qblock16 is ever used. + // TODO(veluca): figure out if we can save allocations. + + // AC decoding + Image3I num_nzeroes[kMaxNumPasses]; + + // Buffer for DC upsampling. 
+ ImageF dc_buffer; + + private: + hwy::AlignedFreeUniquePtr float_memory_; + hwy::AlignedFreeUniquePtr int32_memory_; + hwy::AlignedFreeUniquePtr int16_memory_; + size_t max_block_area_ = 0; +}; + +} // namespace jxl + +#endif // LIB_JXL_DEC_CACHE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_context_map.cc b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.cc new file mode 100644 index 0000000000..ffb29aad6b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.cc @@ -0,0 +1,89 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_context_map.h" + +#include +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/inverse_mtf-inl.h" + +namespace jxl { + +namespace { + +Status VerifyContextMap(const std::vector& context_map, + const size_t num_htrees) { + std::vector have_htree(num_htrees); + size_t num_found = 0; + for (const uint8_t htree : context_map) { + if (htree >= num_htrees) { + return JXL_FAILURE("Invalid histogram index in context map."); + } + if (!have_htree[htree]) { + have_htree[htree] = true; + ++num_found; + } + } + if (num_found != num_htrees) { + return JXL_FAILURE("Incomplete context map."); + } + return true; +} + +} // namespace + +Status DecodeContextMap(std::vector* context_map, size_t* num_htrees, + BitReader* input) { + bool is_simple = input->ReadFixedBits<1>(); + if (is_simple) { + int bits_per_entry = input->ReadFixedBits<2>(); + if (bits_per_entry != 0) { + for (size_t i = 0; i < context_map->size(); i++) { + (*context_map)[i] = input->ReadBits(bits_per_entry); + } + } else { + std::fill(context_map->begin(), context_map->end(), 0); + } + } else { + bool use_mtf = input->ReadFixedBits<1>(); + ANSCode code; + std::vector dummy_ctx_map; + // Usage of LZ77 is 
disallowed if decoding only two symbols. This doesn't + // make sense in non-malicious bitstreams, and could cause a stack overflow + // in malicious bitstreams by making every context map require its own + // context map. + JXL_RETURN_IF_ERROR( + DecodeHistograms(input, 1, &code, &dummy_ctx_map, + /*disallow_lz77=*/context_map->size() <= 2)); + ANSSymbolReader reader(&code, input); + size_t i = 0; + uint32_t maxsym = 0; + while (i < context_map->size()) { + uint32_t sym = reader.ReadHybridUintInlined( + 0, input, dummy_ctx_map); + maxsym = sym > maxsym ? sym : maxsym; + (*context_map)[i] = sym; + i++; + } + if (maxsym >= kMaxClusters) { + return JXL_FAILURE("Invalid cluster ID"); + } + if (!reader.CheckANSFinalState()) { + return JXL_FAILURE("Invalid context map"); + } + if (use_mtf) { + InverseMoveToFrontTransform(context_map->data(), context_map->size()); + } + } + *num_htrees = *std::max_element(context_map->begin(), context_map->end()) + 1; + return VerifyContextMap(*context_map, *num_htrees); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_context_map.h b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.h new file mode 100644 index 0000000000..95b8a0ca92 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_context_map.h @@ -0,0 +1,30 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_CONTEXT_MAP_H_ +#define LIB_JXL_DEC_CONTEXT_MAP_H_ + +#include +#include + +#include + +#include "lib/jxl/dec_bit_reader.h" + +namespace jxl { + +// Context map uses uint8_t. +constexpr size_t kMaxClusters = 256; + +// Reads the context map from the bit stream. On calling this function, +// context_map->size() must be the number of possible context ids. +// Sets *num_htrees to the number of different histogram ids in +// *context_map. 
+Status DecodeContextMap(std::vector* context_map, size_t* num_htrees, + BitReader* input); + +} // namespace jxl + +#endif // LIB_JXL_DEC_CONTEXT_MAP_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_external_image.cc b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.cc new file mode 100644 index 0000000000..1661d99965 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.cc @@ -0,0 +1,481 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_external_image.h" + +#include + +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/dec_external_image.cc" +#include +#include + +#include "lib/jxl/alpha.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/cache_aligned.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Clamp; +using hwy::HWY_NAMESPACE::NearestInt; + +// TODO(jon): check if this can be replaced by a FloatToU16 function +void FloatToU32(const float* in, uint32_t* out, size_t num, float mul, + size_t bits_per_sample) { + const HWY_FULL(float) d; + const hwy::HWY_NAMESPACE::Rebind du; + + // Unpoison accessing partially-uninitialized vectors with memory sanitizer. + // This is because we run NearestInt() on the vector, which triggers msan even + // it it safe to do so since the values are not mixed between lanes. 
+ const size_t num_round_up = RoundUpTo(num, Lanes(d)); + msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num)); + + const auto one = Set(d, 1.0f); + const auto scale = Set(d, mul); + for (size_t x = 0; x < num; x += Lanes(d)) { + auto v = Load(d, in + x); + // Clamp turns NaN to 'min'. + v = Clamp(v, Zero(d), one); + auto i = NearestInt(Mul(v, scale)); + Store(BitCast(du, i), du, out + x); + } + + // Poison back the output. + msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num)); +} + +void FloatToF16(const float* in, hwy::float16_t* out, size_t num) { + const HWY_FULL(float) d; + const hwy::HWY_NAMESPACE::Rebind du; + + // Unpoison accessing partially-uninitialized vectors with memory sanitizer. + // This is because we run DemoteTo() on the vector which triggers msan. + const size_t num_round_up = RoundUpTo(num, Lanes(d)); + msan::UnpoisonMemory(in + num, sizeof(in[0]) * (num_round_up - num)); + + for (size_t x = 0; x < num; x += Lanes(d)) { + auto v = Load(d, in + x); + auto v16 = DemoteTo(du, v); + Store(v16, du, out + x); + } + + // Poison back the output. + msan::PoisonMemory(out + num, sizeof(out[0]) * (num_round_up - num)); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace jxl { +namespace { + +// Stores a float in big endian +void StoreBEFloat(float value, uint8_t* p) { + uint32_t u; + memcpy(&u, &value, 4); + StoreBE32(u, p); +} + +// Stores a float in little endian +void StoreLEFloat(float value, uint8_t* p) { + uint32_t u; + memcpy(&u, &value, 4); + StoreLE32(u, p); +} + +// The orientation may not be identity. 
+// TODO(lode): SIMDify where possible +template +Status UndoOrientation(jxl::Orientation undo_orientation, const Plane& image, + Plane& out, jxl::ThreadPool* pool) { + const size_t xsize = image.xsize(); + const size_t ysize = image.ysize(); + + if (undo_orientation == Orientation::kFlipHorizontal) { + out = Plane(xsize, ysize); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + const T* JXL_RESTRICT row_in = image.Row(y); + T* JXL_RESTRICT row_out = out.Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_out[xsize - x - 1] = row_in[x]; + } + }, + "UndoOrientation")); + } else if (undo_orientation == Orientation::kRotate180) { + out = Plane(xsize, ysize); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + const T* JXL_RESTRICT row_in = image.Row(y); + T* JXL_RESTRICT row_out = out.Row(ysize - y - 1); + for (size_t x = 0; x < xsize; ++x) { + row_out[xsize - x - 1] = row_in[x]; + } + }, + "UndoOrientation")); + } else if (undo_orientation == Orientation::kFlipVertical) { + out = Plane(xsize, ysize); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + const T* JXL_RESTRICT row_in = image.Row(y); + T* JXL_RESTRICT row_out = out.Row(ysize - y - 1); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row_in[x]; + } + }, + "UndoOrientation")); + } else if (undo_orientation == Orientation::kTranspose) { + out = Plane(ysize, xsize); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + const T* JXL_RESTRICT row_in = image.Row(y); + for (size_t x = 0; x < xsize; ++x) { + out.Row(x)[y] = row_in[x]; + } + }, + "UndoOrientation")); + } else if 
(undo_orientation == Orientation::kRotate90) { + out = Plane(ysize, xsize); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + const T* JXL_RESTRICT row_in = image.Row(y); + for (size_t x = 0; x < xsize; ++x) { + out.Row(x)[ysize - y - 1] = row_in[x]; + } + }, + "UndoOrientation")); + } else if (undo_orientation == Orientation::kAntiTranspose) { + out = Plane(ysize, xsize); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + const T* JXL_RESTRICT row_in = image.Row(y); + for (size_t x = 0; x < xsize; ++x) { + out.Row(xsize - x - 1)[ysize - y - 1] = row_in[x]; + } + }, + "UndoOrientation")); + } else if (undo_orientation == Orientation::kRotate270) { + out = Plane(ysize, xsize); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const int64_t y = task; + const T* JXL_RESTRICT row_in = image.Row(y); + for (size_t x = 0; x < xsize; ++x) { + out.Row(xsize - x - 1)[y] = row_in[x]; + } + }, + "UndoOrientation")); + } + return true; +} +} // namespace + +HWY_EXPORT(FloatToU32); +HWY_EXPORT(FloatToF16); + +namespace { + +using StoreFuncType = void(uint32_t value, uint8_t* dest); +template +void StoreUintRow(uint32_t* JXL_RESTRICT* rows_u32, size_t num_channels, + size_t xsize, size_t bytes_per_sample, + uint8_t* JXL_RESTRICT out) { + for (size_t x = 0; x < xsize; ++x) { + for (size_t c = 0; c < num_channels; c++) { + StoreFunc(rows_u32[c][x], + out + (num_channels * x + c) * bytes_per_sample); + } + } +} + +template +void StoreFloatRow(const float* JXL_RESTRICT* rows_in, size_t num_channels, + size_t xsize, uint8_t* JXL_RESTRICT out) { + for (size_t x = 0; x < xsize; ++x) { + for (size_t c = 0; c < num_channels; c++) { + StoreFunc(rows_in[c][x], out + (num_channels * x + c) * 
sizeof(float)); + } + } +} + +void JXL_INLINE Store8(uint32_t value, uint8_t* dest) { *dest = value & 0xff; } + +} // namespace + +Status ConvertChannelsToExternal(const ImageF* channels[], size_t num_channels, + size_t bits_per_sample, bool float_out, + JxlEndianness endianness, size_t stride, + jxl::ThreadPool* pool, void* out_image, + size_t out_size, + const PixelCallback& out_callback, + jxl::Orientation undo_orientation) { + JXL_DASSERT(num_channels != 0 && num_channels <= kConvertMaxChannels); + JXL_DASSERT(channels[0] != nullptr); + JXL_CHECK(float_out ? bits_per_sample == 16 || bits_per_sample == 32 + : bits_per_sample > 0 && bits_per_sample <= 16); + if (!!out_image == out_callback.IsPresent()) { + return JXL_FAILURE( + "Must provide either an out_image or an out_callback, but not both."); + } + + const size_t bytes_per_channel = DivCeil(bits_per_sample, jxl::kBitsPerByte); + const size_t bytes_per_pixel = num_channels * bytes_per_channel; + + std::vector> row_out_callback; + const auto FreeCallbackOpaque = [&out_callback](void* p) { + out_callback.destroy(p); + }; + std::unique_ptr out_run_opaque( + nullptr, FreeCallbackOpaque); + auto InitOutCallback = [&](size_t num_threads) -> Status { + if (out_callback.IsPresent()) { + out_run_opaque.reset(out_callback.Init(num_threads, stride)); + JXL_RETURN_IF_ERROR(out_run_opaque != nullptr); + row_out_callback.resize(num_threads); + for (size_t i = 0; i < num_threads; ++i) { + row_out_callback[i].resize(stride); + } + } + return true; + }; + + // Channels used to store the transformed original channels if needed. + ImageF temp_channels[kConvertMaxChannels]; + if (undo_orientation != Orientation::kIdentity) { + for (size_t c = 0; c < num_channels; ++c) { + if (channels[c]) { + JXL_RETURN_IF_ERROR(UndoOrientation(undo_orientation, *channels[c], + temp_channels[c], pool)); + channels[c] = &(temp_channels[c]); + } + } + } + + // First channel may not be nullptr. 
+ size_t xsize = channels[0]->xsize(); + size_t ysize = channels[0]->ysize(); + if (stride < bytes_per_pixel * xsize) { + return JXL_FAILURE("stride is smaller than scanline width in bytes: %" PRIuS + " vs %" PRIuS, + stride, bytes_per_pixel * xsize); + } + if (!out_callback.IsPresent() && + out_size < (ysize - 1) * stride + bytes_per_pixel * xsize) { + return JXL_FAILURE("out_size is too small to store image"); + } + + const bool little_endian = + endianness == JXL_LITTLE_ENDIAN || + (endianness == JXL_NATIVE_ENDIAN && IsLittleEndian()); + + // Handle the case where a channel is nullptr by creating a single row with + // ones to use instead. + ImageF ones; + for (size_t c = 0; c < num_channels; ++c) { + if (!channels[c]) { + ones = ImageF(xsize, 1); + FillImage(1.0f, &ones); + break; + } + } + + if (float_out) { + if (bits_per_sample == 16) { + bool swap_endianness = little_endian != IsLittleEndian(); + Plane f16_cache; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), + [&](size_t num_threads) { + f16_cache = + Plane(xsize, num_channels * num_threads); + return InitOutCallback(num_threads); + }, + [&](const uint32_t task, const size_t thread) { + const int64_t y = task; + const float* JXL_RESTRICT row_in[kConvertMaxChannels]; + for (size_t c = 0; c < num_channels; c++) { + row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); + } + hwy::float16_t* JXL_RESTRICT row_f16[kConvertMaxChannels]; + for (size_t c = 0; c < num_channels; c++) { + row_f16[c] = f16_cache.Row(c + thread * num_channels); + HWY_DYNAMIC_DISPATCH(FloatToF16) + (row_in[c], row_f16[c], xsize); + } + uint8_t* row_out = + out_callback.IsPresent() + ? 
row_out_callback[thread].data() + : &(reinterpret_cast(out_image))[stride * y]; + // interleave the one scanline + hwy::float16_t* row_f16_out = + reinterpret_cast(row_out); + for (size_t x = 0; x < xsize; x++) { + for (size_t c = 0; c < num_channels; c++) { + row_f16_out[x * num_channels + c] = row_f16[c][x]; + } + } + if (swap_endianness) { + size_t size = xsize * num_channels * 2; + for (size_t i = 0; i < size; i += 2) { + std::swap(row_out[i + 0], row_out[i + 1]); + } + } + if (out_callback.IsPresent()) { + out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, + row_out); + } + }, + "ConvertF16")); + } else if (bits_per_sample == 32) { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), + [&](size_t num_threads) { return InitOutCallback(num_threads); }, + [&](const uint32_t task, const size_t thread) { + const int64_t y = task; + uint8_t* row_out = + out_callback.IsPresent() + ? row_out_callback[thread].data() + : &(reinterpret_cast(out_image))[stride * y]; + const float* JXL_RESTRICT row_in[kConvertMaxChannels]; + for (size_t c = 0; c < num_channels; c++) { + row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); + } + if (little_endian) { + StoreFloatRow(row_in, num_channels, xsize, row_out); + } else { + StoreFloatRow(row_in, num_channels, xsize, row_out); + } + if (out_callback.IsPresent()) { + out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, + row_out); + } + }, + "ConvertFloat")); + } else { + return JXL_FAILURE("float other than 16-bit and 32-bit not supported"); + } + } else { + // Multiplier to convert from floating point 0-1 range to the integer + // range. 
+ float mul = (1ull << bits_per_sample) - 1; + Plane u32_cache; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, static_cast(ysize), + [&](size_t num_threads) { + u32_cache = Plane(xsize, num_channels * num_threads); + return InitOutCallback(num_threads); + }, + [&](const uint32_t task, const size_t thread) { + const int64_t y = task; + uint8_t* row_out = + out_callback.IsPresent() + ? row_out_callback[thread].data() + : &(reinterpret_cast(out_image))[stride * y]; + const float* JXL_RESTRICT row_in[kConvertMaxChannels]; + for (size_t c = 0; c < num_channels; c++) { + row_in[c] = channels[c] ? channels[c]->Row(y) : ones.Row(0); + } + uint32_t* JXL_RESTRICT row_u32[kConvertMaxChannels]; + for (size_t c = 0; c < num_channels; c++) { + row_u32[c] = u32_cache.Row(c + thread * num_channels); + // row_u32[] is a per-thread temporary row storage, this isn't + // intended to be initialized on a previous run. + msan::PoisonMemory(row_u32[c], xsize * sizeof(row_u32[c][0])); + HWY_DYNAMIC_DISPATCH(FloatToU32) + (row_in[c], row_u32[c], xsize, mul, bits_per_sample); + } + if (bits_per_sample <= 8) { + StoreUintRow(row_u32, num_channels, xsize, 1, row_out); + } else { + if (little_endian) { + StoreUintRow(row_u32, num_channels, xsize, 2, row_out); + } else { + StoreUintRow(row_u32, num_channels, xsize, 2, row_out); + } + } + if (out_callback.IsPresent()) { + out_callback.run(out_run_opaque.get(), thread, 0, y, xsize, + row_out); + } + }, + "ConvertUint")); + } + return true; +} + +Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample, + bool float_out, size_t num_channels, + JxlEndianness endianness, size_t stride, + jxl::ThreadPool* pool, void* out_image, + size_t out_size, const PixelCallback& out_callback, + jxl::Orientation undo_orientation, + bool unpremul_alpha) { + bool want_alpha = num_channels == 2 || num_channels == 4; + size_t color_channels = num_channels <= 2 ? 1 : 3; + + const Image3F* color = &ib.color(); + // Undo premultiplied alpha. 
+ Image3F unpremul; + if (ib.AlphaIsPremultiplied() && ib.HasAlpha() && unpremul_alpha) { + unpremul = Image3F(color->xsize(), color->ysize()); + CopyImageTo(*color, &unpremul); + for (size_t y = 0; y < unpremul.ysize(); y++) { + UnpremultiplyAlpha(unpremul.PlaneRow(0, y), unpremul.PlaneRow(1, y), + unpremul.PlaneRow(2, y), ib.alpha().Row(y), + unpremul.xsize()); + } + color = &unpremul; + } + + const ImageF* channels[kConvertMaxChannels]; + size_t c = 0; + for (; c < color_channels; c++) { + channels[c] = &color->Plane(c); + } + if (want_alpha) { + channels[c++] = ib.HasAlpha() ? &ib.alpha() : nullptr; + } + JXL_ASSERT(num_channels == c); + + return ConvertChannelsToExternal( + channels, num_channels, bits_per_sample, float_out, endianness, stride, + pool, out_image, out_size, out_callback, undo_orientation); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_external_image.h b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.h new file mode 100644 index 0000000000..7ca7cfd0e7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_external_image.h @@ -0,0 +1,66 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_EXTERNAL_IMAGE_H_ +#define LIB_JXL_DEC_EXTERNAL_IMAGE_H_ + +// Interleaved image for color transforms and Codec. + +#include +#include +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +// Maximum number of channels for the ConvertChannelsToExternal function. +const size_t kConvertMaxChannels = 4; + +// Converts a list of channels to an interleaved image, applying transformations +// when needed. +// The input channels are given as a (non-const!) 
array of channel pointers and +// interleaved in that order. +// +// Note: if a pointer in channels[] is nullptr, a 1.0 value will be used +// instead. This is useful for handling when a user requests an alpha channel +// from an image that doesn't have one. The first channel in the list may not +// be nullptr, since it is used to determine the image size. +Status ConvertChannelsToExternal(const ImageF* channels[], size_t num_channels, + size_t bits_per_sample, bool float_out, + JxlEndianness endianness, size_t stride, + jxl::ThreadPool* pool, void* out_image, + size_t out_size, + const PixelCallback& out_callback, + jxl::Orientation undo_orientation); + +// Converts ib to interleaved void* pixel buffer with the given format. +// bits_per_sample: must be 16 or 32 if float_out is true, and at most 16 +// if it is false. No bit packing is done. +// num_channels: must be 1, 2, 3 or 4 for gray, gray+alpha, RGB, RGB+alpha. +// This supports the features needed for the C API and does not perform +// color space conversion. +// TODO(lode): support rectangle crop. +// stride_out is output scanline size in bytes, must be >= +// output_xsize * output_bytes_per_pixel. +// undo_orientation is an EXIF orientation to undo. Depending on the +// orientation, the output xsize and ysize are swapped compared to input +// xsize and ysize. 
+Status ConvertToExternal(const jxl::ImageBundle& ib, size_t bits_per_sample, + bool float_out, size_t num_channels, + JxlEndianness endianness, size_t stride_out, + jxl::ThreadPool* thread_pool, void* out_image, + size_t out_size, const PixelCallback& out_callback, + jxl::Orientation undo_orientation, + bool unpremul_alpha = false); + +} // namespace jxl + +#endif // LIB_JXL_DEC_EXTERNAL_IMAGE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_external_image_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/dec_external_image_gbench.cc new file mode 100644 index 0000000000..c87a4d5f36 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_external_image_gbench.cc @@ -0,0 +1,56 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "benchmark/benchmark.h" +#include "lib/jxl/dec_external_image.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { +namespace { + +// Decoder case, interleaves an internal float image. 
+void BM_DecExternalImage_ConvertImageRGBA(benchmark::State& state) { + const size_t kNumIter = 5; + size_t xsize = state.range(); + size_t ysize = state.range(); + size_t num_channels = 4; + + ImageMetadata im; + im.SetAlphaBits(8); + ImageBundle ib(&im); + Image3F color(xsize, ysize); + ZeroFillImage(&color); + ib.SetFromImage(std::move(color), ColorEncoding::SRGB()); + ImageF alpha(xsize, ysize); + ZeroFillImage(&alpha); + ib.SetAlpha(std::move(alpha)); + + const size_t bytes_per_row = xsize * num_channels; + std::vector interleaved(bytes_per_row * ysize); + + for (auto _ : state) { + for (size_t i = 0; i < kNumIter; ++i) { + JXL_CHECK(ConvertToExternal( + ib, + /*bits_per_sample=*/8, + /*float_out=*/false, num_channels, JXL_NATIVE_ENDIAN, + /*stride*/ bytes_per_row, + /*thread_pool=*/nullptr, interleaved.data(), interleaved.size(), + /*out_callback=*/{}, + /*undo_orientation=*/jxl::Orientation::kIdentity)); + } + } + + // Pixels per second. + state.SetItemsProcessed(kNumIter * state.iterations() * xsize * ysize); + state.SetBytesProcessed(kNumIter * state.iterations() * interleaved.size()); +} + +BENCHMARK(BM_DecExternalImage_ConvertImageRGBA) + ->RangeMultiplier(2) + ->Range(256, 2048); + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_frame.cc b/third-party/libjxl/libjxl/lib/jxl/dec_frame.cc new file mode 100644 index 0000000000..82458de1ec --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_frame.cc @@ -0,0 +1,871 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/dec_frame.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/compressed_dc.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/dec_group.h" +#include "lib/jxl/dec_modular.h" +#include "lib/jxl/dec_patch_dictionary.h" +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/epf.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/jpeg/jpeg_data.h" +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/passes_state.h" +#include "lib/jxl/quant_weights.h" +#include "lib/jxl/quantizer.h" +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/splines.h" +#include "lib/jxl/toc.h" + +namespace jxl { + +namespace { +Status DecodeGlobalDCInfo(BitReader* reader, bool is_jpeg, + PassesDecoderState* state, ThreadPool* pool) { + JXL_RETURN_IF_ERROR(state->shared_storage.quantizer.Decode(reader)); + + JXL_RETURN_IF_ERROR( + DecodeBlockCtxMap(reader, &state->shared_storage.block_ctx_map)); + + JXL_RETURN_IF_ERROR(state->shared_storage.cmap.DecodeDC(reader)); + + // Pre-compute info for decoding a group. 
+ if (is_jpeg) { + state->shared_storage.quantizer.ClearDCMul(); // Don't dequant DC + } + + state->shared_storage.ac_strategy.FillInvalid(); + return true; +} +} // namespace + +Status DecodeFrame(PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool, + const uint8_t* next_in, size_t avail_in, + ImageBundle* decoded, const CodecMetadata& metadata, + bool use_slow_rendering_pipeline) { + FrameDecoder frame_decoder(dec_state, metadata, pool, + use_slow_rendering_pipeline); + + BitReader reader(Span(next_in, avail_in)); + JXL_RETURN_IF_ERROR(frame_decoder.InitFrame(&reader, decoded, + /*is_preview=*/false)); + JXL_RETURN_IF_ERROR(frame_decoder.InitFrameOutput()); + + JXL_RETURN_IF_ERROR(reader.AllReadsWithinBounds()); + size_t header_bytes = reader.TotalBitsConsumed() / kBitsPerByte; + JXL_RETURN_IF_ERROR(reader.Close()); + + size_t processed_bytes = header_bytes; + Status close_ok = true; + std::vector> section_readers; + { + std::vector> section_closers; + std::vector section_info; + std::vector section_status; + size_t pos = header_bytes; + size_t index = 0; + for (auto toc_entry : frame_decoder.Toc()) { + JXL_RETURN_IF_ERROR(pos + toc_entry.size <= avail_in); + auto br = make_unique( + Span(next_in + pos, toc_entry.size)); + section_info.emplace_back( + FrameDecoder::SectionInfo{br.get(), toc_entry.id, index++}); + section_closers.emplace_back( + make_unique(br.get(), &close_ok)); + section_readers.emplace_back(std::move(br)); + pos += toc_entry.size; + } + section_status.resize(section_info.size()); + JXL_RETURN_IF_ERROR(frame_decoder.ProcessSections( + section_info.data(), section_info.size(), section_status.data())); + for (size_t i = 0; i < section_status.size(); i++) { + JXL_RETURN_IF_ERROR(section_status[i] == FrameDecoder::kDone); + processed_bytes += frame_decoder.Toc()[i].size; + } + } + JXL_RETURN_IF_ERROR(close_ok); + JXL_RETURN_IF_ERROR(frame_decoder.FinalizeFrame()); + decoded->SetDecodedBytes(processed_bytes); + return true; +} + +Status 
FrameDecoder::InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded, + bool is_preview) { + decoded_ = decoded; + JXL_ASSERT(is_finalized_); + + // Reset the dequantization matrices to their default values. + dec_state_->shared_storage.matrices = DequantMatrices(); + + frame_header_.nonserialized_is_preview = is_preview; + JXL_ASSERT(frame_header_.nonserialized_metadata != nullptr); + JXL_RETURN_IF_ERROR(ReadFrameHeader(br, &frame_header_)); + frame_dim_ = frame_header_.ToFrameDimensions(); + JXL_DEBUG_V(2, "FrameHeader: %s", frame_header_.DebugString().c_str()); + + const size_t num_passes = frame_header_.passes.num_passes; + const size_t num_groups = frame_dim_.num_groups; + + // If the previous frame was not a kRegularFrame, `decoded` may have different + // dimensions; must reset to avoid errors. + decoded->RemoveColor(); + decoded->ClearExtraChannels(); + + decoded->duration = frame_header_.animation_frame.duration; + + if (!frame_header_.nonserialized_is_preview && + (frame_header_.is_last || frame_header_.animation_frame.duration > 0) && + (frame_header_.frame_type == kRegularFrame || + frame_header_.frame_type == kSkipProgressive)) { + ++dec_state_->visible_frame_index; + dec_state_->nonvisible_frame_index = 0; + } else { + ++dec_state_->nonvisible_frame_index; + } + + // Read TOC. + const bool has_ac_global = true; + const size_t toc_entries = NumTocEntries(num_groups, frame_dim_.num_dc_groups, + num_passes, has_ac_global); + std::vector sizes; + std::vector permutation; + JXL_RETURN_IF_ERROR(ReadToc(toc_entries, br, &sizes, &permutation)); + bool have_permutation = !permutation.empty(); + toc_.resize(toc_entries); + section_sizes_sum_ = 0; + for (size_t i = 0; i < toc_entries; ++i) { + toc_[i].size = sizes[i]; + size_t index = have_permutation ? 
permutation[i] : i; + toc_[index].id = i; + if (section_sizes_sum_ + toc_[i].size < section_sizes_sum_) { + return JXL_FAILURE("group offset overflow"); + } + section_sizes_sum_ += toc_[i].size; + } + + JXL_DASSERT((br->TotalBitsConsumed() % kBitsPerByte) == 0); + const size_t group_codes_begin = br->TotalBitsConsumed() / kBitsPerByte; + JXL_DASSERT(!toc_.empty()); + + // Overflow check. + if (group_codes_begin + section_sizes_sum_ < group_codes_begin) { + return JXL_FAILURE("Invalid group codes"); + } + + if (!frame_header_.chroma_subsampling.Is444() && + !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) && + frame_header_.encoding == FrameEncoding::kVarDCT) { + return JXL_FAILURE( + "Non-444 chroma subsampling is not allowed when adaptive DC " + "smoothing is enabled"); + } + return true; +} + +Status FrameDecoder::InitFrameOutput() { + JXL_RETURN_IF_ERROR( + InitializePassesSharedState(frame_header_, &dec_state_->shared_storage)); + JXL_RETURN_IF_ERROR(dec_state_->Init()); + modular_frame_decoder_.Init(frame_dim_); + + if (decoded_->IsJPEG()) { + if (frame_header_.encoding == FrameEncoding::kModular) { + return JXL_FAILURE("Cannot output JPEG from Modular"); + } + jpeg::JPEGData* jpeg_data = decoded_->jpeg_data.get(); + size_t num_components = jpeg_data->components.size(); + if (num_components != 1 && num_components != 3) { + return JXL_FAILURE("Invalid number of components"); + } + if (frame_header_.nonserialized_metadata->m.xyb_encoded) { + return JXL_FAILURE("Cannot decode to JPEG an XYB image"); + } + auto jpeg_c_map = JpegOrder(ColorTransform::kYCbCr, num_components == 1); + decoded_->jpeg_data->width = frame_dim_.xsize; + decoded_->jpeg_data->height = frame_dim_.ysize; + for (size_t c = 0; c < num_components; c++) { + auto& component = jpeg_data->components[jpeg_c_map[c]]; + component.width_in_blocks = + frame_dim_.xsize_blocks >> frame_header_.chroma_subsampling.HShift(c); + component.height_in_blocks = + frame_dim_.ysize_blocks >> 
frame_header_.chroma_subsampling.VShift(c); + component.h_samp_factor = + 1 << frame_header_.chroma_subsampling.RawHShift(c); + component.v_samp_factor = + 1 << frame_header_.chroma_subsampling.RawVShift(c); + component.coeffs.resize(component.width_in_blocks * + component.height_in_blocks * jxl::kDCTBlockSize); + } + } + + // Clear the state. + decoded_dc_global_ = false; + decoded_ac_global_ = false; + is_finalized_ = false; + finalized_dc_ = false; + num_sections_done_ = 0; + decoded_dc_groups_.clear(); + decoded_dc_groups_.resize(frame_dim_.num_dc_groups); + decoded_passes_per_ac_group_.clear(); + decoded_passes_per_ac_group_.resize(frame_dim_.num_groups, 0); + processed_section_.clear(); + processed_section_.resize(toc_.size()); + allocated_ = false; + return true; +} + +Status FrameDecoder::ProcessDCGlobal(BitReader* br) { + PassesSharedState& shared = dec_state_->shared_storage; + if (shared.frame_header.flags & FrameHeader::kPatches) { + bool uses_extra_channels = false; + JXL_RETURN_IF_ERROR(shared.image_features.patches.Decode( + br, frame_dim_.xsize_padded, frame_dim_.ysize_padded, + &uses_extra_channels)); + if (uses_extra_channels && frame_header_.upsampling != 1) { + for (size_t ecups : frame_header_.extra_channel_upsampling) { + if (ecups != frame_header_.upsampling) { + return JXL_FAILURE( + "Cannot use extra channels in patches if color channels are " + "subsampled differently from extra channels"); + } + } + } + } else { + shared.image_features.patches.Clear(); + } + shared.image_features.splines.Clear(); + if (shared.frame_header.flags & FrameHeader::kSplines) { + JXL_RETURN_IF_ERROR(shared.image_features.splines.Decode( + br, frame_dim_.xsize * frame_dim_.ysize)); + } + if (shared.frame_header.flags & FrameHeader::kNoise) { + JXL_RETURN_IF_ERROR(DecodeNoise(br, &shared.image_features.noise_params)); + } + JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.DecodeDC(br)); + + if (frame_header_.encoding == FrameEncoding::kVarDCT) { + 
JXL_RETURN_IF_ERROR( + jxl::DecodeGlobalDCInfo(br, decoded_->IsJPEG(), dec_state_, pool_)); + } + // Splines' draw cache uses the color correlation map. + if (shared.frame_header.flags & FrameHeader::kSplines) { + JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache( + frame_dim_.xsize_upsampled, frame_dim_.ysize_upsampled, + dec_state_->shared->cmap)); + } + Status dec_status = modular_frame_decoder_.DecodeGlobalInfo( + br, frame_header_, /*allow_truncated_group=*/false); + if (dec_status.IsFatalError()) return dec_status; + if (dec_status) { + decoded_dc_global_ = true; + } + return dec_status; +} + +Status FrameDecoder::ProcessDCGroup(size_t dc_group_id, BitReader* br) { + const size_t gx = dc_group_id % frame_dim_.xsize_dc_groups; + const size_t gy = dc_group_id / frame_dim_.xsize_dc_groups; + const LoopFilter& lf = dec_state_->shared->frame_header.loop_filter; + if (frame_header_.encoding == FrameEncoding::kVarDCT && + !(frame_header_.flags & FrameHeader::kUseDcFrame)) { + JXL_RETURN_IF_ERROR( + modular_frame_decoder_.DecodeVarDCTDC(dc_group_id, br, dec_state_)); + } + const Rect mrect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim, + frame_dim_.dc_group_dim, frame_dim_.dc_group_dim); + JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup( + mrect, br, 3, 1000, ModularStreamId::ModularDC(dc_group_id), + /*zerofill=*/false, nullptr, nullptr, + /*allow_truncated=*/false)); + if (frame_header_.encoding == FrameEncoding::kVarDCT) { + JXL_RETURN_IF_ERROR( + modular_frame_decoder_.DecodeAcMetadata(dc_group_id, br, dec_state_)); + } else if (lf.epf_iters > 0) { + FillImage(kInvSigmaNum / lf.epf_sigma_for_modular, &dec_state_->sigma); + } + decoded_dc_groups_[dc_group_id] = uint8_t{true}; + return true; +} + +void FrameDecoder::FinalizeDC() { + // Do Adaptive DC smoothing if enabled. This *must* happen between all the + // ProcessDCGroup and ProcessACGroup. 
+ if (frame_header_.encoding == FrameEncoding::kVarDCT && + !(frame_header_.flags & FrameHeader::kSkipAdaptiveDCSmoothing) && + !(frame_header_.flags & FrameHeader::kUseDcFrame)) { + AdaptiveDCSmoothing(dec_state_->shared->quantizer.MulDC(), + &dec_state_->shared_storage.dc_storage, pool_); + } + + finalized_dc_ = true; +} + +Status FrameDecoder::AllocateOutput() { + if (allocated_) return true; + modular_frame_decoder_.MaybeDropFullImage(); + decoded_->origin = dec_state_->shared->frame_header.frame_origin; + JXL_RETURN_IF_ERROR(dec_state_->InitForAC(nullptr)); + allocated_ = true; + return true; +} + +Status FrameDecoder::ProcessACGlobal(BitReader* br) { + JXL_CHECK(finalized_dc_); + + // Decode AC group. + if (frame_header_.encoding == FrameEncoding::kVarDCT) { + JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.Decode( + br, &modular_frame_decoder_)); + JXL_RETURN_IF_ERROR(dec_state_->shared_storage.matrices.EnsureComputed( + dec_state_->used_acs)); + + size_t num_histo_bits = + CeilLog2Nonzero(dec_state_->shared->frame_dim.num_groups); + dec_state_->shared_storage.num_histograms = + 1 + br->ReadBits(num_histo_bits); + + dec_state_->code.resize(kMaxNumPasses); + dec_state_->context_map.resize(kMaxNumPasses); + // Read coefficient orders and histograms. + size_t max_num_bits_ac = 0; + for (size_t i = 0; + i < dec_state_->shared_storage.frame_header.passes.num_passes; i++) { + uint16_t used_orders = U32Coder::Read(kOrderEnc, br); + JXL_RETURN_IF_ERROR(DecodeCoeffOrders( + used_orders, dec_state_->used_acs, + &dec_state_->shared_storage + .coeff_orders[i * dec_state_->shared_storage.coeff_order_size], + br)); + size_t num_contexts = + dec_state_->shared->num_histograms * + dec_state_->shared_storage.block_ctx_map.NumACContexts(); + JXL_RETURN_IF_ERROR(DecodeHistograms( + br, num_contexts, &dec_state_->code[i], &dec_state_->context_map[i])); + // Add extra values to enable the cheat in hot loop of DecodeACVarBlock. 
+ dec_state_->context_map[i].resize( + num_contexts + kZeroDensityContextLimit - kZeroDensityContextCount); + max_num_bits_ac = + std::max(max_num_bits_ac, dec_state_->code[i].max_num_bits); + } + max_num_bits_ac += CeilLog2Nonzero( + dec_state_->shared_storage.frame_header.passes.num_passes); + // 16-bit buffer for decoding to JPEG are not implemented. + // TODO(veluca): figure out the exact limit - 16 should still work with + // 16-bit buffers, but we are excluding it for safety. + bool use_16_bit = max_num_bits_ac < 16 && !decoded_->IsJPEG(); + bool store = frame_header_.passes.num_passes > 1; + size_t xs = store ? kGroupDim * kGroupDim : 0; + size_t ys = store ? frame_dim_.num_groups : 0; + if (use_16_bit) { + dec_state_->coefficients = make_unique>(xs, ys); + } else { + dec_state_->coefficients = make_unique>(xs, ys); + } + if (store) { + dec_state_->coefficients->ZeroFill(); + } + } + + // Set JPEG decoding data. + if (decoded_->IsJPEG()) { + decoded_->color_transform = frame_header_.color_transform; + decoded_->chroma_subsampling = frame_header_.chroma_subsampling; + const std::vector& qe = + dec_state_->shared_storage.matrices.encodings(); + if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW || + std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) { + return JXL_FAILURE( + "Quantization table is not a JPEG quantization table."); + } + jpeg::JPEGData* jpeg_data = decoded_->jpeg_data.get(); + size_t num_components = jpeg_data->components.size(); + bool is_gray = (num_components == 1); + auto jpeg_c_map = JpegOrder(frame_header_.color_transform, is_gray); + size_t qt_set = 0; + for (size_t c = 0; c < num_components; c++) { + // TODO(eustas): why 1-st quant table for gray? + size_t quant_c = is_gray ? 
1 : c; + size_t qpos = jpeg_data->components[jpeg_c_map[c]].quant_idx; + JXL_CHECK(qpos != jpeg_data->quant.size()); + qt_set |= 1 << qpos; + for (size_t x = 0; x < 8; x++) { + for (size_t y = 0; y < 8; y++) { + jpeg_data->quant[qpos].values[x * 8 + y] = + (*qe[0].qraw.qtable)[quant_c * 64 + y * 8 + x]; + } + } + } + for (size_t i = 0; i < jpeg_data->quant.size(); i++) { + if (qt_set & (1 << i)) continue; + if (i == 0) return JXL_FAILURE("First quant table unused."); + // Unused quant table is set to copy of previous quant table + for (size_t j = 0; j < 64; j++) { + jpeg_data->quant[i].values[j] = jpeg_data->quant[i - 1].values[j]; + } + } + } + decoded_ac_global_ = true; + return true; +} + +Status FrameDecoder::ProcessACGroup(size_t ac_group_id, + BitReader* JXL_RESTRICT* br, + size_t num_passes, size_t thread, + bool force_draw, bool dc_only) { + size_t group_dim = frame_dim_.group_dim; + const size_t gx = ac_group_id % frame_dim_.xsize_groups; + const size_t gy = ac_group_id / frame_dim_.xsize_groups; + const size_t x = gx * group_dim; + const size_t y = gy * group_dim; + JXL_DEBUG_V(3, + "Processing AC group %" PRIuS "(%" PRIuS ",%" PRIuS + ") group_dim: %" PRIuS " decoded passes: %u new passes: %" PRIuS, + ac_group_id, gx, gy, group_dim, + decoded_passes_per_ac_group_[ac_group_id], num_passes); + + RenderPipelineInput render_pipeline_input = + dec_state_->render_pipeline->GetInputBuffers(ac_group_id, thread); + + bool should_run_pipeline = true; + + if (frame_header_.encoding == FrameEncoding::kVarDCT) { + group_dec_caches_[thread].InitOnce(frame_header_.passes.num_passes, + dec_state_->used_acs); + JXL_RETURN_IF_ERROR(DecodeGroup(br, num_passes, ac_group_id, dec_state_, + &group_dec_caches_[thread], thread, + render_pipeline_input, decoded_, + decoded_passes_per_ac_group_[ac_group_id], + force_draw, dc_only, &should_run_pipeline)); + } + + // don't limit to image dimensions here (is done in DecodeGroup) + const Rect mrect(x, y, group_dim, group_dim); + bool 
modular_ready = false; + size_t pass0 = decoded_passes_per_ac_group_[ac_group_id]; + size_t pass1 = + force_draw ? frame_header_.passes.num_passes : pass0 + num_passes; + for (size_t i = pass0; i < pass1; ++i) { + int minShift, maxShift; + frame_header_.passes.GetDownsamplingBracket(i, minShift, maxShift); + bool modular_pass_ready = true; + if (i < pass0 + num_passes) { + JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup( + mrect, br[i - pass0], minShift, maxShift, + ModularStreamId::ModularAC(ac_group_id, i), + /*zerofill=*/false, dec_state_, &render_pipeline_input, + /*allow_truncated=*/false, &modular_pass_ready)); + } else { + JXL_RETURN_IF_ERROR(modular_frame_decoder_.DecodeGroup( + mrect, nullptr, minShift, maxShift, + ModularStreamId::ModularAC(ac_group_id, i), /*zerofill=*/true, + dec_state_, &render_pipeline_input, + /*allow_truncated=*/false, &modular_pass_ready)); + } + if (modular_pass_ready) modular_ready = true; + } + decoded_passes_per_ac_group_[ac_group_id] += num_passes; + + if ((frame_header_.flags & FrameHeader::kNoise) != 0) { + size_t noise_c_start = + 3 + frame_header_.nonserialized_metadata->m.num_extra_channels; + // When the color channels are downsampled, we need to generate more noise + // input for the current group than just the group dimensions. 
+ std::pair rects[3]; + for (size_t iy = 0; iy < frame_header_.upsampling; iy++) { + for (size_t ix = 0; ix < frame_header_.upsampling; ix++) { + for (size_t c = 0; c < 3; c++) { + auto r = render_pipeline_input.GetBuffer(noise_c_start + c); + rects[c].first = r.first; + size_t x1 = r.second.x0() + r.second.xsize(); + size_t y1 = r.second.y0() + r.second.ysize(); + rects[c].second = Rect(r.second.x0() + ix * group_dim, + r.second.y0() + iy * group_dim, group_dim, + group_dim, x1, y1); + } + Random3Planes(dec_state_->visible_frame_index, + dec_state_->nonvisible_frame_index, + (gx * frame_header_.upsampling + ix) * group_dim, + (gy * frame_header_.upsampling + iy) * group_dim, + rects[0], rects[1], rects[2]); + } + } + } + + if (!modular_frame_decoder_.UsesFullImage() && !decoded_->IsJPEG()) { + if (should_run_pipeline && modular_ready) { + render_pipeline_input.Done(); + } else if (force_draw) { + return JXL_FAILURE("Modular group decoding failed."); + } + } + return true; +} + +void FrameDecoder::MarkSections(const SectionInfo* sections, size_t num, + SectionStatus* section_status) { + num_sections_done_ += num; + for (size_t i = 0; i < num; i++) { + if (section_status[i] != SectionStatus::kDone) { + processed_section_[sections[i].id] = false; + num_sections_done_--; + } + } +} + +Status FrameDecoder::ProcessSections(const SectionInfo* sections, size_t num, + SectionStatus* section_status) { + if (num == 0) return true; // Nothing to process + std::fill(section_status, section_status + num, SectionStatus::kSkipped); + size_t dc_global_sec = num; + size_t ac_global_sec = num; + std::vector dc_group_sec(frame_dim_.num_dc_groups, num); + std::vector> ac_group_sec( + frame_dim_.num_groups, + std::vector(frame_header_.passes.num_passes, num)); + // This keeps track of the number of ac passes we want to process during this + // call of ProcessSections. 
+ std::vector desired_num_ac_passes(frame_dim_.num_groups); + bool single_section = + frame_dim_.num_groups == 1 && frame_header_.passes.num_passes == 1; + if (single_section) { + JXL_ASSERT(num == 1); + JXL_ASSERT(sections[0].id == 0); + if (processed_section_[0] == false) { + processed_section_[0] = true; + ac_group_sec[0].resize(1); + dc_global_sec = ac_global_sec = dc_group_sec[0] = ac_group_sec[0][0] = 0; + desired_num_ac_passes[0] = 1; + } else { + section_status[0] = SectionStatus::kDuplicate; + } + } else { + size_t ac_global_index = frame_dim_.num_dc_groups + 1; + for (size_t i = 0; i < num; i++) { + JXL_ASSERT(sections[i].id < processed_section_.size()); + if (processed_section_[sections[i].id]) { + section_status[i] = SectionStatus::kDuplicate; + continue; + } + if (sections[i].id == 0) { + dc_global_sec = i; + } else if (sections[i].id < ac_global_index) { + dc_group_sec[sections[i].id - 1] = i; + } else if (sections[i].id == ac_global_index) { + ac_global_sec = i; + } else { + size_t ac_idx = sections[i].id - ac_global_index - 1; + size_t acg = ac_idx % frame_dim_.num_groups; + size_t acp = ac_idx / frame_dim_.num_groups; + if (acp >= frame_header_.passes.num_passes) { + return JXL_FAILURE("Invalid section ID"); + } + ac_group_sec[acg][acp] = i; + } + processed_section_[sections[i].id] = true; + } + // Count number of new passes per group. 
+ for (size_t g = 0; g < ac_group_sec.size(); g++) { + size_t j = 0; + for (; j + decoded_passes_per_ac_group_[g] < + frame_header_.passes.num_passes; + j++) { + if (ac_group_sec[g][j + decoded_passes_per_ac_group_[g]] == num) { + break; + } + } + desired_num_ac_passes[g] = j; + } + } + if (dc_global_sec != num) { + Status dc_global_status = ProcessDCGlobal(sections[dc_global_sec].br); + if (dc_global_status.IsFatalError()) return dc_global_status; + if (dc_global_status) { + section_status[dc_global_sec] = SectionStatus::kDone; + } else { + section_status[dc_global_sec] = SectionStatus::kPartial; + } + } + + std::atomic has_error{false}; + if (decoded_dc_global_) { + JXL_RETURN_IF_ERROR(RunOnPool( + pool_, 0, dc_group_sec.size(), ThreadPool::NoInit, + [this, &dc_group_sec, &num, §ions, §ion_status, &has_error]( + size_t i, size_t thread) { + if (dc_group_sec[i] != num) { + if (!ProcessDCGroup(i, sections[dc_group_sec[i]].br)) { + has_error = true; + } else { + section_status[dc_group_sec[i]] = SectionStatus::kDone; + } + } + }, + "DecodeDCGroup")); + } + if (has_error) return JXL_FAILURE("Error in DC group"); + + if (*std::min_element(decoded_dc_groups_.begin(), decoded_dc_groups_.end()) && + !finalized_dc_) { + PassesDecoderState::PipelineOptions pipeline_options; + pipeline_options.use_slow_render_pipeline = use_slow_rendering_pipeline_; + pipeline_options.coalescing = coalescing_; + pipeline_options.render_spotcolors = render_spotcolors_; + JXL_RETURN_IF_ERROR( + dec_state_->PreparePipeline(decoded_, pipeline_options)); + FinalizeDC(); + JXL_RETURN_IF_ERROR(AllocateOutput()); + if (progressive_detail_ >= JxlProgressiveDetail::kDC) { + MarkSections(sections, num, section_status); + return true; + } + } + + if (finalized_dc_ && ac_global_sec != num && !decoded_ac_global_) { + JXL_RETURN_IF_ERROR(ProcessACGlobal(sections[ac_global_sec].br)); + section_status[ac_global_sec] = SectionStatus::kDone; + } + + if (progressive_detail_ >= 
JxlProgressiveDetail::kLastPasses) { + // Mark that we only want the next progression pass. + size_t target_complete_passes = NextNumPassesToPause(); + for (size_t i = 0; i < ac_group_sec.size(); i++) { + desired_num_ac_passes[i] = + std::min(desired_num_ac_passes[i], + target_complete_passes - decoded_passes_per_ac_group_[i]); + } + } + + if (decoded_ac_global_) { + // Mark all the AC groups that we received as not complete yet. + for (size_t i = 0; i < ac_group_sec.size(); i++) { + if (desired_num_ac_passes[i] != 0) { + dec_state_->render_pipeline->ClearDone(i); + } + } + + JXL_RETURN_IF_ERROR(RunOnPool( + pool_, 0, ac_group_sec.size(), + [this](size_t num_threads) { + return PrepareStorage(num_threads, + decoded_passes_per_ac_group_.size()); + }, + [this, &ac_group_sec, &desired_num_ac_passes, &num, §ions, + §ion_status, &has_error](size_t g, size_t thread) { + if (desired_num_ac_passes[g] == 0) { + // no new AC pass, nothing to do + return; + } + (void)num; + size_t first_pass = decoded_passes_per_ac_group_[g]; + BitReader* JXL_RESTRICT readers[kMaxNumPasses]; + for (size_t i = 0; i < desired_num_ac_passes[g]; i++) { + JXL_ASSERT(ac_group_sec[g][first_pass + i] != num); + readers[i] = sections[ac_group_sec[g][first_pass + i]].br; + } + if (!ProcessACGroup(g, readers, desired_num_ac_passes[g], + GetStorageLocation(thread, g), + /*force_draw=*/false, /*dc_only=*/false)) { + has_error = true; + } else { + for (size_t i = 0; i < desired_num_ac_passes[g]; i++) { + section_status[ac_group_sec[g][first_pass + i]] = + SectionStatus::kDone; + } + } + }, + "DecodeGroup")); + } + if (has_error) return JXL_FAILURE("Error in AC group"); + + MarkSections(sections, num, section_status); + return true; +} + +Status FrameDecoder::Flush() { + bool has_blending = frame_header_.blending_info.mode != BlendMode::kReplace || + frame_header_.custom_size_or_origin; + for (const auto& blending_info_ec : + frame_header_.extra_channel_blending_info) { + if (blending_info_ec.mode != 
BlendMode::kReplace) has_blending = true; + } + // No early Flush() if blending is enabled. + if (has_blending && !is_finalized_) { + return false; + } + // No early Flush() - nothing to do - if the frame is a kSkipProgressive + // frame. + if (frame_header_.frame_type == FrameType::kSkipProgressive && + !is_finalized_) { + return true; + } + if (decoded_->IsJPEG()) { + // Nothing to do. + return true; + } + JXL_RETURN_IF_ERROR(AllocateOutput()); + + uint32_t completely_decoded_ac_pass = *std::min_element( + decoded_passes_per_ac_group_.begin(), decoded_passes_per_ac_group_.end()); + if (completely_decoded_ac_pass < frame_header_.passes.num_passes) { + // We don't have all AC yet: force a draw of all the missing areas. + // Mark all sections as not complete. + for (size_t i = 0; i < decoded_passes_per_ac_group_.size(); i++) { + if (decoded_passes_per_ac_group_[i] < frame_header_.passes.num_passes) { + dec_state_->render_pipeline->ClearDone(i); + } + } + std::atomic has_error{false}; + JXL_RETURN_IF_ERROR(RunOnPool( + pool_, 0, decoded_passes_per_ac_group_.size(), + [this](const size_t num_threads) { + return PrepareStorage(num_threads, + decoded_passes_per_ac_group_.size()); + }, + [this, &has_error](const uint32_t g, size_t thread) { + if (decoded_passes_per_ac_group_[g] == + frame_header_.passes.num_passes) { + // This group was drawn already, nothing to do. 
+ return; + } + BitReader* JXL_RESTRICT readers[kMaxNumPasses] = {}; + bool ok = ProcessACGroup( + g, readers, /*num_passes=*/0, GetStorageLocation(thread, g), + /*force_draw=*/true, /*dc_only=*/!decoded_ac_global_); + if (!ok) has_error = true; + }, + "ForceDrawGroup")); + if (has_error) { + return JXL_FAILURE("Drawing groups failed"); + } + } + + // undo global modular transforms and copy int pixel buffers to float ones + JXL_RETURN_IF_ERROR(modular_frame_decoder_.FinalizeDecoding(dec_state_, pool_, + is_finalized_)); + + return true; +} + +int FrameDecoder::SavedAs(const FrameHeader& header) { + if (header.frame_type == FrameType::kDCFrame) { + // bits 16, 32, 64, 128 for DC level + return 16 << (header.dc_level - 1); + } else if (header.CanBeReferenced()) { + // bits 1, 2, 4 and 8 for the references + return 1 << header.save_as_reference; + } + + return 0; +} + +bool FrameDecoder::HasEverything() const { + if (!decoded_dc_global_) return false; + if (!decoded_ac_global_) return false; + for (auto& have_dc_group : decoded_dc_groups_) { + if (!have_dc_group) return false; + } + for (auto& nb_passes : decoded_passes_per_ac_group_) { + if (nb_passes < frame_header_.passes.num_passes) return false; + } + return true; +} + +int FrameDecoder::References() const { + if (is_finalized_) { + return 0; + } + if (!HasEverything()) return 0; + + int result = 0; + + // Blending + if (frame_header_.frame_type == FrameType::kRegularFrame || + frame_header_.frame_type == FrameType::kSkipProgressive) { + bool cropped = frame_header_.custom_size_or_origin; + if (cropped || frame_header_.blending_info.mode != BlendMode::kReplace) { + result |= (1 << frame_header_.blending_info.source); + } + const auto& extra = frame_header_.extra_channel_blending_info; + for (size_t i = 0; i < extra.size(); ++i) { + if (cropped || extra[i].mode != BlendMode::kReplace) { + result |= (1 << extra[i].source); + } + } + } + + // Patches + if (frame_header_.flags & FrameHeader::kPatches) { + result |= 
dec_state_->shared->image_features.patches.GetReferences(); + } + + // DC Level + if (frame_header_.flags & FrameHeader::kUseDcFrame) { + // Reads from the next dc level + int dc_level = frame_header_.dc_level + 1; + // bits 16, 32, 64, 128 for DC level + result |= (16 << (dc_level - 1)); + } + + return result; +} + +Status FrameDecoder::FinalizeFrame() { + if (is_finalized_) { + return JXL_FAILURE("FinalizeFrame called multiple times"); + } + is_finalized_ = true; + if (decoded_->IsJPEG()) { + // Nothing to do. + return true; + } + + // undo global modular transforms and copy int pixel buffers to float ones + JXL_RETURN_IF_ERROR( + modular_frame_decoder_.FinalizeDecoding(dec_state_, pool_, + /*inplace=*/true)); + + if (frame_header_.CanBeReferenced()) { + auto& info = dec_state_->shared_storage + .reference_frames[frame_header_.save_as_reference]; + info.frame = std::move(dec_state_->frame_storage_for_referencing); + info.ib_is_in_xyb = frame_header_.save_before_color_transform; + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_frame.h b/third-party/libjxl/libjxl/lib/jxl/dec_frame.h new file mode 100644 index 0000000000..6b54ac631f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_frame.h @@ -0,0 +1,329 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_DEC_FRAME_H_ +#define LIB_JXL_DEC_FRAME_H_ + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/blending.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/dec_modular.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +// Decodes a frame. Groups may be processed in parallel by `pool`. +// `metadata` is the metadata that applies to all frames of the codestream +// `decoded->metadata` must already be set and must match metadata.m. +// Used in the encoder to model decoder behaviour, and in tests. +Status DecodeFrame(PassesDecoderState* dec_state, ThreadPool* JXL_RESTRICT pool, + const uint8_t* next_in, size_t avail_in, + ImageBundle* decoded, const CodecMetadata& metadata, + bool use_slow_rendering_pipeline = false); + +// TODO(veluca): implement "forced drawing". +class FrameDecoder { + public: + // All parameters must outlive the FrameDecoder. + FrameDecoder(PassesDecoderState* dec_state, const CodecMetadata& metadata, + ThreadPool* pool, bool use_slow_rendering_pipeline) + : dec_state_(dec_state), + pool_(pool), + frame_header_(&metadata), + use_slow_rendering_pipeline_(use_slow_rendering_pipeline) {} + + void SetRenderSpotcolors(bool rsc) { render_spotcolors_ = rsc; } + void SetCoalescing(bool c) { coalescing_ = c; } + + // Read FrameHeader and table of contents from the given BitReader. + Status InitFrame(BitReader* JXL_RESTRICT br, ImageBundle* decoded, + bool is_preview); + + // Checks frame dimensions for their limits, and sets the output + // image buffer. 
+ Status InitFrameOutput(); + + struct SectionInfo { + BitReader* JXL_RESTRICT br; + // Logical index of the section, regardless of any permutation that may be + // applied in the table of contents or of the physical position in the file. + size_t id; + // Index of the section in the order of the bytes inside the frame. + size_t index; + }; + + struct TocEntry { + size_t size; + size_t id; + }; + + enum SectionStatus { + // Processed correctly. + kDone = 0, + // Skipped because other required sections were not yet processed. + kSkipped = 1, + // Skipped because the section was already processed. + kDuplicate = 2, + // Only partially decoded: the section will need to be processed again. + kPartial = 3, + }; + + // Processes `num` sections; each SectionInfo contains the index + // of the section and a BitReader that only contains the data of the section. + // `section_status` should point to `num` elements, and will be filled with + // information about whether each section was processed or not. + // A section is a part of the encoded file that is indexed by the TOC. + Status ProcessSections(const SectionInfo* sections, size_t num, + SectionStatus* section_status); + + // Flushes all the data decoded so far to pixels. + Status Flush(); + + // Runs final operations once a frame data is decoded. + // Must be called exactly once per frame, after all calls to ProcessSections. + Status FinalizeFrame(); + + // Returns dependencies of this frame on reference ids as a bit mask: bits 0-3 + // indicate reference frame 0-3 for patches and blending, bits 4-7 indicate DC + // frames this frame depends on. Only returns a valid result after all calls + // to ProcessSections are finished and before FinalizeFrame. + int References() const; + + // Returns reference id of storage location where this frame is stored as a + // bit flag, or 0 if not stored. 
+ // Matches the bit mask used by References: bits 0-3 indicate it is stored + // for patching or blending, bits 4-7 indicate DC frame. + // Unlike References, can be run at any time as + // soon as the frame header is known. + static int SavedAs(const FrameHeader& header); + + uint64_t SumSectionSizes() const { return section_sizes_sum_; } + const std::vector& Toc() const { return toc_; } + + const FrameHeader& GetFrameHeader() const { return frame_header_; } + + // Returns whether a DC image has been decoded, accessible at low resolution + // at passes.shared_storage.dc_storage + bool HasDecodedDC() const { return finalized_dc_; } + bool HasDecodedAll() const { return toc_.size() == num_sections_done_; } + + size_t NumCompletePasses() const { + return *std::min_element(decoded_passes_per_ac_group_.begin(), + decoded_passes_per_ac_group_.end()); + } + + // If enabled, ProcessSections will stop and return true when the DC + // sections have been processed, instead of starting the AC sections. This + // will only occur if supported (that is, flushing will produce a valid + // 1/8th*1/8th resolution image). The return value of true then does not mean + // all sections have been processed; use HasDecodedDC and HasDecodedAll + // to check the true finished state. + // Returns the progressive detail that will be effective for the frame. + JxlProgressiveDetail SetPauseAtProgressive(JxlProgressiveDetail prog_detail) { + bool single_section = + frame_dim_.num_groups == 1 && frame_header_.passes.num_passes == 1; + if (frame_header_.frame_type != kSkipProgressive && + // If there's only one group and one pass, there is no separate section + // for DC and the entire full resolution image is available at once. + !single_section && + // If extra channels are encoded with modular without squeeze, they + // don't support DC. If they are encoded with squeeze, DC works in theory + // but the implementation may not yet correctly support this for Flush.
+ // Therefore, can't correctly pause for a progressive step if there is + // an extra channel (including alpha channel) + // TODO(firsching): Check if this is still the case. + decoded_->metadata()->extra_channel_info.empty() && + // DC is not guaranteed to be available in modular mode and may be a + // black image. If squeeze is used, it may be available depending on the + // current implementation. + // TODO(lode): do return DC if it's known that flushing at this point + // will produce a valid 1/8th downscaled image with modular encoding. + frame_header_.encoding == FrameEncoding::kVarDCT) { + progressive_detail_ = prog_detail; + } else { + progressive_detail_ = JxlProgressiveDetail::kFrames; + } + if (progressive_detail_ >= JxlProgressiveDetail::kPasses) { + for (size_t i = 1; i < frame_header_.passes.num_passes; ++i) { + passes_to_pause_.push_back(i); + } + } else if (progressive_detail_ >= JxlProgressiveDetail::kLastPasses) { + for (size_t i = 0; i < frame_header_.passes.num_downsample; ++i) { + passes_to_pause_.push_back(frame_header_.passes.last_pass[i] + 1); + } + // The format does not guarantee that these values are sorted. + std::sort(passes_to_pause_.begin(), passes_to_pause_.end()); + } + return progressive_detail_; + } + + size_t NextNumPassesToPause() const { + auto it = std::upper_bound(passes_to_pause_.begin(), passes_to_pause_.end(), + NumCompletePasses()); + return (it != passes_to_pause_.end() ? *it + : std::numeric_limits::max()); + } + + // Sets the pixel callback or image buffer where the pixels will be decoded. + // + // @param undo_orientation: if true, indicates the frame decoder should apply + // the exif orientation to bring the image to the intended display + // orientation. 
+ void SetImageOutput(const PixelCallback& pixel_callback, void* image_buffer, + size_t image_buffer_size, size_t xsize, size_t ysize, + JxlPixelFormat format, size_t bits_per_sample, + bool unpremul_alpha, bool undo_orientation) const { + dec_state_->width = xsize; + dec_state_->height = ysize; + dec_state_->main_output.format = format; + dec_state_->main_output.bits_per_sample = bits_per_sample; + dec_state_->main_output.callback = pixel_callback; + dec_state_->main_output.buffer = image_buffer; + dec_state_->main_output.buffer_size = image_buffer_size; + dec_state_->main_output.stride = GetStride(xsize, format); + const jxl::ExtraChannelInfo* alpha = + decoded_->metadata()->Find(jxl::ExtraChannel::kAlpha); + if (alpha && alpha->alpha_associated && unpremul_alpha) { + dec_state_->unpremul_alpha = true; + } + if (undo_orientation) { + dec_state_->undo_orientation = decoded_->metadata()->GetOrientation(); + if (static_cast(dec_state_->undo_orientation) > 4) { + std::swap(dec_state_->width, dec_state_->height); + } + } + dec_state_->extra_output.clear(); +#if !JXL_HIGH_PRECISION + if (dec_state_->main_output.buffer && + (format.data_type == JXL_TYPE_UINT8) && (format.num_channels >= 3) && + !dec_state_->unpremul_alpha && + (dec_state_->undo_orientation == Orientation::kIdentity) && + decoded_->metadata()->xyb_encoded && + dec_state_->output_encoding_info.color_encoding.IsSRGB() && + dec_state_->output_encoding_info.all_default_opsin && + (dec_state_->output_encoding_info.desired_intensity_target == + dec_state_->output_encoding_info.orig_intensity_target) && + HasFastXYBTosRGB8() && frame_header_.needs_color_transform()) { + dec_state_->fast_xyb_srgb8_conversion = true; + } +#endif + } + + void AddExtraChannelOutput(void* buffer, size_t buffer_size, size_t xsize, + JxlPixelFormat format, size_t bits_per_sample) { + ImageOutput out; + out.format = format; + out.bits_per_sample = bits_per_sample; + out.buffer = buffer; + out.buffer_size = buffer_size; + out.stride = 
GetStride(xsize, format); + dec_state_->extra_output.push_back(out); + } + + private: + Status ProcessDCGlobal(BitReader* br); + Status ProcessDCGroup(size_t dc_group_id, BitReader* br); + void FinalizeDC(); + Status AllocateOutput(); + Status ProcessACGlobal(BitReader* br); + Status ProcessACGroup(size_t ac_group_id, BitReader* JXL_RESTRICT* br, + size_t num_passes, size_t thread, bool force_draw, + bool dc_only); + void MarkSections(const SectionInfo* sections, size_t num, + SectionStatus* section_status); + + // Allocates storage for parallel decoding using up to `num_threads` threads + // of up to `num_tasks` tasks. The value of `thread` passed to + // `GetStorageLocation` must be smaller than the `num_threads` value passed + // here. The value of `task` passed to `GetStorageLocation` must be smaller + // than the value of `num_tasks` passed here. + Status PrepareStorage(size_t num_threads, size_t num_tasks) { + size_t storage_size = std::min(num_threads, num_tasks); + if (storage_size > group_dec_caches_.size()) { + group_dec_caches_.resize(storage_size); + } + use_task_id_ = num_threads > num_tasks; + bool use_group_ids = (modular_frame_decoder_.UsesFullImage() && + (frame_header_.encoding == FrameEncoding::kVarDCT || + (frame_header_.flags & FrameHeader::kNoise))); + if (dec_state_->render_pipeline) { + JXL_RETURN_IF_ERROR(dec_state_->render_pipeline->PrepareForThreads( + storage_size, use_group_ids)); + } + return true; + } + + size_t GetStorageLocation(size_t thread, size_t task) { + if (use_task_id_) return task; + return thread; + } + + static size_t BytesPerChannel(JxlDataType data_type) { + return (data_type == JXL_TYPE_UINT8 ? 1u + : data_type == JXL_TYPE_FLOAT ? 
4u + : 2u); + } + + static size_t GetStride(const size_t xsize, JxlPixelFormat format) { + size_t stride = + (xsize * BytesPerChannel(format.data_type) * format.num_channels); + if (format.align > 1) { + stride = (jxl::DivCeil(stride, format.align) * format.align); + } + return stride; + } + + PassesDecoderState* dec_state_; + ThreadPool* pool_; + std::vector toc_; + uint64_t section_sizes_sum_; + // TODO(veluca): figure out the duplication between these and dec_state_. + FrameHeader frame_header_; + FrameDimensions frame_dim_; + ImageBundle* decoded_; + ModularFrameDecoder modular_frame_decoder_; + bool render_spotcolors_ = true; + bool coalescing_ = true; + + std::vector processed_section_; + std::vector decoded_passes_per_ac_group_; + std::vector decoded_dc_groups_; + bool decoded_dc_global_; + bool decoded_ac_global_; + bool HasEverything() const; + bool finalized_dc_ = true; + size_t num_sections_done_ = 0; + bool is_finalized_ = true; + bool allocated_ = false; + + std::vector group_dec_caches_; + + // Whether or not the task id should be used for storage indexing, instead of + // the thread id. + bool use_task_id_ = false; + + // Testing setting: whether or not to use the slow rendering pipeline. + bool use_slow_rendering_pipeline_; + + JxlProgressiveDetail progressive_detail_ = kFrames; + // Number of completed passes where section decoding should pause. + // Used for progressive details at least kLastPasses. + std::vector passes_to_pause_; +}; + +} // namespace jxl + +#endif // LIB_JXL_DEC_FRAME_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group.cc b/third-party/libjxl/libjxl/lib/jxl/dec_group.cc new file mode 100644 index 0000000000..37bb3d2dc1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_group.cc @@ -0,0 +1,791 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/dec_group.h" + +#include +#include + +#include +#include +#include + +#include "lib/jxl/frame_header.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/dec_group.cc" +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/common.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/dec_transforms-inl.h" +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/epf.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/quant_weights.h" +#include "lib/jxl/quantizer-inl.h" +#include "lib/jxl/quantizer.h" + +#ifndef LIB_JXL_DEC_GROUP_CC +#define LIB_JXL_DEC_GROUP_CC +namespace jxl { + +struct AuxOut; + +// Interface for reading groups for DecodeGroupImpl. +class GetBlock { + public: + virtual void StartRow(size_t by) = 0; + virtual Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, + size_t size, size_t log2_covered_blocks, + ACPtr block[3], ACType ac_type) = 0; + virtual ~GetBlock() {} +}; + +// Controls whether DecodeGroupImpl renders to pixels or not. +enum DrawMode { + // Render to pixels. + kDraw = 0, + // Don't render to pixels. + kDontDraw = 1, +}; + +} // namespace jxl +#endif // LIB_JXL_DEC_GROUP_CC + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::ShiftRight; + +using D = HWY_FULL(float); +using DU = HWY_FULL(uint32_t); +using DI = HWY_FULL(int32_t); +using DI16 = Rebind; +constexpr D d; +constexpr DI di; +constexpr DI16 di16; + +// TODO(veluca): consider SIMDfying. 
+void Transpose8x8InPlace(int32_t* JXL_RESTRICT block) { + for (size_t x = 0; x < 8; x++) { + for (size_t y = x + 1; y < 8; y++) { + std::swap(block[y * 8 + x], block[x * 8 + y]); + } + } +} + +template +void DequantLane(Vec scaled_dequant_x, Vec scaled_dequant_y, + Vec scaled_dequant_b, + const float* JXL_RESTRICT dequant_matrices, size_t size, + size_t k, Vec x_cc_mul, Vec b_cc_mul, + const float* JXL_RESTRICT biases, ACPtr qblock[3], + float* JXL_RESTRICT block) { + const auto x_mul = Mul(Load(d, dequant_matrices + k), scaled_dequant_x); + const auto y_mul = + Mul(Load(d, dequant_matrices + size + k), scaled_dequant_y); + const auto b_mul = + Mul(Load(d, dequant_matrices + 2 * size + k), scaled_dequant_b); + + Vec quantized_x_int; + Vec quantized_y_int; + Vec quantized_b_int; + if (ac_type == ACType::k16) { + Rebind di16; + quantized_x_int = PromoteTo(di, Load(di16, qblock[0].ptr16 + k)); + quantized_y_int = PromoteTo(di, Load(di16, qblock[1].ptr16 + k)); + quantized_b_int = PromoteTo(di, Load(di16, qblock[2].ptr16 + k)); + } else { + quantized_x_int = Load(di, qblock[0].ptr32 + k); + quantized_y_int = Load(di, qblock[1].ptr32 + k); + quantized_b_int = Load(di, qblock[2].ptr32 + k); + } + + const auto dequant_x_cc = + Mul(AdjustQuantBias(di, 0, quantized_x_int, biases), x_mul); + const auto dequant_y = + Mul(AdjustQuantBias(di, 1, quantized_y_int, biases), y_mul); + const auto dequant_b_cc = + Mul(AdjustQuantBias(di, 2, quantized_b_int, biases), b_mul); + + const auto dequant_x = MulAdd(x_cc_mul, dequant_y, dequant_x_cc); + const auto dequant_b = MulAdd(b_cc_mul, dequant_y, dequant_b_cc); + Store(dequant_x, d, block + k); + Store(dequant_y, d, block + size + k); + Store(dequant_b, d, block + 2 * size + k); +} + +template +void DequantBlock(const AcStrategy& acs, float inv_global_scale, int quant, + float x_dm_multiplier, float b_dm_multiplier, Vec x_cc_mul, + Vec b_cc_mul, size_t kind, size_t size, + const Quantizer& quantizer, size_t covered_blocks, + const 
size_t* sbx, + const float* JXL_RESTRICT* JXL_RESTRICT dc_row, + size_t dc_stride, const float* JXL_RESTRICT biases, + ACPtr qblock[3], float* JXL_RESTRICT block, + float* JXL_RESTRICT scratch) { + const auto scaled_dequant_s = inv_global_scale / quant; + + const auto scaled_dequant_x = Set(d, scaled_dequant_s * x_dm_multiplier); + const auto scaled_dequant_y = Set(d, scaled_dequant_s); + const auto scaled_dequant_b = Set(d, scaled_dequant_s * b_dm_multiplier); + + const float* dequant_matrices = quantizer.DequantMatrix(kind, 0); + + for (size_t k = 0; k < covered_blocks * kDCTBlockSize; k += Lanes(d)) { + DequantLane(scaled_dequant_x, scaled_dequant_y, scaled_dequant_b, + dequant_matrices, size, k, x_cc_mul, b_cc_mul, biases, + qblock, block); + } + for (size_t c = 0; c < 3; c++) { + LowestFrequenciesFromDC(acs.Strategy(), dc_row[c] + sbx[c], dc_stride, + block + c * size, scratch); + } +} + +Status DecodeGroupImpl(GetBlock* JXL_RESTRICT get_block, + GroupDecCache* JXL_RESTRICT group_dec_cache, + PassesDecoderState* JXL_RESTRICT dec_state, + size_t thread, size_t group_idx, + RenderPipelineInput& render_pipeline_input, + ImageBundle* decoded, DrawMode draw) { + // TODO(veluca): investigate cache usage in this function. 
+ const Rect block_rect = dec_state->shared->BlockGroupRect(group_idx); + const AcStrategyImage& ac_strategy = dec_state->shared->ac_strategy; + + const size_t xsize_blocks = block_rect.xsize(); + const size_t ysize_blocks = block_rect.ysize(); + + const size_t dc_stride = dec_state->shared->dc->PixelsPerRow(); + + const float inv_global_scale = dec_state->shared->quantizer.InvGlobalScale(); + + const YCbCrChromaSubsampling& cs = + dec_state->shared->frame_header.chroma_subsampling; + + size_t idct_stride[3]; + for (size_t c = 0; c < 3; c++) { + idct_stride[c] = render_pipeline_input.GetBuffer(c).first->PixelsPerRow(); + } + + HWY_ALIGN int32_t scaled_qtable[64 * 3]; + + ACType ac_type = dec_state->coefficients->Type(); + auto dequant_block = ac_type == ACType::k16 ? DequantBlock + : DequantBlock; + // Whether or not coefficients should be stored for future usage, and/or read + // from past usage. + bool accumulate = !dec_state->coefficients->IsEmpty(); + // Offset of the current block in the group. + size_t offset = 0; + + std::array jpeg_c_map; + bool jpeg_is_gray = false; + std::array dcoff = {}; + + // TODO(veluca): all of this should be done only once per image. 
+ if (decoded->IsJPEG()) { + if (!dec_state->shared->cmap.IsJPEGCompatible()) { + return JXL_FAILURE("The CfL map is not JPEG-compatible"); + } + jpeg_is_gray = (decoded->jpeg_data->components.size() == 1); + jpeg_c_map = JpegOrder(dec_state->shared->frame_header.color_transform, + jpeg_is_gray); + const std::vector& qe = + dec_state->shared->matrices.encodings(); + if (qe.empty() || qe[0].mode != QuantEncoding::Mode::kQuantModeRAW || + std::abs(qe[0].qraw.qtable_den - 1.f / (8 * 255)) > 1e-8f) { + return JXL_FAILURE( + "Quantization table is not a JPEG quantization table."); + } + for (size_t c = 0; c < 3; c++) { + if (dec_state->shared->frame_header.color_transform == + ColorTransform::kNone) { + dcoff[c] = 1024 / (*qe[0].qraw.qtable)[64 * c]; + } + for (size_t i = 0; i < 64; i++) { + // Transpose the matrix, as it will be used on the transposed block. + int n = qe[0].qraw.qtable->at(64 + i); + int d = qe[0].qraw.qtable->at(64 * c + i); + if (n <= 0 || d <= 0 || n >= 65536 || d >= 65536) { + return JXL_FAILURE("Invalid JPEG quantization table"); + } + scaled_qtable[64 * c + (i % 8) * 8 + (i / 8)] = + (1 << kCFLFixedPointPrecision) * n / d; + } + } + } + + size_t hshift[3] = {cs.HShift(0), cs.HShift(1), cs.HShift(2)}; + size_t vshift[3] = {cs.VShift(0), cs.VShift(1), cs.VShift(2)}; + Rect r[3]; + for (size_t i = 0; i < 3; i++) { + r[i] = + Rect(block_rect.x0() >> hshift[i], block_rect.y0() >> vshift[i], + block_rect.xsize() >> hshift[i], block_rect.ysize() >> vshift[i]); + if (!r[i].IsInside({0, 0, dec_state->shared->dc->Plane(i).xsize(), + dec_state->shared->dc->Plane(i).ysize()})) { + return JXL_FAILURE("Frame dimensions are too big for the image."); + } + } + + for (size_t by = 0; by < ysize_blocks; ++by) { + get_block->StartRow(by); + size_t sby[3] = {by >> vshift[0], by >> vshift[1], by >> vshift[2]}; + + const int32_t* JXL_RESTRICT row_quant = + block_rect.ConstRow(dec_state->shared->raw_quant_field, by); + + const float* JXL_RESTRICT dc_rows[3] = { + 
r[0].ConstPlaneRow(*dec_state->shared->dc, 0, sby[0]), + r[1].ConstPlaneRow(*dec_state->shared->dc, 1, sby[1]), + r[2].ConstPlaneRow(*dec_state->shared->dc, 2, sby[2]), + }; + + const size_t ty = (block_rect.y0() + by) / kColorTileDimInBlocks; + AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by); + + const int8_t* JXL_RESTRICT row_cmap[3] = { + dec_state->shared->cmap.ytox_map.ConstRow(ty), + nullptr, + dec_state->shared->cmap.ytob_map.ConstRow(ty), + }; + + float* JXL_RESTRICT idct_row[3]; + int16_t* JXL_RESTRICT jpeg_row[3]; + for (size_t c = 0; c < 3; c++) { + idct_row[c] = render_pipeline_input.GetBuffer(c).second.Row( + render_pipeline_input.GetBuffer(c).first, sby[c] * kBlockDim); + if (decoded->IsJPEG()) { + auto& component = decoded->jpeg_data->components[jpeg_c_map[c]]; + jpeg_row[c] = + component.coeffs.data() + + (component.width_in_blocks * (r[c].y0() + sby[c]) + r[c].x0()) * + kDCTBlockSize; + } + } + + size_t bx = 0; + for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks); + tx++) { + size_t abs_tx = tx + block_rect.x0() / kColorTileDimInBlocks; + auto x_cc_mul = + Set(d, dec_state->shared->cmap.YtoXRatio(row_cmap[0][abs_tx])); + auto b_cc_mul = + Set(d, dec_state->shared->cmap.YtoBRatio(row_cmap[2][abs_tx])); + // Increment bx by llf_x because those iterations would otherwise + // immediately continue (!IsFirstBlock). Reduces mispredictions. + for (; bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks;) { + size_t sbx[3] = {bx >> hshift[0], bx >> hshift[1], bx >> hshift[2]}; + AcStrategy acs = acs_row[bx]; + const size_t llf_x = acs.covered_blocks_x(); + + // Can only happen in the second or lower rows of a varblock. 
+ if (JXL_UNLIKELY(!acs.IsFirstBlock())) { + bx += llf_x; + continue; + } + const size_t log2_covered_blocks = acs.log2_covered_blocks(); + + const size_t covered_blocks = 1 << log2_covered_blocks; + const size_t size = covered_blocks * kDCTBlockSize; + + ACPtr qblock[3]; + if (accumulate) { + for (size_t c = 0; c < 3; c++) { + qblock[c] = dec_state->coefficients->PlaneRow(c, group_idx, offset); + } + } else { + // No point in reading from bitstream without accumulating and not + // drawing. + JXL_ASSERT(draw == kDraw); + if (ac_type == ACType::k16) { + memset(group_dec_cache->dec_group_qblock16, 0, + size * 3 * sizeof(int16_t)); + for (size_t c = 0; c < 3; c++) { + qblock[c].ptr16 = group_dec_cache->dec_group_qblock16 + c * size; + } + } else { + memset(group_dec_cache->dec_group_qblock, 0, + size * 3 * sizeof(int32_t)); + for (size_t c = 0; c < 3; c++) { + qblock[c].ptr32 = group_dec_cache->dec_group_qblock + c * size; + } + } + } + JXL_RETURN_IF_ERROR(get_block->LoadBlock( + bx, by, acs, size, log2_covered_blocks, qblock, ac_type)); + offset += size; + if (draw == kDontDraw) { + bx += llf_x; + continue; + } + + if (JXL_UNLIKELY(decoded->IsJPEG())) { + if (acs.Strategy() != AcStrategy::Type::DCT) { + return JXL_FAILURE( + "Can only decode to JPEG if only DCT-8 is used."); + } + + HWY_ALIGN int32_t transposed_dct_y[64]; + for (size_t c : {1, 0, 2}) { + // Propagate only Y for grayscale. + if (jpeg_is_gray && c != 1) { + continue; + } + if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) { + continue; + } + int16_t* JXL_RESTRICT jpeg_pos = + jpeg_row[c] + sbx[c] * kDCTBlockSize; + // JPEG XL is transposed, JPEG is not. + auto transposed_dct = qblock[c].ptr32; + Transpose8x8InPlace(transposed_dct); + // No CfL - no need to store the y block converted to integers. 
+ if (!cs.Is444() || + (row_cmap[0][abs_tx] == 0 && row_cmap[2][abs_tx] == 0)) { + for (size_t i = 0; i < 64; i += Lanes(d)) { + const auto ini = Load(di, transposed_dct + i); + const auto ini16 = DemoteTo(di16, ini); + StoreU(ini16, di16, jpeg_pos + i); + } + } else if (c == 1) { + // Y channel: save for restoring X/B, but nothing else to do. + for (size_t i = 0; i < 64; i += Lanes(d)) { + const auto ini = Load(di, transposed_dct + i); + Store(ini, di, transposed_dct_y + i); + const auto ini16 = DemoteTo(di16, ini); + StoreU(ini16, di16, jpeg_pos + i); + } + } else { + // transposed_dct_y contains the y channel block, transposed. + const auto scale = Set( + di, dec_state->shared->cmap.RatioJPEG(row_cmap[c][abs_tx])); + const auto round = Set(di, 1 << (kCFLFixedPointPrecision - 1)); + for (int i = 0; i < 64; i += Lanes(d)) { + auto in = Load(di, transposed_dct + i); + auto in_y = Load(di, transposed_dct_y + i); + auto qt = Load(di, scaled_qtable + c * size + i); + auto coeff_scale = ShiftRight( + Add(Mul(qt, scale), round)); + auto cfl_factor = ShiftRight( + Add(Mul(in_y, coeff_scale), round)); + StoreU(DemoteTo(di16, Add(in, cfl_factor)), di16, jpeg_pos + i); + } + } + jpeg_pos[0] = + Clamp1(dc_rows[c][sbx[c]] - dcoff[c], -2047, 2047); + } + } else { + HWY_ALIGN float* const block = group_dec_cache->dec_group_block; + // Dequantize and add predictions. 
+ dequant_block( + acs, inv_global_scale, row_quant[bx], dec_state->x_dm_multiplier, + dec_state->b_dm_multiplier, x_cc_mul, b_cc_mul, acs.RawStrategy(), + size, dec_state->shared->quantizer, + acs.covered_blocks_y() * acs.covered_blocks_x(), sbx, dc_rows, + dc_stride, + dec_state->output_encoding_info.opsin_params.quant_biases, qblock, + block, group_dec_cache->scratch_space); + + for (size_t c : {1, 0, 2}) { + if ((sbx[c] << hshift[c] != bx) || (sby[c] << vshift[c] != by)) { + continue; + } + // IDCT + float* JXL_RESTRICT idct_pos = idct_row[c] + sbx[c] * kBlockDim; + TransformToPixels(acs.Strategy(), block + c * size, idct_pos, + idct_stride[c], group_dec_cache->scratch_space); + } + } + bx += llf_x; + } + } + } + return true; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +namespace { +// Decode quantized AC coefficients of DCT blocks. +// LLF components in the output block will not be modified. +template +Status DecodeACVarBlock(size_t ctx_offset, size_t log2_covered_blocks, + int32_t* JXL_RESTRICT row_nzeros, + const int32_t* JXL_RESTRICT row_nzeros_top, + size_t nzeros_stride, size_t c, size_t bx, size_t by, + size_t lbx, AcStrategy acs, + const coeff_order_t* JXL_RESTRICT coeff_order, + BitReader* JXL_RESTRICT br, + ANSSymbolReader* JXL_RESTRICT decoder, + const std::vector& context_map, + const uint8_t* qdc_row, const int32_t* qf_row, + const BlockCtxMap& block_ctx_map, ACPtr block, + size_t shift = 0) { + // Equal to number of LLF coefficients. 
+ const size_t covered_blocks = 1 << log2_covered_blocks; + const size_t size = covered_blocks * kDCTBlockSize; + int32_t predicted_nzeros = + PredictFromTopAndLeft(row_nzeros_top, row_nzeros, bx, 32); + + size_t ord = kStrategyOrder[acs.RawStrategy()]; + const coeff_order_t* JXL_RESTRICT order = + &coeff_order[CoeffOrderOffset(ord, c)]; + + size_t block_ctx = block_ctx_map.Context(qdc_row[lbx], qf_row[bx], ord, c); + const int32_t nzero_ctx = + block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx) + ctx_offset; + + size_t nzeros = + decoder->ReadHybridUintInlined(nzero_ctx, br, context_map); + if (nzeros + covered_blocks > size) { + return JXL_FAILURE("Invalid AC: nzeros too large"); + } + for (size_t y = 0; y < acs.covered_blocks_y(); y++) { + for (size_t x = 0; x < acs.covered_blocks_x(); x++) { + row_nzeros[bx + x + y * nzeros_stride] = + (nzeros + covered_blocks - 1) >> log2_covered_blocks; + } + } + + const size_t histo_offset = + ctx_offset + block_ctx_map.ZeroDensityContextsOffset(block_ctx); + + size_t prev = (nzeros > size / 16 ? 0 : 1); + for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) { + const size_t ctx = + histo_offset + ZeroDensityContext(nzeros, k, covered_blocks, + log2_covered_blocks, prev); + const size_t u_coeff = + decoder->ReadHybridUintInlined(ctx, br, context_map); + // Hand-rolled version of UnpackSigned, shifting before the conversion to + // signed integer to avoid undefined behavior of shifting negative numbers. + const size_t magnitude = u_coeff >> 1; + const size_t neg_sign = (~u_coeff) & 1; + const intptr_t coeff = + static_cast((magnitude ^ (neg_sign - 1)) << shift); + if (ac_type == ACType::k16) { + block.ptr16[order[k]] += coeff; + } else { + block.ptr32[order[k]] += coeff; + } + prev = static_cast(u_coeff != 0); + nzeros -= prev; + } + if (JXL_UNLIKELY(nzeros != 0)) { + return JXL_FAILURE("Invalid AC: nzeros not 0. 
Block (%" PRIuS ", %" PRIuS + "), channel %" PRIuS, + bx, by, c); + } + + return true; +} + +// Structs used by DecodeGroupImpl to get a quantized block. +// GetBlockFromBitstream uses ANS decoding (and thus keeps track of row +// pointers in row_nzeros), GetBlockFromEncoder simply reads the coefficient +// image provided by the encoder. + +struct GetBlockFromBitstream : public GetBlock { + void StartRow(size_t by) override { + qf_row = rect.ConstRow(*qf, by); + for (size_t c = 0; c < 3; c++) { + size_t sby = by >> vshift[c]; + quant_dc_row = quant_dc->ConstRow(rect.y0() + by) + rect.x0(); + for (size_t i = 0; i < num_passes; i++) { + row_nzeros[i][c] = group_dec_cache->num_nzeroes[i].PlaneRow(c, sby); + row_nzeros_top[i][c] = + sby == 0 + ? nullptr + : group_dec_cache->num_nzeroes[i].ConstPlaneRow(c, sby - 1); + } + } + } + + Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size, + size_t log2_covered_blocks, ACPtr block[3], + ACType ac_type) override { + ; + for (size_t c : {1, 0, 2}) { + size_t sbx = bx >> hshift[c]; + size_t sby = by >> vshift[c]; + if (JXL_UNLIKELY((sbx << hshift[c] != bx) || (sby << vshift[c] != by))) { + continue; + } + + for (size_t pass = 0; JXL_UNLIKELY(pass < num_passes); pass++) { + auto decode_ac_varblock = + decoders[pass].UsesLZ77() + ? (ac_type == ACType::k16 ? DecodeACVarBlock + : DecodeACVarBlock) + : (ac_type == ACType::k16 ? 
DecodeACVarBlock + : DecodeACVarBlock); + JXL_RETURN_IF_ERROR(decode_ac_varblock( + ctx_offset[pass], log2_covered_blocks, row_nzeros[pass][c], + row_nzeros_top[pass][c], nzeros_stride, c, sbx, sby, bx, acs, + &coeff_orders[pass * coeff_order_size], readers[pass], + &decoders[pass], context_map[pass], quant_dc_row, qf_row, + *block_ctx_map, block[c], shift_for_pass[pass])); + } + } + return true; + } + + Status Init(BitReader* JXL_RESTRICT* JXL_RESTRICT readers, size_t num_passes, + size_t group_idx, size_t histo_selector_bits, const Rect& rect, + GroupDecCache* JXL_RESTRICT group_dec_cache, + PassesDecoderState* dec_state, size_t first_pass) { + for (size_t i = 0; i < 3; i++) { + hshift[i] = dec_state->shared->frame_header.chroma_subsampling.HShift(i); + vshift[i] = dec_state->shared->frame_header.chroma_subsampling.VShift(i); + } + this->coeff_order_size = dec_state->shared->coeff_order_size; + this->coeff_orders = + dec_state->shared->coeff_orders.data() + first_pass * coeff_order_size; + this->context_map = dec_state->context_map.data() + first_pass; + this->readers = readers; + this->num_passes = num_passes; + this->shift_for_pass = + dec_state->shared->frame_header.passes.shift + first_pass; + this->group_dec_cache = group_dec_cache; + this->rect = rect; + block_ctx_map = &dec_state->shared->block_ctx_map; + qf = &dec_state->shared->raw_quant_field; + quant_dc = &dec_state->shared->quant_dc; + + for (size_t pass = 0; pass < num_passes; pass++) { + // Select which histogram set to use among those of the current pass. 
+ size_t cur_histogram = 0; + if (histo_selector_bits != 0) { + cur_histogram = readers[pass]->ReadBits(histo_selector_bits); + } + if (cur_histogram >= dec_state->shared->num_histograms) { + return JXL_FAILURE("Invalid histogram selector"); + } + ctx_offset[pass] = cur_histogram * block_ctx_map->NumACContexts(); + + decoders[pass] = + ANSSymbolReader(&dec_state->code[pass + first_pass], readers[pass]); + } + nzeros_stride = group_dec_cache->num_nzeroes[0].PixelsPerRow(); + for (size_t i = 0; i < num_passes; i++) { + JXL_ASSERT( + nzeros_stride == + static_cast(group_dec_cache->num_nzeroes[i].PixelsPerRow())); + } + return true; + } + + const uint32_t* shift_for_pass = nullptr; // not owned + const coeff_order_t* JXL_RESTRICT coeff_orders; + size_t coeff_order_size; + const std::vector* JXL_RESTRICT context_map; + ANSSymbolReader decoders[kMaxNumPasses]; + BitReader* JXL_RESTRICT* JXL_RESTRICT readers; + size_t num_passes; + size_t ctx_offset[kMaxNumPasses]; + size_t nzeros_stride; + int32_t* JXL_RESTRICT row_nzeros[kMaxNumPasses][3]; + const int32_t* JXL_RESTRICT row_nzeros_top[kMaxNumPasses][3]; + GroupDecCache* JXL_RESTRICT group_dec_cache; + const BlockCtxMap* block_ctx_map; + const ImageI* qf; + const ImageB* quant_dc; + const int32_t* qf_row; + const uint8_t* quant_dc_row; + Rect rect; + size_t hshift[3], vshift[3]; +}; + +struct GetBlockFromEncoder : public GetBlock { + void StartRow(size_t by) override {} + + Status LoadBlock(size_t bx, size_t by, const AcStrategy& acs, size_t size, + size_t log2_covered_blocks, ACPtr block[3], + ACType ac_type) override { + JXL_DASSERT(ac_type == ACType::k32); + for (size_t c = 0; c < 3; c++) { + // for each pass + for (size_t i = 0; i < quantized_ac->size(); i++) { + for (size_t k = 0; k < size; k++) { + // TODO(veluca): SIMD. 
+ block[c].ptr32[k] += + rows[i][c][offset + k] * (1 << shift_for_pass[i]); + } + } + } + offset += size; + return true; + } + + GetBlockFromEncoder(const std::vector>& ac, + size_t group_idx, const uint32_t* shift_for_pass) + : quantized_ac(&ac), shift_for_pass(shift_for_pass) { + // TODO(veluca): not supported with chroma subsampling. + for (size_t i = 0; i < quantized_ac->size(); i++) { + JXL_CHECK((*quantized_ac)[i]->Type() == ACType::k32); + for (size_t c = 0; c < 3; c++) { + rows[i][c] = (*quantized_ac)[i]->PlaneRow(c, group_idx, 0).ptr32; + } + } + } + + const std::vector>* JXL_RESTRICT quantized_ac; + size_t offset = 0; + const int32_t* JXL_RESTRICT rows[kMaxNumPasses][3]; + const uint32_t* shift_for_pass = nullptr; // not owned +}; + +HWY_EXPORT(DecodeGroupImpl); + +} // namespace + +Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers, + size_t num_passes, size_t group_idx, + PassesDecoderState* JXL_RESTRICT dec_state, + GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread, + RenderPipelineInput& render_pipeline_input, + ImageBundle* JXL_RESTRICT decoded, size_t first_pass, + bool force_draw, bool dc_only, bool* should_run_pipeline) { + DrawMode draw = (num_passes + first_pass == + dec_state->shared->frame_header.passes.num_passes) || + force_draw + ? kDraw + : kDontDraw; + + if (should_run_pipeline) { + *should_run_pipeline = draw != kDontDraw; + } + + if (draw == kDraw && num_passes == 0 && first_pass == 0) { + group_dec_cache->InitDCBufferOnce(); + const YCbCrChromaSubsampling& cs = + dec_state->shared->frame_header.chroma_subsampling; + for (size_t c : {0, 1, 2}) { + size_t hs = cs.HShift(c); + size_t vs = cs.VShift(c); + // We reuse filter_input_storage here as it is not currently in use. 
+ const Rect src_rect_precs = dec_state->shared->BlockGroupRect(group_idx); + const Rect src_rect = + Rect(src_rect_precs.x0() >> hs, src_rect_precs.y0() >> vs, + src_rect_precs.xsize() >> hs, src_rect_precs.ysize() >> vs); + const Rect copy_rect(kRenderPipelineXOffset, 2, src_rect.xsize(), + src_rect.ysize()); + CopyImageToWithPadding(src_rect, dec_state->shared->dc->Plane(c), 2, + copy_rect, &group_dec_cache->dc_buffer); + // Mirrorpad. Interleaving left and right padding ensures that padding + // works out correctly even for images with DC size of 1. + for (size_t y = 0; y < src_rect.ysize() + 4; y++) { + size_t xend = kRenderPipelineXOffset + + (dec_state->shared->dc->Plane(c).xsize() >> hs) - + src_rect.x0(); + for (size_t ix = 0; ix < 2; ix++) { + if (src_rect.x0() == 0) { + group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset - ix - 1] = + group_dec_cache->dc_buffer.Row(y)[kRenderPipelineXOffset + ix]; + } + if (src_rect.x0() + src_rect.xsize() + 2 >= + (dec_state->shared->dc->xsize() >> hs)) { + group_dec_cache->dc_buffer.Row(y)[xend + ix] = + group_dec_cache->dc_buffer.Row(y)[xend - ix - 1]; + } + } + } + Rect dst_rect = render_pipeline_input.GetBuffer(c).second; + ImageF* upsampling_dst = render_pipeline_input.GetBuffer(c).first; + JXL_ASSERT(dst_rect.IsInside(*upsampling_dst)); + + RenderPipelineStage::RowInfo input_rows(1, std::vector(5)); + RenderPipelineStage::RowInfo output_rows(1, std::vector(8)); + for (size_t y = src_rect.y0(); y < src_rect.y0() + src_rect.ysize(); + y++) { + for (ssize_t iy = 0; iy < 5; iy++) { + input_rows[0][iy] = group_dec_cache->dc_buffer.Row( + Mirror(ssize_t(y) + iy - 2, + dec_state->shared->dc->Plane(c).ysize() >> vs) + + 2 - src_rect.y0()); + } + for (size_t iy = 0; iy < 8; iy++) { + output_rows[0][iy] = + dst_rect.Row(upsampling_dst, ((y - src_rect.y0()) << 3) + iy) - + kRenderPipelineXOffset; + } + // Arguments set to 0/nullptr are not used. 
+ dec_state->upsampler8x->ProcessRow(input_rows, output_rows, + /*xextra=*/0, src_rect.xsize(), 0, 0, + thread); + } + } + return true; + } + + size_t histo_selector_bits = 0; + if (dc_only) { + JXL_ASSERT(num_passes == 0); + } else { + JXL_ASSERT(dec_state->shared->num_histograms > 0); + histo_selector_bits = CeilLog2Nonzero(dec_state->shared->num_histograms); + } + + auto get_block = jxl::make_unique(); + JXL_RETURN_IF_ERROR( + get_block->Init(readers, num_passes, group_idx, histo_selector_bits, + dec_state->shared->BlockGroupRect(group_idx), + group_dec_cache, dec_state, first_pass)); + + JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)( + get_block.get(), group_dec_cache, dec_state, thread, group_idx, + render_pipeline_input, decoded, draw)); + + for (size_t pass = 0; pass < num_passes; pass++) { + if (!get_block->decoders[pass].CheckANSFinalState()) { + return JXL_FAILURE("ANS checksum failure."); + } + } + return true; +} + +Status DecodeGroupForRoundtrip(const std::vector>& ac, + size_t group_idx, + PassesDecoderState* JXL_RESTRICT dec_state, + GroupDecCache* JXL_RESTRICT group_dec_cache, + size_t thread, + RenderPipelineInput& render_pipeline_input, + ImageBundle* JXL_RESTRICT decoded, + AuxOut* aux_out) { + GetBlockFromEncoder get_block(ac, group_idx, + dec_state->shared->frame_header.passes.shift); + group_dec_cache->InitOnce( + /*num_passes=*/0, + /*used_acs=*/(1u << AcStrategy::kNumValidStrategies) - 1); + + return HWY_DYNAMIC_DISPATCH(DecodeGroupImpl)( + &get_block, group_dec_cache, dec_state, thread, group_idx, + render_pipeline_input, decoded, kDraw); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group.h b/third-party/libjxl/libjxl/lib/jxl/dec_group.h new file mode 100644 index 0000000000..e32ea67b5f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_group.h @@ -0,0 +1,49 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_GROUP_H_ +#define LIB_JXL_DEC_GROUP_H_ + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/quantizer.h" + +namespace jxl { + +struct AuxOut; + +Status DecodeGroup(BitReader* JXL_RESTRICT* JXL_RESTRICT readers, + size_t num_passes, size_t group_idx, + PassesDecoderState* JXL_RESTRICT dec_state, + GroupDecCache* JXL_RESTRICT group_dec_cache, size_t thread, + RenderPipelineInput& render_pipeline_input, + ImageBundle* JXL_RESTRICT decoded, size_t first_pass, + bool force_draw, bool dc_only, bool* should_run_pipeline); + +Status DecodeGroupForRoundtrip(const std::vector>& ac, + size_t group_idx, + PassesDecoderState* JXL_RESTRICT dec_state, + GroupDecCache* JXL_RESTRICT group_dec_cache, + size_t thread, + RenderPipelineInput& render_pipeline_input, + ImageBundle* JXL_RESTRICT decoded, + AuxOut* aux_out); + +} // namespace jxl + +#endif // LIB_JXL_DEC_GROUP_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group_border.cc b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.cc new file mode 100644 index 0000000000..4bee3ae6ef --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.cc @@ -0,0 +1,184 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+#include "lib/jxl/dec_group_border.h"
+
+#include <atomic>
+
+namespace jxl {
+
+void GroupBorderAssigner::Init(const FrameDimensions& frame_dim) {
+  frame_dim_ = frame_dim;
+  size_t num_corners =
+      (frame_dim_.xsize_groups + 1) * (frame_dim_.ysize_groups + 1);
+  counters_.reset(new std::atomic<uint8_t>[num_corners]);
+  // Initialize counters: one bitmask of k* position flags per group corner.
+  for (size_t y = 0; y < frame_dim_.ysize_groups + 1; y++) {
+    for (size_t x = 0; x < frame_dim_.xsize_groups + 1; x++) {
+      // Counters at image borders don't have anything on the other side, we
+      // pre-fill their value to have more uniform handling afterwards.
+      uint8_t init_value = 0;
+      if (x == 0) {
+        init_value |= kTopLeft | kBottomLeft;
+      }
+      if (x == frame_dim_.xsize_groups) {
+        init_value |= kTopRight | kBottomRight;
+      }
+      if (y == 0) {
+        init_value |= kTopLeft | kTopRight;
+      }
+      if (y == frame_dim_.ysize_groups) {
+        init_value |= kBottomLeft | kBottomRight;
+      }
+      counters_[y * (frame_dim_.xsize_groups + 1) + x] = init_value;
+    }
+  }
+}
+
+void GroupBorderAssigner::ClearDone(size_t group_id) {
+  size_t x = group_id % frame_dim_.xsize_groups;  // group column
+  size_t y = group_id / frame_dim_.xsize_groups;  // group row
+  size_t top_left_idx = y * (frame_dim_.xsize_groups + 1) + x;
+  size_t top_right_idx = y * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_right_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x + 1;
+  size_t bottom_left_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x;
+  counters_[top_left_idx].fetch_and(~kBottomRight);  // clear this group's bit
+  counters_[top_right_idx].fetch_and(~kBottomLeft);  // in each of its corners
+  counters_[bottom_left_idx].fetch_and(~kTopRight);
+  counters_[bottom_right_idx].fetch_and(~kTopLeft);
+}
+
+// Looking at each corner between groups, we can guarantee that the four
+// involved groups will agree between each other regarding the order in which
+// each of the four groups terminated. Thus, the last of the four groups
+// gets the responsibility of handling the corner.
For borders, every border +// is assigned to its top corner (for vertical borders) or to its left corner +// (for horizontal borders): the order as seen on those corners will decide who +// handles that border. + +void GroupBorderAssigner::GroupDone(size_t group_id, size_t padx, size_t pady, + Rect* rects_to_finalize, + size_t* num_to_finalize) { + size_t x = group_id % frame_dim_.xsize_groups; + size_t y = group_id / frame_dim_.xsize_groups; + Rect block_rect(x * frame_dim_.group_dim / kBlockDim, + y * frame_dim_.group_dim / kBlockDim, + frame_dim_.group_dim / kBlockDim, + frame_dim_.group_dim / kBlockDim, frame_dim_.xsize_blocks, + frame_dim_.ysize_blocks); + + size_t top_left_idx = y * (frame_dim_.xsize_groups + 1) + x; + size_t top_right_idx = y * (frame_dim_.xsize_groups + 1) + x + 1; + size_t bottom_right_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x + 1; + size_t bottom_left_idx = (y + 1) * (frame_dim_.xsize_groups + 1) + x; + + auto fetch_status = [this](size_t idx, uint8_t bit) { + // Note that the acq-rel semantics of this fetch are actually needed to + // ensure that the pixel data of the group is already written to memory. + size_t status = counters_[idx].fetch_or(bit); + JXL_DASSERT((bit & status) == 0); + return bit | status; + }; + + size_t top_left_status = fetch_status(top_left_idx, kBottomRight); + size_t top_right_status = fetch_status(top_right_idx, kBottomLeft); + size_t bottom_right_status = fetch_status(bottom_right_idx, kTopLeft); + size_t bottom_left_status = fetch_status(bottom_left_idx, kTopRight); + + size_t x1 = block_rect.x0() + block_rect.xsize(); + size_t y1 = block_rect.y0() + block_rect.ysize(); + + bool is_last_group_x = frame_dim_.xsize_groups == x + 1; + bool is_last_group_y = frame_dim_.ysize_groups == y + 1; + + // Start of border of neighbouring group, end of border of this group, start + // of border of this group (on the other side), end of border of next group. + size_t xpos[4] = { + block_rect.x0() == 0 ? 
0 : block_rect.x0() * kBlockDim - padx, + block_rect.x0() == 0 + ? 0 + : std::min(frame_dim_.xsize, block_rect.x0() * kBlockDim + padx), + is_last_group_x ? frame_dim_.xsize : x1 * kBlockDim - padx, + std::min(frame_dim_.xsize, x1 * kBlockDim + padx)}; + size_t ypos[4] = { + block_rect.y0() == 0 ? 0 : block_rect.y0() * kBlockDim - pady, + block_rect.y0() == 0 + ? 0 + : std::min(frame_dim_.ysize, block_rect.y0() * kBlockDim + pady), + is_last_group_y ? frame_dim_.ysize : y1 * kBlockDim - pady, + std::min(frame_dim_.ysize, y1 * kBlockDim + pady)}; + + *num_to_finalize = 0; + auto append_rect = [&](size_t x0, size_t x1, size_t y0, size_t y1) { + Rect rect(xpos[x0], ypos[y0], xpos[x1] - xpos[x0], ypos[y1] - ypos[y0]); + if (rect.xsize() == 0 || rect.ysize() == 0) return; + JXL_DASSERT(*num_to_finalize < kMaxToFinalize); + rects_to_finalize[(*num_to_finalize)++] = rect; + }; + + // Because of how group borders are assigned, it is impossible that we need to + // process the left and right side of some area but not the center area. Thus, + // we compute the first/last part to process in every horizontal strip and + // merge them together. We first collect a mask of what parts should be + // processed. + // We do this horizontally rather than vertically because horizontal borders + // are larger. 
+ bool available_parts_mask[3][3] = {}; // [x][y] + // Center + available_parts_mask[1][1] = true; + // Corners + if (top_left_status == 0xF) available_parts_mask[0][0] = true; + if (top_right_status == 0xF) available_parts_mask[2][0] = true; + if (bottom_right_status == 0xF) available_parts_mask[2][2] = true; + if (bottom_left_status == 0xF) available_parts_mask[0][2] = true; + // Other borders + if (top_left_status & kTopRight) available_parts_mask[1][0] = true; + if (top_left_status & kBottomLeft) available_parts_mask[0][1] = true; + if (top_right_status & kBottomRight) available_parts_mask[2][1] = true; + if (bottom_left_status & kBottomRight) available_parts_mask[1][2] = true; + + // Collect horizontal ranges. + constexpr size_t kNoSegment = 3; + std::pair horizontal_segments[3] = {{kNoSegment, kNoSegment}, + {kNoSegment, kNoSegment}, + {kNoSegment, kNoSegment}}; + for (size_t y = 0; y < 3; y++) { + for (size_t x = 0; x < 3; x++) { + if (!available_parts_mask[x][y]) continue; + JXL_DASSERT(horizontal_segments[y].second == kNoSegment || + horizontal_segments[y].second == x); + JXL_DASSERT((horizontal_segments[y].first == kNoSegment) == + (horizontal_segments[y].second == kNoSegment)); + if (horizontal_segments[y].first == kNoSegment) { + horizontal_segments[y].first = x; + } + horizontal_segments[y].second = x + 1; + } + } + if (horizontal_segments[0] == horizontal_segments[1] && + horizontal_segments[0] == horizontal_segments[2]) { + append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0, + 3); + } else if (horizontal_segments[0] == horizontal_segments[1]) { + append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0, + 2); + append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2, + 3); + } else if (horizontal_segments[1] == horizontal_segments[2]) { + append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0, + 1); + append_rect(horizontal_segments[1].first, 
horizontal_segments[1].second, 1, + 3); + } else { + append_rect(horizontal_segments[0].first, horizontal_segments[0].second, 0, + 1); + append_rect(horizontal_segments[1].first, horizontal_segments[1].second, 1, + 2); + append_rect(horizontal_segments[2].first, horizontal_segments[2].second, 2, + 3); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_group_border.h b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.h new file mode 100644 index 0000000000..2d974c9987 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_group_border.h @@ -0,0 +1,47 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_GROUP_BORDER_H_ +#define LIB_JXL_DEC_GROUP_BORDER_H_ + +#include + +#include + +#include "lib/jxl/base/arch_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image.h" + +namespace jxl { + +class GroupBorderAssigner { + public: + // Prepare the GroupBorderAssigner to handle a given frame. + void Init(const FrameDimensions& frame_dim); + // Marks a group as done, and returns the (at most 3) rects to run + // FinalizeImageRect on. `block_rect` must be the rect corresponding + // to the given `group_id`, measured in blocks. + void GroupDone(size_t group_id, size_t padx, size_t pady, + Rect* rects_to_finalize, size_t* num_to_finalize); + // Marks a group as not-done, for running re-paints. + void ClearDone(size_t group_id); + + static constexpr size_t kMaxToFinalize = 3; + + private: + FrameDimensions frame_dim_; + std::unique_ptr[]> counters_; + + // Constants to identify group positions relative to the corners. 
+ static constexpr uint8_t kTopLeft = 0x01; + static constexpr uint8_t kTopRight = 0x02; + static constexpr uint8_t kBottomRight = 0x04; + static constexpr uint8_t kBottomLeft = 0x08; +}; + +} // namespace jxl + +#endif // LIB_JXL_DEC_GROUP_BORDER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_huffman.cc b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.cc new file mode 100644 index 0000000000..05b275773a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.cc @@ -0,0 +1,255 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_huffman.h" + +#include /* for memset */ + +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/huffman_table.h" + +namespace jxl { + +static const int kCodeLengthCodes = 18; +static const uint8_t kCodeLengthCodeOrder[kCodeLengthCodes] = { + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15, +}; +static const uint8_t kDefaultCodeLength = 8; +static const uint8_t kCodeLengthRepeatCode = 16; + +int ReadHuffmanCodeLengths(const uint8_t* code_length_code_lengths, + int num_symbols, uint8_t* code_lengths, + BitReader* br) { + int symbol = 0; + uint8_t prev_code_len = kDefaultCodeLength; + int repeat = 0; + uint8_t repeat_code_len = 0; + int space = 32768; + HuffmanCode table[32]; + + uint16_t counts[16] = {0}; + for (int i = 0; i < kCodeLengthCodes; ++i) { + ++counts[code_length_code_lengths[i]]; + } + if (!BuildHuffmanTable(table, 5, code_length_code_lengths, kCodeLengthCodes, + &counts[0])) { + return 0; + } + + while (symbol < num_symbols && space > 0) { + const HuffmanCode* p = table; + uint8_t code_len; + br->Refill(); + p += br->PeekFixedBits<5>(); + br->Consume(p->bits); + code_len = (uint8_t)p->value; + if (code_len < kCodeLengthRepeatCode) { + repeat = 0; + code_lengths[symbol++] = code_len; + if (code_len != 0) { + 
prev_code_len = code_len; + space -= 32768u >> code_len; + } + } else { + const int extra_bits = code_len - 14; + int old_repeat; + int repeat_delta; + uint8_t new_len = 0; + if (code_len == kCodeLengthRepeatCode) { + new_len = prev_code_len; + } + if (repeat_code_len != new_len) { + repeat = 0; + repeat_code_len = new_len; + } + old_repeat = repeat; + if (repeat > 0) { + repeat -= 2; + repeat <<= extra_bits; + } + repeat += (int)br->ReadBits(extra_bits) + 3; + repeat_delta = repeat - old_repeat; + if (symbol + repeat_delta > num_symbols) { + return 0; + } + memset(&code_lengths[symbol], repeat_code_len, (size_t)repeat_delta); + symbol += repeat_delta; + if (repeat_code_len != 0) { + space -= repeat_delta << (15 - repeat_code_len); + } + } + } + if (space != 0) { + return 0; + } + memset(&code_lengths[symbol], 0, (size_t)(num_symbols - symbol)); + return true; +} + +static JXL_INLINE bool ReadSimpleCode(size_t alphabet_size, BitReader* br, + HuffmanCode* table) { + size_t max_bits = + (alphabet_size > 1u) ? FloorLog2Nonzero(alphabet_size - 1u) + 1 : 0; + + size_t num_symbols = br->ReadFixedBits<2>() + 1; + + uint16_t symbols[4] = {0}; + for (size_t i = 0; i < num_symbols; ++i) { + uint16_t symbol = br->ReadBits(max_bits); + if (symbol >= alphabet_size) { + return false; + } + symbols[i] = symbol; + } + + for (size_t i = 0; i < num_symbols - 1; ++i) { + for (size_t j = i + 1; j < num_symbols; ++j) { + if (symbols[i] == symbols[j]) return false; + } + } + + // 4 symbols have to option to encode. 
+ if (num_symbols == 4) num_symbols += br->ReadFixedBits<1>(); + + const auto swap_symbols = [&symbols](size_t i, size_t j) { + uint16_t t = symbols[j]; + symbols[j] = symbols[i]; + symbols[i] = t; + }; + + size_t table_size = 1; + switch (num_symbols) { + case 1: + table[0] = {0, symbols[0]}; + break; + case 2: + if (symbols[0] > symbols[1]) swap_symbols(0, 1); + table[0] = {1, symbols[0]}; + table[1] = {1, symbols[1]}; + table_size = 2; + break; + case 3: + if (symbols[1] > symbols[2]) swap_symbols(1, 2); + table[0] = {1, symbols[0]}; + table[2] = {1, symbols[0]}; + table[1] = {2, symbols[1]}; + table[3] = {2, symbols[2]}; + table_size = 4; + break; + case 4: { + for (size_t i = 0; i < 3; ++i) { + for (size_t j = i + 1; j < 4; ++j) { + if (symbols[i] > symbols[j]) swap_symbols(i, j); + } + } + table[0] = {2, symbols[0]}; + table[2] = {2, symbols[1]}; + table[1] = {2, symbols[2]}; + table[3] = {2, symbols[3]}; + table_size = 4; + break; + } + case 5: { + if (symbols[2] > symbols[3]) swap_symbols(2, 3); + table[0] = {1, symbols[0]}; + table[1] = {2, symbols[1]}; + table[2] = {1, symbols[0]}; + table[3] = {3, symbols[2]}; + table[4] = {1, symbols[0]}; + table[5] = {2, symbols[1]}; + table[6] = {1, symbols[0]}; + table[7] = {3, symbols[3]}; + table_size = 8; + break; + } + default: { + // Unreachable. 
+ return false; + } + } + + const uint32_t goal_size = 1u << kHuffmanTableBits; + while (table_size != goal_size) { + memcpy(&table[table_size], &table[0], + (size_t)table_size * sizeof(table[0])); + table_size <<= 1; + } + + return true; +} + +bool HuffmanDecodingData::ReadFromBitStream(size_t alphabet_size, + BitReader* br) { + if (alphabet_size > (1 << PREFIX_MAX_BITS)) return false; + + /* simple_code_or_skip is used as follows: + 1 for simple code; + 0 for no skipping, 2 skips 2 code lengths, 3 skips 3 code lengths */ + uint32_t simple_code_or_skip = br->ReadFixedBits<2>(); + if (simple_code_or_skip == 1u) { + table_.resize(1u << kHuffmanTableBits); + return ReadSimpleCode(alphabet_size, br, table_.data()); + } + + std::vector code_lengths(alphabet_size, 0); + uint8_t code_length_code_lengths[kCodeLengthCodes] = {0}; + int space = 32; + int num_codes = 0; + /* Static Huffman code for the code length code lengths */ + static const HuffmanCode huff[16] = { + {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 1}, + {2, 0}, {2, 4}, {2, 3}, {3, 2}, {2, 0}, {2, 4}, {2, 3}, {4, 5}, + }; + for (size_t i = simple_code_or_skip; i < kCodeLengthCodes && space > 0; ++i) { + const int code_len_idx = kCodeLengthCodeOrder[i]; + const HuffmanCode* p = huff; + uint8_t v; + br->Refill(); + p += br->PeekFixedBits<4>(); + br->Consume(p->bits); + v = (uint8_t)p->value; + code_length_code_lengths[code_len_idx] = v; + if (v != 0) { + space -= (32u >> v); + ++num_codes; + } + } + bool ok = (num_codes == 1 || space == 0) && + ReadHuffmanCodeLengths(code_length_code_lengths, alphabet_size, + &code_lengths[0], br); + + if (!ok) return false; + uint16_t counts[16] = {0}; + for (size_t i = 0; i < alphabet_size; ++i) { + ++counts[code_lengths[i]]; + } + table_.resize(alphabet_size + 376); + uint32_t table_size = + BuildHuffmanTable(table_.data(), kHuffmanTableBits, &code_lengths[0], + alphabet_size, &counts[0]); + table_.resize(table_size); + return (table_size > 0); +} + +// 
Decodes the next Huffman coded symbol from the bit-stream. +uint16_t HuffmanDecodingData::ReadSymbol(BitReader* br) const { + size_t n_bits; + const HuffmanCode* table = table_.data(); + table += br->PeekBits(kHuffmanTableBits); + n_bits = table->bits; + if (n_bits > kHuffmanTableBits) { + br->Consume(kHuffmanTableBits); + n_bits -= kHuffmanTableBits; + table += table->value; + table += br->PeekBits(n_bits); + } + br->Consume(table->bits); + return table->value; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_huffman.h b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.h new file mode 100644 index 0000000000..162c3e309c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_huffman.h @@ -0,0 +1,32 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_HUFFMAN_H_ +#define LIB_JXL_DEC_HUFFMAN_H_ + +#include +#include + +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/huffman_table.h" + +namespace jxl { + +static constexpr size_t kHuffmanTableBits = 8u; + +struct HuffmanDecodingData { + // Decodes the Huffman code lengths from the bit-stream and fills in the + // pre-allocated table with the corresponding 2-level Huffman decoding table. + // Returns false if the Huffman code lengths can not de decoded. + bool ReadFromBitStream(size_t alphabet_size, BitReader* br); + + uint16_t ReadSymbol(BitReader* br) const; + + std::vector table_; +}; + +} // namespace jxl + +#endif // LIB_JXL_DEC_HUFFMAN_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_modular.cc b/third-party/libjxl/libjxl/lib/jxl/dec_modular.cc new file mode 100644 index 0000000000..0509b32269 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_modular.cc @@ -0,0 +1,776 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_modular.h" + +#include + +#include +#include +#include + +#include "lib/jxl/frame_header.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/dec_modular.cc" +#include +#include + +#include "lib/jxl/alpha.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/compressed_dc.h" +#include "lib/jxl/epf.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Rebind; + +void MultiplySum(const size_t xsize, + const pixel_type* const JXL_RESTRICT row_in, + const pixel_type* const JXL_RESTRICT row_in_Y, + const float factor, float* const JXL_RESTRICT row_out) { + const HWY_FULL(float) df; + const Rebind di; // assumes pixel_type <= float + const auto factor_v = Set(df, factor); + for (size_t x = 0; x < xsize; x += Lanes(di)) { + const auto in = Add(Load(di, row_in + x), Load(di, row_in_Y + x)); + const auto out = Mul(ConvertTo(df, in), factor_v); + Store(out, df, row_out + x); + } +} + +void RgbFromSingle(const size_t xsize, + const pixel_type* const JXL_RESTRICT row_in, + const float factor, float* out_r, float* out_g, + float* out_b) { + const HWY_FULL(float) df; + const Rebind di; // assumes pixel_type <= float + + const auto factor_v = Set(df, factor); + for (size_t x = 0; x < xsize; x += Lanes(di)) { + const auto in = Load(di, row_in + x); + const auto out = Mul(ConvertTo(df, in), factor_v); + Store(out, df, out_r + x); + Store(out, df, out_g + x); + Store(out, df, out_b + x); + } +} + +void SingleFromSingle(const 
size_t xsize, + const pixel_type* const JXL_RESTRICT row_in, + const float factor, float* row_out) { + const HWY_FULL(float) df; + const Rebind di; // assumes pixel_type <= float + + const auto factor_v = Set(df, factor); + for (size_t x = 0; x < xsize; x += Lanes(di)) { + const auto in = Load(di, row_in + x); + const auto out = Mul(ConvertTo(df, in), factor_v); + Store(out, df, row_out + x); + } +} +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(MultiplySum); // Local function +HWY_EXPORT(RgbFromSingle); // Local function +HWY_EXPORT(SingleFromSingle); // Local function + +// Slow conversion using double precision multiplication, only +// needed when the bit depth is too high for single precision +void SingleFromSingleAccurate(const size_t xsize, + const pixel_type* const JXL_RESTRICT row_in, + const double factor, float* row_out) { + for (size_t x = 0; x < xsize; x++) { + row_out[x] = row_in[x] * factor; + } +} + +// convert custom [bits]-bit float (with [exp_bits] exponent bits) stored as int +// back to binary32 float +void int_to_float(const pixel_type* const JXL_RESTRICT row_in, + float* const JXL_RESTRICT row_out, const size_t xsize, + const int bits, const int exp_bits) { + if (bits == 32) { + JXL_ASSERT(sizeof(pixel_type) == sizeof(float)); + JXL_ASSERT(exp_bits == 8); + memcpy(row_out, row_in, xsize * sizeof(float)); + return; + } + int exp_bias = (1 << (exp_bits - 1)) - 1; + int sign_shift = bits - 1; + int mant_bits = bits - exp_bits - 1; + int mant_shift = 23 - mant_bits; + for (size_t x = 0; x < xsize; ++x) { + uint32_t f; + memcpy(&f, &row_in[x], 4); + int signbit = (f >> sign_shift); + f &= (1 << sign_shift) - 1; + if (f == 0) { + row_out[x] = (signbit ? 
-0.f : 0.f); + continue; + } + int exp = (f >> mant_bits); + int mantissa = (f & ((1 << mant_bits) - 1)); + mantissa <<= mant_shift; + // Try to normalize only if there is space for maneuver. + if (exp == 0 && exp_bits < 8) { + // subnormal number + while ((mantissa & 0x800000) == 0) { + mantissa <<= 1; + exp--; + } + exp++; + // remove leading 1 because it is implicit now + mantissa &= 0x7fffff; + } + exp -= exp_bias; + // broke up the arbitrary float into its parts, now reassemble into + // binary32 + exp += 127; + JXL_ASSERT(exp >= 0); + f = (signbit ? 0x80000000 : 0); + f |= (exp << 23); + f |= mantissa; + memcpy(&row_out[x], &f, 4); + } +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string ModularStreamId::DebugString() const { + std::ostringstream os; + os << (kind == kGlobalData ? "ModularGlobal" + : kind == kVarDCTDC ? "VarDCTDC" + : kind == kModularDC ? "ModularDC" + : kind == kACMetadata ? "ACMeta" + : kind == kQuantTable ? "QuantTable" + : kind == kModularAC ? "ModularAC" + : ""); + if (kind == kVarDCTDC || kind == kModularDC || kind == kACMetadata || + kind == kModularAC) { + os << " group " << group_id; + } + if (kind == kModularAC) { + os << " pass " << pass_id; + } + if (kind == kQuantTable) { + os << " " << quant_table_id; + } + return os.str(); +} +#endif + +Status ModularFrameDecoder::DecodeGlobalInfo(BitReader* reader, + const FrameHeader& frame_header, + bool allow_truncated_group) { + bool decode_color = frame_header.encoding == FrameEncoding::kModular; + const auto& metadata = frame_header.nonserialized_metadata->m; + bool is_gray = metadata.color_encoding.IsGray(); + size_t nb_chans = 3; + if (is_gray && frame_header.color_transform == ColorTransform::kNone) { + nb_chans = 1; + } + do_color = decode_color; + size_t nb_extra = metadata.extra_channel_info.size(); + bool has_tree = reader->ReadBits(1); + if (!allow_truncated_group || + reader->TotalBitsConsumed() < reader->TotalBytes() * kBitsPerByte) { + if (has_tree) { + size_t tree_size_limit = + 
std::min(static_cast(1 << 22), + 1024 + frame_dim.xsize * frame_dim.ysize * + (nb_chans + nb_extra) / 16); + JXL_RETURN_IF_ERROR(DecodeTree(reader, &tree, tree_size_limit)); + JXL_RETURN_IF_ERROR( + DecodeHistograms(reader, (tree.size() + 1) / 2, &code, &context_map)); + } + } + if (!do_color) nb_chans = 0; + + bool fp = metadata.bit_depth.floating_point_sample; + + // bits_per_sample is just metadata for XYB images. + if (metadata.bit_depth.bits_per_sample >= 32 && do_color && + frame_header.color_transform != ColorTransform::kXYB) { + if (metadata.bit_depth.bits_per_sample == 32 && fp == false) { + return JXL_FAILURE("uint32_t not supported in dec_modular"); + } else if (metadata.bit_depth.bits_per_sample > 32) { + return JXL_FAILURE("bits_per_sample > 32 not supported"); + } + } + + Image gi(frame_dim.xsize, frame_dim.ysize, metadata.bit_depth.bits_per_sample, + nb_chans + nb_extra); + + all_same_shift = true; + if (frame_header.color_transform == ColorTransform::kYCbCr) { + for (size_t c = 0; c < nb_chans; c++) { + gi.channel[c].hshift = frame_header.chroma_subsampling.HShift(c); + gi.channel[c].vshift = frame_header.chroma_subsampling.VShift(c); + size_t xsize_shifted = + DivCeil(frame_dim.xsize, 1 << gi.channel[c].hshift); + size_t ysize_shifted = + DivCeil(frame_dim.ysize, 1 << gi.channel[c].vshift); + gi.channel[c].shrink(xsize_shifted, ysize_shifted); + if (gi.channel[c].hshift != gi.channel[0].hshift || + gi.channel[c].vshift != gi.channel[0].vshift) + all_same_shift = false; + } + } + + for (size_t ec = 0, c = nb_chans; ec < nb_extra; ec++, c++) { + size_t ecups = frame_header.extra_channel_upsampling[ec]; + gi.channel[c].shrink(DivCeil(frame_dim.xsize_upsampled, ecups), + DivCeil(frame_dim.ysize_upsampled, ecups)); + gi.channel[c].hshift = gi.channel[c].vshift = + CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling); + if (gi.channel[c].hshift != gi.channel[0].hshift || + gi.channel[c].vshift != gi.channel[0].vshift) + all_same_shift = 
false; + } + + JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (w/o transforms) %s", + gi.DebugString().c_str()); + ModularOptions options; + options.max_chan_size = frame_dim.group_dim; + options.group_dim = frame_dim.group_dim; + Status dec_status = ModularGenericDecompress( + reader, gi, &global_header, ModularStreamId::Global().ID(frame_dim), + &options, + /*undo_transforms=*/false, &tree, &code, &context_map, + allow_truncated_group); + if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status); + if (dec_status.IsFatalError()) { + return JXL_FAILURE("Failed to decode global modular info"); + } + + // TODO(eustas): are we sure this can be done after partial decode? + have_something = false; + for (size_t c = 0; c < gi.channel.size(); c++) { + Channel& gic = gi.channel[c]; + if (c >= gi.nb_meta_channels && gic.w <= frame_dim.group_dim && + gic.h <= frame_dim.group_dim) + have_something = true; + } + // move global transforms to groups if possible + if (!have_something && all_same_shift) { + if (gi.transform.size() == 1 && gi.transform[0].id == TransformId::kRCT) { + global_transform = gi.transform; + gi.transform.clear(); + // TODO(jon): also move no-delta-palette out (trickier though) + } + } + full_image = std::move(gi); + JXL_DEBUG_V(6, "DecodeGlobalInfo: full_image (with transforms) %s", + full_image.DebugString().c_str()); + return dec_status; +} + +void ModularFrameDecoder::MaybeDropFullImage() { + if (full_image.transform.empty() && !have_something && all_same_shift) { + use_full_image = false; + JXL_DEBUG_V(6, "Dropping full image"); + for (auto& ch : full_image.channel) { + // keep metadata on channels around, but dealloc their planes + ch.plane = Plane(); + } + } +} + +Status ModularFrameDecoder::DecodeGroup( + const Rect& rect, BitReader* reader, int minShift, int maxShift, + const ModularStreamId& stream, bool zerofill, PassesDecoderState* dec_state, + RenderPipelineInput* render_pipeline_input, bool allow_truncated, + bool* should_run_pipeline) { + 
JXL_DEBUG_V(6, "Decoding %s with rect %s and shift bracket %d..%d %s", + stream.DebugString().c_str(), Description(rect).c_str(), minShift, + maxShift, zerofill ? "using zerofill" : ""); + JXL_DASSERT(stream.kind == ModularStreamId::kModularDC || + stream.kind == ModularStreamId::kModularAC); + const size_t xsize = rect.xsize(); + const size_t ysize = rect.ysize(); + Image gi(xsize, ysize, full_image.bitdepth, 0); + // start at the first bigger-than-groupsize non-metachannel + size_t c = full_image.nb_meta_channels; + for (; c < full_image.channel.size(); c++) { + Channel& fc = full_image.channel[c]; + if (fc.w > frame_dim.group_dim || fc.h > frame_dim.group_dim) break; + } + size_t beginc = c; + for (; c < full_image.channel.size(); c++) { + Channel& fc = full_image.channel[c]; + int shift = std::min(fc.hshift, fc.vshift); + if (shift > maxShift) continue; + if (shift < minShift) continue; + Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, + rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); + if (r.xsize() == 0 || r.ysize() == 0) continue; + if (zerofill && use_full_image) { + for (size_t y = 0; y < r.ysize(); ++y) { + pixel_type* const JXL_RESTRICT row_out = r.Row(&fc.plane, y); + memset(row_out, 0, r.xsize() * sizeof(*row_out)); + } + } else { + Channel gc(r.xsize(), r.ysize()); + if (zerofill) ZeroFillImage(&gc.plane); + gc.hshift = fc.hshift; + gc.vshift = fc.vshift; + gi.channel.emplace_back(std::move(gc)); + } + } + if (zerofill && use_full_image) return true; + // Return early if there's nothing to decode. Otherwise there might be + // problems later (in ModularImageToDecodedRect). + if (gi.channel.empty()) { + if (dec_state && should_run_pipeline) { + const auto& frame_header = dec_state->shared->frame_header; + const auto* metadata = frame_header.nonserialized_metadata; + if (do_color || metadata->m.num_extra_channels > 0) { + // Signal to FrameDecoder that we do not have some of the required input + // for the render pipeline. 
+ *should_run_pipeline = false; + } + } + JXL_DEBUG_V(6, "Nothing to decode, returning early."); + return true; + } + ModularOptions options; + if (!zerofill) { + auto status = ModularGenericDecompress( + reader, gi, /*header=*/nullptr, stream.ID(frame_dim), &options, + /*undo_transforms=*/true, &tree, &code, &context_map, allow_truncated); + if (!allow_truncated) JXL_RETURN_IF_ERROR(status); + if (status.IsFatalError()) return status; + } + // Undo global transforms that have been pushed to the group level + if (!use_full_image) { + JXL_ASSERT(render_pipeline_input); + for (auto t : global_transform) { + JXL_RETURN_IF_ERROR(t.Inverse(gi, global_header.wp_header)); + } + JXL_RETURN_IF_ERROR(ModularImageToDecodedRect(gi, dec_state, nullptr, + *render_pipeline_input, + Rect(0, 0, gi.w, gi.h))); + return true; + } + int gic = 0; + for (c = beginc; c < full_image.channel.size(); c++) { + Channel& fc = full_image.channel[c]; + int shift = std::min(fc.hshift, fc.vshift); + if (shift > maxShift) continue; + if (shift < minShift) continue; + Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, + rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); + if (r.xsize() == 0 || r.ysize() == 0) continue; + JXL_ASSERT(use_full_image); + CopyImageTo(/*rect_from=*/Rect(0, 0, r.xsize(), r.ysize()), + /*from=*/gi.channel[gic].plane, + /*rect_to=*/r, /*to=*/&fc.plane); + gic++; + } + return true; +} + +Status ModularFrameDecoder::DecodeVarDCTDC(size_t group_id, BitReader* reader, + PassesDecoderState* dec_state) { + const Rect r = dec_state->shared->DCGroupRect(group_id); + // TODO(eustas): investigate if we could reduce the impact of + // EvalRationalPolynomial; generally speaking, the limit is + // 2**(128/(3*magic)), where 128 comes from IEEE 754 exponent, + // 3 comes from XybToRgb that cubes the values, and "magic" is + // the sum of all other contributions. 2**18 is known to lead + // to NaN on input found by fuzzing (see commit message). 
+ Image image(r.xsize(), r.ysize(), full_image.bitdepth, 3); + size_t stream_id = ModularStreamId::VarDCTDC(group_id).ID(frame_dim); + reader->Refill(); + size_t extra_precision = reader->ReadFixedBits<2>(); + float mul = 1.0f / (1 << extra_precision); + ModularOptions options; + for (size_t c = 0; c < 3; c++) { + Channel& ch = image.channel[c < 2 ? c ^ 1 : c]; + ch.w >>= dec_state->shared->frame_header.chroma_subsampling.HShift(c); + ch.h >>= dec_state->shared->frame_header.chroma_subsampling.VShift(c); + ch.shrink(); + } + if (!ModularGenericDecompress( + reader, image, /*header=*/nullptr, stream_id, &options, + /*undo_transforms=*/true, &tree, &code, &context_map)) { + return JXL_FAILURE("Failed to decode modular DC group"); + } + DequantDC(r, &dec_state->shared_storage.dc_storage, + &dec_state->shared_storage.quant_dc, image, + dec_state->shared->quantizer.MulDC(), mul, + dec_state->shared->cmap.DCFactors(), + dec_state->shared->frame_header.chroma_subsampling, + dec_state->shared->block_ctx_map); + return true; +} + +Status ModularFrameDecoder::DecodeAcMetadata(size_t group_id, BitReader* reader, + PassesDecoderState* dec_state) { + const Rect r = dec_state->shared->DCGroupRect(group_id); + size_t upper_bound = r.xsize() * r.ysize(); + reader->Refill(); + size_t count = reader->ReadBits(CeilLog2Nonzero(upper_bound)) + 1; + size_t stream_id = ModularStreamId::ACMetadata(group_id).ID(frame_dim); + // YToX, YToB, ACS + QF, EPF + Image image(r.xsize(), r.ysize(), full_image.bitdepth, 4); + static_assert(kColorTileDimInBlocks == 8, "Color tile size changed"); + Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3); + image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3); + image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3); + image.channel[2] = Channel(count, 2, 0, 0); + ModularOptions options; + if (!ModularGenericDecompress( + reader, image, /*header=*/nullptr, stream_id, &options, + /*undo_transforms=*/true, &tree, &code, 
&context_map)) { + return JXL_FAILURE("Failed to decode AC metadata"); + } + ConvertPlaneAndClamp(Rect(image.channel[0].plane), image.channel[0].plane, cr, + &dec_state->shared_storage.cmap.ytox_map); + ConvertPlaneAndClamp(Rect(image.channel[1].plane), image.channel[1].plane, cr, + &dec_state->shared_storage.cmap.ytob_map); + size_t num = 0; + bool is444 = dec_state->shared->frame_header.chroma_subsampling.Is444(); + auto& ac_strategy = dec_state->shared_storage.ac_strategy; + size_t xlim = std::min(ac_strategy.xsize(), r.x0() + r.xsize()); + size_t ylim = std::min(ac_strategy.ysize(), r.y0() + r.ysize()); + uint32_t local_used_acs = 0; + for (size_t iy = 0; iy < r.ysize(); iy++) { + size_t y = r.y0() + iy; + int32_t* row_qf = r.Row(&dec_state->shared_storage.raw_quant_field, iy); + uint8_t* row_epf = r.Row(&dec_state->shared_storage.epf_sharpness, iy); + int32_t* row_in_1 = image.channel[2].plane.Row(0); + int32_t* row_in_2 = image.channel[2].plane.Row(1); + int32_t* row_in_3 = image.channel[3].plane.Row(iy); + for (size_t ix = 0; ix < r.xsize(); ix++) { + size_t x = r.x0() + ix; + int sharpness = row_in_3[ix]; + if (sharpness < 0 || sharpness >= LoopFilter::kEpfSharpEntries) { + return JXL_FAILURE("Corrupted sharpness field"); + } + row_epf[ix] = sharpness; + if (ac_strategy.IsValid(x, y)) { + continue; + } + + if (num >= count) return JXL_FAILURE("Corrupted stream"); + + if (!AcStrategy::IsRawStrategyValid(row_in_1[num])) { + return JXL_FAILURE("Invalid AC strategy"); + } + local_used_acs |= 1u << row_in_1[num]; + AcStrategy acs = AcStrategy::FromRawStrategy(row_in_1[num]); + if ((acs.covered_blocks_x() > 1 || acs.covered_blocks_y() > 1) && + !is444) { + return JXL_FAILURE( + "AC strategy not compatible with chroma subsampling"); + } + // Ensure that blocks do not overflow *AC* groups. 
+ size_t next_x_ac_block = (x / kGroupDimInBlocks + 1) * kGroupDimInBlocks; + size_t next_y_ac_block = (y / kGroupDimInBlocks + 1) * kGroupDimInBlocks; + size_t next_x_dct_block = x + acs.covered_blocks_x(); + size_t next_y_dct_block = y + acs.covered_blocks_y(); + if (next_x_dct_block > next_x_ac_block || next_x_dct_block > xlim) { + return JXL_FAILURE("Invalid AC strategy, x overflow"); + } + if (next_y_dct_block > next_y_ac_block || next_y_dct_block > ylim) { + return JXL_FAILURE("Invalid AC strategy, y overflow"); + } + JXL_RETURN_IF_ERROR( + ac_strategy.SetNoBoundsCheck(x, y, AcStrategy::Type(row_in_1[num]))); + row_qf[ix] = 1 + std::max(0, std::min(Quantizer::kQuantMax - 1, + row_in_2[num])); + num++; + } + } + dec_state->used_acs |= local_used_acs; + if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) { + ComputeSigma(r, dec_state); + } + return true; +} + +Status ModularFrameDecoder::ModularImageToDecodedRect( + Image& gi, PassesDecoderState* dec_state, jxl::ThreadPool* pool, + RenderPipelineInput& render_pipeline_input, Rect modular_rect) { + const auto& frame_header = dec_state->shared->frame_header; + const auto* metadata = frame_header.nonserialized_metadata; + JXL_CHECK(gi.transform.empty()); + + auto get_row = [&](size_t c, size_t y) { + const auto& buffer = render_pipeline_input.GetBuffer(c); + return buffer.second.Row(buffer.first, y); + }; + + size_t c = 0; + if (do_color) { + const bool rgb_from_gray = + metadata->m.color_encoding.IsGray() && + frame_header.color_transform == ColorTransform::kNone; + const bool fp = metadata->m.bit_depth.floating_point_sample && + frame_header.color_transform != ColorTransform::kXYB; + for (; c < 3; c++) { + double factor = full_image.bitdepth < 32 + ? 
1.0 / ((1u << full_image.bitdepth) - 1) + : 0; + size_t c_in = c; + if (frame_header.color_transform == ColorTransform::kXYB) { + factor = dec_state->shared->matrices.DCQuants()[c]; + // XYB is encoded as YX(B-Y) + if (c < 2) c_in = 1 - c; + } else if (rgb_from_gray) { + c_in = 0; + } + JXL_ASSERT(c_in < gi.channel.size()); + Channel& ch_in = gi.channel[c_in]; + // TODO(eustas): could we detect it on earlier stage? + if (ch_in.w == 0 || ch_in.h == 0) { + return JXL_FAILURE("Empty image"); + } + JXL_CHECK(ch_in.hshift <= 3 && ch_in.vshift <= 3); + Rect r = render_pipeline_input.GetBuffer(c).second; + Rect mr(modular_rect.x0() >> ch_in.hshift, + modular_rect.y0() >> ch_in.vshift, + DivCeil(modular_rect.xsize(), 1 << ch_in.hshift), + DivCeil(modular_rect.ysize(), 1 << ch_in.vshift)); + mr = mr.Crop(ch_in.plane); + size_t xsize_shifted = r.xsize(); + size_t ysize_shifted = r.ysize(); + if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) { + return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS + "x%" PRIuS + " modular channel into " + "a %" PRIuS "x%" PRIuS " rect", + mr.xsize(), mr.ysize(), r.xsize(), r.ysize()); + } + if (frame_header.color_transform == ColorTransform::kXYB && c == 2) { + JXL_ASSERT(!fp); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, ysize_shifted, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + const pixel_type* const JXL_RESTRICT row_in = + mr.Row(&ch_in.plane, y); + const pixel_type* const JXL_RESTRICT row_in_Y = + mr.Row(&gi.channel[0].plane, y); + float* const JXL_RESTRICT row_out = get_row(c, y); + HWY_DYNAMIC_DISPATCH(MultiplySum) + (xsize_shifted, row_in, row_in_Y, factor, row_out); + }, + "ModularIntToFloat")); + } else if (fp) { + int bits = metadata->m.bit_depth.bits_per_sample; + int exp_bits = metadata->m.bit_depth.exponent_bits_per_sample; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, ysize_shifted, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const 
size_t y = task; + const pixel_type* const JXL_RESTRICT row_in = + mr.Row(&ch_in.plane, y); + if (rgb_from_gray) { + for (size_t cc = 0; cc < 3; cc++) { + float* const JXL_RESTRICT row_out = get_row(cc, y); + int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits); + } + } else { + float* const JXL_RESTRICT row_out = get_row(c, y); + int_to_float(row_in, row_out, xsize_shifted, bits, exp_bits); + } + }, + "ModularIntToFloat_losslessfloat")); + } else { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, ysize_shifted, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + const pixel_type* const JXL_RESTRICT row_in = + mr.Row(&ch_in.plane, y); + if (rgb_from_gray) { + if (full_image.bitdepth < 23) { + HWY_DYNAMIC_DISPATCH(RgbFromSingle) + (xsize_shifted, row_in, factor, get_row(0, y), get_row(1, y), + get_row(2, y)); + } else { + SingleFromSingleAccurate(xsize_shifted, row_in, factor, + get_row(0, y)); + SingleFromSingleAccurate(xsize_shifted, row_in, factor, + get_row(1, y)); + SingleFromSingleAccurate(xsize_shifted, row_in, factor, + get_row(2, y)); + } + } else { + float* const JXL_RESTRICT row_out = get_row(c, y); + if (full_image.bitdepth < 23) { + HWY_DYNAMIC_DISPATCH(SingleFromSingle) + (xsize_shifted, row_in, factor, row_out); + } else { + SingleFromSingleAccurate(xsize_shifted, row_in, factor, + row_out); + } + } + }, + "ModularIntToFloat")); + } + if (rgb_from_gray) { + break; + } + } + if (rgb_from_gray) { + c = 1; + } + } + size_t num_extra_channels = metadata->m.num_extra_channels; + for (size_t ec = 0; ec < num_extra_channels; ec++, c++) { + const ExtraChannelInfo& eci = metadata->m.extra_channel_info[ec]; + int bits = eci.bit_depth.bits_per_sample; + int exp_bits = eci.bit_depth.exponent_bits_per_sample; + bool fp = eci.bit_depth.floating_point_sample; + JXL_ASSERT(fp || bits < 32); + const double factor = fp ? 
0 : (1.0 / ((1u << bits) - 1)); + JXL_ASSERT(c < gi.channel.size()); + Channel& ch_in = gi.channel[c]; + Rect r = render_pipeline_input.GetBuffer(3 + ec).second; + Rect mr(modular_rect.x0() >> ch_in.hshift, + modular_rect.y0() >> ch_in.vshift, + DivCeil(modular_rect.xsize(), 1 << ch_in.hshift), + DivCeil(modular_rect.ysize(), 1 << ch_in.vshift)); + mr = mr.Crop(ch_in.plane); + if (r.ysize() != mr.ysize() || r.xsize() != mr.xsize()) { + return JXL_FAILURE("Dimension mismatch: trying to fit a %" PRIuS + "x%" PRIuS + " modular channel into " + "a %" PRIuS "x%" PRIuS " rect", + mr.xsize(), mr.ysize(), r.xsize(), r.ysize()); + } + for (size_t y = 0; y < r.ysize(); ++y) { + float* const JXL_RESTRICT row_out = + r.Row(render_pipeline_input.GetBuffer(3 + ec).first, y); + const pixel_type* const JXL_RESTRICT row_in = mr.Row(&ch_in.plane, y); + if (fp) { + int_to_float(row_in, row_out, r.xsize(), bits, exp_bits); + } else { + if (full_image.bitdepth < 23) { + HWY_DYNAMIC_DISPATCH(SingleFromSingle) + (r.xsize(), row_in, factor, row_out); + } else { + SingleFromSingleAccurate(r.xsize(), row_in, factor, row_out); + } + } + } + } + return true; +} + +Status ModularFrameDecoder::FinalizeDecoding(PassesDecoderState* dec_state, + jxl::ThreadPool* pool, + bool inplace) { + if (!use_full_image) return true; + Image gi = (inplace ? 
std::move(full_image) : full_image.clone()); + size_t xsize = gi.w; + size_t ysize = gi.h; + + JXL_DEBUG_V(3, "Finalizing decoding for modular image: %s", + gi.DebugString().c_str()); + + // Don't use threads if total image size is smaller than a group + if (xsize * ysize < frame_dim.group_dim * frame_dim.group_dim) pool = nullptr; + + // Undo the global transforms + gi.undo_transforms(global_header.wp_header, pool); + JXL_DASSERT(global_transform.empty()); + if (gi.error) return JXL_FAILURE("Undoing transforms failed"); + + for (size_t i = 0; i < dec_state->shared->frame_dim.num_groups; i++) { + dec_state->render_pipeline->ClearDone(i); + } + std::atomic has_error{false}; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, dec_state->shared->frame_dim.num_groups, + [&](size_t num_threads) { + const auto& frame_header = dec_state->shared->frame_header; + bool use_group_ids = (frame_header.encoding == FrameEncoding::kVarDCT || + (frame_header.flags & FrameHeader::kNoise)); + return dec_state->render_pipeline->PrepareForThreads(num_threads, + use_group_ids); + }, + [&](const uint32_t group, size_t thread_id) { + RenderPipelineInput input = + dec_state->render_pipeline->GetInputBuffers(group, thread_id); + if (!ModularImageToDecodedRect(gi, dec_state, nullptr, input, + dec_state->shared->GroupRect(group))) { + has_error = true; + return; + } + input.Done(); + }, + "ModularToRect")); + if (has_error) { + return JXL_FAILURE("Error producing input to render pipeline"); + } + return true; +} + +static constexpr const float kAlmostZero = 1e-8f; + +Status ModularFrameDecoder::DecodeQuantTable( + size_t required_size_x, size_t required_size_y, BitReader* br, + QuantEncoding* encoding, size_t idx, + ModularFrameDecoder* modular_frame_decoder) { + JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->qraw.qtable_den)); + if (encoding->qraw.qtable_den < kAlmostZero) { + // qtable[] values are already checked for <= 0 so the denominator may not + // be negative. 
+ return JXL_FAILURE("Invalid qtable_den: value too small"); + } + Image image(required_size_x, required_size_y, 8, 3); + ModularOptions options; + if (modular_frame_decoder) { + JXL_RETURN_IF_ERROR(ModularGenericDecompress( + br, image, /*header=*/nullptr, + ModularStreamId::QuantTable(idx).ID(modular_frame_decoder->frame_dim), + &options, /*undo_transforms=*/true, &modular_frame_decoder->tree, + &modular_frame_decoder->code, &modular_frame_decoder->context_map)); + } else { + JXL_RETURN_IF_ERROR(ModularGenericDecompress(br, image, /*header=*/nullptr, + 0, &options, + /*undo_transforms=*/true)); + } + if (!encoding->qraw.qtable) { + encoding->qraw.qtable = new std::vector(); + } + encoding->qraw.qtable->resize(required_size_x * required_size_y * 3); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < required_size_y; y++) { + int32_t* JXL_RESTRICT row = image.channel[c].Row(y); + for (size_t x = 0; x < required_size_x; x++) { + (*encoding->qraw.qtable)[c * required_size_x * required_size_y + + y * required_size_x + x] = row[x]; + if (row[x] <= 0) { + return JXL_FAILURE("Invalid raw quantization table"); + } + } + } + } + return true; +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_modular.h b/third-party/libjxl/libjxl/lib/jxl/dec_modular.h new file mode 100644 index 0000000000..aae643cf1f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_modular.h @@ -0,0 +1,140 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_DEC_MODULAR_H_ +#define LIB_JXL_DEC_MODULAR_H_ + +#include + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +struct ModularStreamId { + enum Kind { + kGlobalData, + kVarDCTDC, + kModularDC, + kACMetadata, + kQuantTable, + kModularAC + }; + Kind kind; + size_t quant_table_id; + size_t group_id; // DC or AC group id. + size_t pass_id; // Only for kModularAC. + size_t ID(const FrameDimensions& frame_dim) const { + size_t id = 0; + switch (kind) { + case kGlobalData: + id = 0; + break; + case kVarDCTDC: + id = 1 + group_id; + break; + case kModularDC: + id = 1 + frame_dim.num_dc_groups + group_id; + break; + case kACMetadata: + id = 1 + 2 * frame_dim.num_dc_groups + group_id; + break; + case kQuantTable: + id = 1 + 3 * frame_dim.num_dc_groups + quant_table_id; + break; + case kModularAC: + id = 1 + 3 * frame_dim.num_dc_groups + DequantMatrices::kNum + + frame_dim.num_groups * pass_id + group_id; + break; + }; + return id; + } + static ModularStreamId Global() { + return ModularStreamId{kGlobalData, 0, 0, 0}; + } + static ModularStreamId VarDCTDC(size_t group_id) { + return ModularStreamId{kVarDCTDC, 0, group_id, 0}; + } + static ModularStreamId ModularDC(size_t group_id) { + return ModularStreamId{kModularDC, 0, group_id, 0}; + } + static ModularStreamId ACMetadata(size_t group_id) { + return ModularStreamId{kACMetadata, 0, group_id, 0}; + } + static ModularStreamId QuantTable(size_t quant_table_id) { + JXL_ASSERT(quant_table_id < DequantMatrices::kNum); + return ModularStreamId{kQuantTable, quant_table_id, 0, 0}; + } + static ModularStreamId ModularAC(size_t group_id, size_t pass_id) { + return ModularStreamId{kModularAC, 0, group_id, pass_id}; + } + static size_t 
Num(const FrameDimensions& frame_dim, size_t passes) { + return ModularAC(0, passes).ID(frame_dim); + } + std::string DebugString() const; +}; + +class ModularFrameDecoder { + public: + void Init(const FrameDimensions& frame_dim) { this->frame_dim = frame_dim; } + Status DecodeGlobalInfo(BitReader* reader, const FrameHeader& frame_header, + bool allow_truncated_group); + Status DecodeGroup(const Rect& rect, BitReader* reader, int minShift, + int maxShift, const ModularStreamId& stream, bool zerofill, + PassesDecoderState* dec_state, + RenderPipelineInput* render_pipeline_input, + bool allow_truncated, bool* should_run_pipeline = nullptr); + // Decodes a VarDCT DC group (`group_id`) from the given `reader`. + Status DecodeVarDCTDC(size_t group_id, BitReader* reader, + PassesDecoderState* dec_state); + // Decodes a VarDCT AC Metadata group (`group_id`) from the given `reader`. + Status DecodeAcMetadata(size_t group_id, BitReader* reader, + PassesDecoderState* dec_state); + // Decodes a RAW quant table from `br` into the given `encoding`, of size + // `required_size_x x required_size_y`. If `modular_frame_decoder` is passed, + // its global tree is used, otherwise no global tree is used. + static Status DecodeQuantTable(size_t required_size_x, size_t required_size_y, + BitReader* br, QuantEncoding* encoding, + size_t idx, + ModularFrameDecoder* modular_frame_decoder); + // if inplace is true, this can only be called once + // if it is false, it can be called multiple times (e.g. 
for progressive + // steps) + Status FinalizeDecoding(PassesDecoderState* dec_state, jxl::ThreadPool* pool, + bool inplace); + bool have_dc() const { return have_something; } + void MaybeDropFullImage(); + bool UsesFullImage() const { return use_full_image; } + + private: + Status ModularImageToDecodedRect(Image& gi, PassesDecoderState* dec_state, + jxl::ThreadPool* pool, + RenderPipelineInput& render_pipeline_input, + Rect modular_rect); + + Image full_image; + std::vector global_transform; + FrameDimensions frame_dim; + bool do_color; + bool have_something; + bool use_full_image = true; + bool all_same_shift; + Tree tree; + ANSCode code; + std::vector context_map; + GroupHeader global_header; +}; + +} // namespace jxl + +#endif // LIB_JXL_DEC_MODULAR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_noise.cc b/third-party/libjxl/libjxl/lib/jxl/dec_noise.cc new file mode 100644 index 0000000000..275a6d0b21 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_noise.cc @@ -0,0 +1,131 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_noise.h" + +#include +#include +#include + +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/dec_noise.cc" +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/xorshift128plus-inl.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Vec; + +using D = HWY_CAPPED(float, kBlockDim); +using DI = hwy::HWY_NAMESPACE::Rebind; +using DI8 = hwy::HWY_NAMESPACE::Repartition; + +// Converts one vector's worth of random bits to floats in [1, 2). +// NOTE: as the convolution kernel sums to 0, it doesn't matter if inputs are in +// [0, 1) or in [1, 2). +void BitsToFloat(const uint32_t* JXL_RESTRICT random_bits, + float* JXL_RESTRICT floats) { + const HWY_FULL(float) df; + const HWY_FULL(uint32_t) du; + + const auto bits = Load(du, random_bits); + // 1.0 + 23 random mantissa bits = [1, 2) + const auto rand12 = BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000))); + Store(rand12, df, floats); +} + +void RandomImage(Xorshift128Plus* rng, const Rect& rect, + ImageF* JXL_RESTRICT noise) { + const size_t xsize = rect.xsize(); + const size_t ysize = rect.ysize(); + + // May exceed the vector size, hence we have two loops over x below. + constexpr size_t kFloatsPerBatch = + Xorshift128Plus::N * sizeof(uint64_t) / sizeof(float); + HWY_ALIGN uint64_t batch[Xorshift128Plus::N] = {}; + + const HWY_FULL(float) df; + const size_t N = Lanes(df); + + for (size_t y = 0; y < ysize; ++y) { + float* JXL_RESTRICT row = rect.Row(noise, y); + + size_t x = 0; + // Only entire batches (avoids exceeding the image padding). + for (; x + kFloatsPerBatch < xsize; x += kFloatsPerBatch) { + rng->Fill(batch); + for (size_t i = 0; i < kFloatsPerBatch; i += Lanes(df)) { + BitsToFloat(reinterpret_cast(batch) + i, row + x + i); + } + } + + // Any remaining pixels, rounded up to vectors (safe due to padding). 
+ rng->Fill(batch); + size_t batch_pos = 0; // < kFloatsPerBatch + for (; x < xsize; x += N) { + BitsToFloat(reinterpret_cast(batch) + batch_pos, + row + x); + batch_pos += N; + } + } +} +void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index, + size_t x0, size_t y0, const std::pair& plane0, + const std::pair& plane1, + const std::pair& plane2) { + HWY_ALIGN Xorshift128Plus rng(visible_frame_index, nonvisible_frame_index, x0, + y0); + RandomImage(&rng, plane0.second, plane0.first); + RandomImage(&rng, plane1.second, plane1.first); + RandomImage(&rng, plane2.second, plane2.first); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(Random3Planes); +void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index, + size_t x0, size_t y0, const std::pair& plane0, + const std::pair& plane1, + const std::pair& plane2) { + return HWY_DYNAMIC_DISPATCH(Random3Planes)(visible_frame_index, + nonvisible_frame_index, x0, y0, + plane0, plane1, plane2); +} + +void DecodeFloatParam(float precision, float* val, BitReader* br) { + const int absval_quant = br->ReadFixedBits<10>(); + *val = absval_quant / precision; +} + +Status DecodeNoise(BitReader* br, NoiseParams* noise_params) { + for (float& i : noise_params->lut) { + DecodeFloatParam(kNoisePrecision, &i, br); + } + return true; +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_noise.h b/third-party/libjxl/libjxl/lib/jxl/dec_noise.h new file mode 100644 index 0000000000..ac05866470 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_noise.h @@ -0,0 +1,32 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_DEC_NOISE_H_ +#define LIB_JXL_DEC_NOISE_H_ + +// Noise synthesis. Currently disabled. + +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/image.h" +#include "lib/jxl/noise.h" + +namespace jxl { + +void Random3Planes(size_t visible_frame_index, size_t nonvisible_frame_index, + size_t x0, size_t y0, const std::pair& plane0, + const std::pair& plane1, + const std::pair& plane2); + +// Must only call if FrameHeader.flags.kNoise. +Status DecodeNoise(BitReader* br, NoiseParams* noise_params); + +} // namespace jxl + +#endif // LIB_JXL_DEC_NOISE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.cc b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.cc new file mode 100644 index 0000000000..56538bc232 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.cc @@ -0,0 +1,357 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/dec_patch_dictionary.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/blending.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_frame.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/patch_dictionary_internal.h" + +namespace jxl { + +Status PatchDictionary::Decode(BitReader* br, size_t xsize, size_t ysize, + bool* uses_extra_channels) { + positions_.clear(); + std::vector context_map; + ANSCode code; + JXL_RETURN_IF_ERROR( + DecodeHistograms(br, kNumPatchDictionaryContexts, &code, &context_map)); + ANSSymbolReader decoder(&code, br); + + auto read_num = [&](size_t context) { + size_t r = decoder.ReadHybridUint(context, br, context_map); + return r; + }; + + size_t num_ref_patch = read_num(kNumRefPatchContext); + // Limit max memory usage of patches to about 66 bytes per pixel (assuming 8 + // bytes per size_t) + const size_t num_pixels = xsize * ysize; + const size_t max_ref_patches = 1024 + num_pixels / 4; + const size_t max_patches = max_ref_patches * 4; + const size_t max_blending_infos = max_patches * 4; + if (num_ref_patch > max_ref_patches) { + return JXL_FAILURE("Too many patches in dictionary"); + } + size_t num_ec = shared_->metadata->m.num_extra_channels; + + size_t total_patches = 0; + size_t next_size = 1; + + for (size_t id = 0; id < num_ref_patch; id++) { + PatchReferencePosition ref_pos; + ref_pos.ref = read_num(kReferenceFrameContext); + if (ref_pos.ref >= kMaxNumReferenceFrames || + shared_->reference_frames[ref_pos.ref].frame.xsize() 
== 0) { + return JXL_FAILURE("Invalid reference frame ID"); + } + if (!shared_->reference_frames[ref_pos.ref].ib_is_in_xyb) { + return JXL_FAILURE( + "Patches cannot use frames saved post color transforms"); + } + const ImageBundle& ib = shared_->reference_frames[ref_pos.ref].frame; + ref_pos.x0 = read_num(kPatchReferencePositionContext); + ref_pos.y0 = read_num(kPatchReferencePositionContext); + ref_pos.xsize = read_num(kPatchSizeContext) + 1; + ref_pos.ysize = read_num(kPatchSizeContext) + 1; + if (ref_pos.x0 + ref_pos.xsize > ib.xsize()) { + return JXL_FAILURE("Invalid position specified in reference frame"); + } + if (ref_pos.y0 + ref_pos.ysize > ib.ysize()) { + return JXL_FAILURE("Invalid position specified in reference frame"); + } + size_t id_count = read_num(kPatchCountContext) + 1; + total_patches += id_count; + if (total_patches > max_patches) { + return JXL_FAILURE("Too many patches in dictionary"); + } + if (next_size < total_patches) { + next_size *= 2; + next_size = std::min(next_size, max_patches); + } + if (next_size * (num_ec + 1) > max_blending_infos) { + return JXL_FAILURE("Too many patches in dictionary"); + } + positions_.reserve(next_size); + blendings_.reserve(next_size * (num_ec + 1)); + for (size_t i = 0; i < id_count; i++) { + PatchPosition pos; + pos.ref_pos_idx = ref_positions_.size(); + if (i == 0) { + pos.x = read_num(kPatchPositionContext); + pos.y = read_num(kPatchPositionContext); + } else { + ssize_t deltax = UnpackSigned(read_num(kPatchOffsetContext)); + if (deltax < 0 && static_cast(-deltax) > positions_.back().x) { + return JXL_FAILURE("Invalid patch: negative x coordinate (%" PRIuS + " base x %" PRIdS " delta x)", + positions_.back().x, deltax); + } + pos.x = positions_.back().x + deltax; + ssize_t deltay = UnpackSigned(read_num(kPatchOffsetContext)); + if (deltay < 0 && static_cast(-deltay) > positions_.back().y) { + return JXL_FAILURE("Invalid patch: negative y coordinate (%" PRIuS + " base y %" PRIdS " delta y)", + 
positions_.back().y, deltay); + } + pos.y = positions_.back().y + deltay; + } + if (pos.x + ref_pos.xsize > xsize) { + return JXL_FAILURE("Invalid patch x: at %" PRIuS " + %" PRIuS + " > %" PRIuS, + pos.x, ref_pos.xsize, xsize); + } + if (pos.y + ref_pos.ysize > ysize) { + return JXL_FAILURE("Invalid patch y: at %" PRIuS " + %" PRIuS + " > %" PRIuS, + pos.y, ref_pos.ysize, ysize); + } + for (size_t j = 0; j < num_ec + 1; j++) { + uint32_t blend_mode = read_num(kPatchBlendModeContext); + if (blend_mode >= uint32_t(PatchBlendMode::kNumBlendModes)) { + return JXL_FAILURE("Invalid patch blend mode: %u", blend_mode); + } + PatchBlending info; + info.mode = static_cast(blend_mode); + if (UsesAlpha(info.mode)) { + *uses_extra_channels = true; + } + if (info.mode != PatchBlendMode::kNone && j > 0) { + *uses_extra_channels = true; + } + if (UsesAlpha(info.mode) && + shared_->metadata->m.extra_channel_info.size() > 1) { + info.alpha_channel = read_num(kPatchAlphaChannelContext); + if (info.alpha_channel >= + shared_->metadata->m.extra_channel_info.size()) { + return JXL_FAILURE( + "Invalid alpha channel for blending: %u out of %u\n", + info.alpha_channel, + (uint32_t)shared_->metadata->m.extra_channel_info.size()); + } + } else { + info.alpha_channel = 0; + } + if (UsesClamp(info.mode)) { + info.clamp = read_num(kPatchClampContext); + } else { + info.clamp = false; + } + blendings_.push_back(info); + } + positions_.push_back(std::move(pos)); + } + ref_positions_.emplace_back(std::move(ref_pos)); + } + positions_.shrink_to_fit(); + + if (!decoder.CheckANSFinalState()) { + return JXL_FAILURE("ANS checksum failure."); + } + + ComputePatchTree(); + return true; +} + +int PatchDictionary::GetReferences() const { + int result = 0; + for (size_t i = 0; i < ref_positions_.size(); ++i) { + result |= (1 << static_cast(ref_positions_[i].ref)); + } + return result; +} + +namespace { +struct PatchInterval { + size_t idx; + size_t y0, y1; +}; +} // namespace + +void 
PatchDictionary::ComputePatchTree() { + patch_tree_.clear(); + num_patches_.clear(); + sorted_patches_y0_.clear(); + sorted_patches_y1_.clear(); + if (positions_.empty()) { + return; + } + // Create a y-interval for each patch. + std::vector intervals(positions_.size()); + for (size_t i = 0; i < positions_.size(); ++i) { + const auto& pos = positions_[i]; + intervals[i].idx = i; + intervals[i].y0 = pos.y; + intervals[i].y1 = pos.y + ref_positions_[pos.ref_pos_idx].ysize; + } + auto sort_by_y0 = [&intervals](size_t start, size_t end) { + std::sort(intervals.data() + start, intervals.data() + end, + [](const PatchInterval& i0, const PatchInterval& i1) { + return i0.y0 < i1.y0; + }); + }; + auto sort_by_y1 = [&intervals](size_t start, size_t end) { + std::sort(intervals.data() + start, intervals.data() + end, + [](const PatchInterval& i0, const PatchInterval& i1) { + return i0.y1 < i1.y1; + }); + }; + // Count the number of patches for each row. + sort_by_y1(0, intervals.size()); + num_patches_.resize(intervals.back().y1); + for (auto iv : intervals) { + for (size_t y = iv.y0; y < iv.y1; ++y) num_patches_[y]++; + } + PatchTreeNode root; + root.start = 0; + root.num = intervals.size(); + patch_tree_.push_back(root); + size_t next = 0; + while (next < patch_tree_.size()) { + auto& node = patch_tree_[next]; + size_t start = node.start; + size_t end = node.start + node.num; + // Choose the y_center for this node to be the median of interval starts. 
+ sort_by_y0(start, end); + size_t middle_idx = start + node.num / 2; + node.y_center = intervals[middle_idx].y0; + // Divide the intervals in [start, end) into three groups: + // * those completely to the right of y_center: [right_start, end) + // * those overlapping y_center: [left_end, right_start) + // * those completely to the left of y_center: [start, left_end) + size_t right_start = middle_idx; + while (right_start < end && intervals[right_start].y0 == node.y_center) { + ++right_start; + } + sort_by_y1(start, right_start); + size_t left_end = right_start; + while (left_end > start && intervals[left_end - 1].y1 > node.y_center) { + --left_end; + } + // Fill in sorted_patches_y0_ and sorted_patches_y1_ for the current node. + node.num = right_start - left_end; + node.start = sorted_patches_y0_.size(); + for (ssize_t i = static_cast(right_start) - 1; + i >= static_cast(left_end); --i) { + sorted_patches_y1_.push_back({intervals[i].y1, intervals[i].idx}); + } + sort_by_y0(left_end, right_start); + for (size_t i = left_end; i < right_start; ++i) { + sorted_patches_y0_.push_back({intervals[i].y0, intervals[i].idx}); + } + // Create the left and right nodes (if not empty). 
+ node.left_child = node.right_child = -1; + if (left_end > start) { + PatchTreeNode left; + left.start = start; + left.num = left_end - left.start; + patch_tree_[next].left_child = patch_tree_.size(); + patch_tree_.push_back(left); + } + if (right_start < end) { + PatchTreeNode right; + right.start = right_start; + right.num = end - right.start; + patch_tree_[next].right_child = patch_tree_.size(); + patch_tree_.push_back(right); + } + ++next; + } +} + +std::vector PatchDictionary::GetPatchesForRow(size_t y) const { + std::vector result; + if (y < num_patches_.size() && num_patches_[y] > 0) { + result.reserve(num_patches_[y]); + for (ssize_t tree_idx = 0; tree_idx != -1;) { + JXL_DASSERT(tree_idx < (ssize_t)patch_tree_.size()); + const auto& node = patch_tree_[tree_idx]; + if (y <= node.y_center) { + for (size_t i = 0; i < node.num; ++i) { + const auto& p = sorted_patches_y0_[node.start + i]; + if (y < p.first) break; + result.push_back(p.second); + } + tree_idx = y < node.y_center ? node.left_child : -1; + } else { + for (size_t i = 0; i < node.num; ++i) { + const auto& p = sorted_patches_y1_[node.start + i]; + if (y >= p.first) break; + result.push_back(p.second); + } + tree_idx = node.right_child; + } + } + // Ensure that he relative order of patches that affect the same pixels is + // preserved. This is important for patches that have a blend mode + // different from kAdd. + std::sort(result.begin(), result.end()); + } + return result; +} + +// Adds patches to a segment of `xsize` pixels, starting at `inout`, assumed +// to be located at position (x0, y) in the frame. 
+void PatchDictionary::AddOneRow(float* const* inout, size_t y, size_t x0, + size_t xsize) const { + size_t num_ec = shared_->metadata->m.num_extra_channels; + std::vector fg_ptrs(3 + num_ec); + for (size_t pos_idx : GetPatchesForRow(y)) { + const size_t blending_idx = pos_idx * (num_ec + 1); + const PatchPosition& pos = positions_[pos_idx]; + const PatchReferencePosition& ref_pos = ref_positions_[pos.ref_pos_idx]; + size_t by = pos.y; + size_t bx = pos.x; + size_t patch_xsize = ref_pos.xsize; + JXL_DASSERT(y >= by); + JXL_DASSERT(y < by + ref_pos.ysize); + size_t iy = y - by; + size_t ref = ref_pos.ref; + if (bx >= x0 + xsize) continue; + if (bx + patch_xsize < x0) continue; + size_t patch_x0 = std::max(bx, x0); + size_t patch_x1 = std::min(bx + patch_xsize, x0 + xsize); + for (size_t c = 0; c < 3; c++) { + fg_ptrs[c] = shared_->reference_frames[ref].frame.color().ConstPlaneRow( + c, ref_pos.y0 + iy) + + ref_pos.x0 + x0 - bx; + } + for (size_t i = 0; i < num_ec; i++) { + fg_ptrs[3 + i] = + shared_->reference_frames[ref].frame.extra_channels()[i].ConstRow( + ref_pos.y0 + iy) + + ref_pos.x0 + x0 - bx; + } + PerformBlending(inout, fg_ptrs.data(), inout, patch_x0 - x0, + patch_x1 - patch_x0, blendings_[blending_idx], + blendings_.data() + blending_idx + 1, + shared_->metadata->m.extra_channel_info); + } +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.h b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.h new file mode 100644 index 0000000000..a950e83e85 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_patch_dictionary.h @@ -0,0 +1,151 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_PATCH_DICTIONARY_H_ +#define LIB_JXL_DEC_PATCH_DICTIONARY_H_ + +// Chooses reference patches, and avoids encoding them once per occurrence. 
+ +#include +#include +#include + +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/image.h" +#include "lib/jxl/opsin_params.h" + +namespace jxl { + +enum class PatchBlendMode : uint8_t { + // The new values are the old ones. Useful to skip some channels. + kNone = 0, + // The new values (in the crop) replace the old ones: sample = new + kReplace = 1, + // The new values (in the crop) get added to the old ones: sample = old + new + kAdd = 2, + // The new values (in the crop) get multiplied by the old ones: + // sample = old * new + // This blend mode is only supported if BlendColorSpace is kEncoded. The + // range of the new value matters for multiplication purposes, and its + // nominal range of 0..1 is computed the same way as this is done for the + // alpha values in kBlend and kAlphaWeightedAdd. + kMul = 3, + // The new values (in the crop) replace the old ones if alpha>0: + // For first alpha channel: + // alpha = old + new * (1 - old) + // For other channels if !alpha_associated: + // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha + // For other channels if alpha_associated: + // sample = (1 - new_alpha) * old + new + // The alpha formula applies to the alpha used for the division in the other + // channels formula, and applies to the alpha channel itself if its + // blend_channel value matches itself. + // If using kBlendAbove, new is the patch and old is the original image; if + // using kBlendBelow, the meaning is inverted. 
+ kBlendAbove = 4, + kBlendBelow = 5, + // The new values (in the crop) are added to the old ones if alpha>0: + // For first alpha channel: sample = sample = old + new * (1 - old) + // For other channels: sample = old + alpha * new + kAlphaWeightedAddAbove = 6, + kAlphaWeightedAddBelow = 7, + kNumBlendModes, +}; + +inline bool UsesAlpha(PatchBlendMode mode) { + return mode == PatchBlendMode::kBlendAbove || + mode == PatchBlendMode::kBlendBelow || + mode == PatchBlendMode::kAlphaWeightedAddAbove || + mode == PatchBlendMode::kAlphaWeightedAddBelow; +} +inline bool UsesClamp(PatchBlendMode mode) { + return UsesAlpha(mode) || mode == PatchBlendMode::kMul; +} + +struct PatchBlending { + PatchBlendMode mode; + uint32_t alpha_channel; + bool clamp; +}; + +// Position and size of the patch in the reference frame. +struct PatchReferencePosition { + size_t ref, x0, y0, xsize, ysize; +}; + +struct PatchPosition { + // Position of top-left corner of the patch in the image. + size_t x, y; + size_t ref_pos_idx; +}; + +struct PassesSharedState; + +// Encoder-side helper class to encode the PatchesDictionary. +class PatchDictionaryEncoder; + +class PatchDictionary { + public: + PatchDictionary() = default; + + void SetPassesSharedState(const PassesSharedState* shared) { + shared_ = shared; + } + + bool HasAny() const { return !positions_.empty(); } + + Status Decode(BitReader* br, size_t xsize, size_t ysize, + bool* uses_extra_channels); + + void Clear() { + positions_.clear(); + ComputePatchTree(); + } + + // Adds patches to a segment of `xsize` pixels, starting at `inout`, assumed + // to be located at position (x0, y) in the frame. + void AddOneRow(float* const* inout, size_t y, size_t x0, size_t xsize) const; + + // Returns dependencies of this patch dictionary on reference frame ids as a + // bit mask: bits 0-3 indicate reference frame 0-3. 
+ int GetReferences() const; + + std::vector GetPatchesForRow(size_t y) const; + + private: + friend class PatchDictionaryEncoder; + + const PassesSharedState* shared_; + std::vector positions_; + std::vector ref_positions_; + std::vector blendings_; + + // Interval tree on the y coordinates of the patches. + struct PatchTreeNode { + ssize_t left_child; + ssize_t right_child; + size_t y_center; + // Range of patches in sorted_patches_y0_ and sorted_patches_y1_ that + // contain the row y_center. + size_t start; + size_t num; + }; + std::vector patch_tree_; + // Number of patches for each row. + std::vector num_patches_; + std::vector> sorted_patches_y0_; + std::vector> sorted_patches_y1_; + + void ComputePatchTree(); +}; + +} // namespace jxl + +#endif // LIB_JXL_DEC_PATCH_DICTIONARY_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_tone_mapping-inl.h b/third-party/libjxl/libjxl/lib/jxl/dec_tone_mapping-inl.h new file mode 100644 index 0000000000..ffe0c10bff --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_tone_mapping-inl.h @@ -0,0 +1,239 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JXL_DEC_TONE_MAPPING_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_DEC_TONE_MAPPING_INL_H_ +#undef LIB_JXL_DEC_TONE_MAPPING_INL_H_ +#else +#define LIB_JXL_DEC_TONE_MAPPING_INL_H_ +#endif + +#include + +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Clamp; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +template +class Rec2408ToneMapper { + private: + using V = hwy::HWY_NAMESPACE::Vec; + + public: + explicit Rec2408ToneMapper(std::pair source_range, + std::pair target_range, + const float primaries_luminances[3]) + : source_range_(source_range), + target_range_(target_range), + red_Y_(primaries_luminances[0]), + green_Y_(primaries_luminances[1]), + blue_Y_(primaries_luminances[2]) {} + + void ToneMap(V* red, V* green, V* blue) const { + const V luminance = Mul(Set(df_, source_range_.second), + (MulAdd(Set(df_, red_Y_), *red, + MulAdd(Set(df_, green_Y_), *green, + Mul(Set(df_, blue_Y_), *blue))))); + const V pq_mastering_min = Set(df_, pq_mastering_min_); + const V inv_pq_mastering_range = Set(df_, inv_pq_mastering_range_); + const V normalized_pq = Min( + Set(df_, 1.f), + Mul(Sub(InvEOTF(luminance), pq_mastering_min), inv_pq_mastering_range)); + const V ks = Set(df_, ks_); + const V e2 = + IfThenElse(Lt(normalized_pq, ks), normalized_pq, P(normalized_pq)); + const V one_minus_e2 = Sub(Set(df_, 1), e2); + const V one_minus_e2_2 = Mul(one_minus_e2, one_minus_e2); + const V one_minus_e2_4 = Mul(one_minus_e2_2, one_minus_e2_2); + const V b = Set(df_, min_lum_); + const V e3 = MulAdd(b, one_minus_e2_4, e2); + const V pq_mastering_range = Set(df_, pq_mastering_range_); + const V e4 = MulAdd(e3, pq_mastering_range, pq_mastering_min); + const V new_luminance = + Min(Set(df_, target_range_.second), + ZeroIfNegative( + Mul(Set(df_, 10000), TF_PQ().DisplayFromEncoded(df_, e4)))); + const V min_luminance = Set(df_, 1e-6f); + const auto use_cap = Le(luminance, min_luminance); + const V ratio = Div(new_luminance, Max(luminance, min_luminance)); + const V cap = Mul(new_luminance, Set(df_, inv_target_peak_)); + const V normalizer = Set(df_, normalizer_); + const V multiplier = Mul(ratio, normalizer); + for (V* const val : {red, green, blue}) { + *val = 
IfThenElse(use_cap, cap, Mul(*val, multiplier)); + } + } + + private: + V InvEOTF(const V luminance) const { + return TF_PQ().EncodedFromDisplay(df_, + Mul(luminance, Set(df_, 1. / 10000))); + } + float InvEOTF(const float luminance) const { + return TF_PQ().EncodedFromDisplay(luminance / 10000.0f); + } + V T(const V a) const { + const V ks = Set(df_, ks_); + const V inv_one_minus_ks = Set(df_, inv_one_minus_ks_); + return Mul(Sub(a, ks), inv_one_minus_ks); + } + V P(const V b) const { + const V t_b = T(b); + const V t_b_2 = Mul(t_b, t_b); + const V t_b_3 = Mul(t_b_2, t_b); + const V ks = Set(df_, ks_); + const V max_lum = Set(df_, max_lum_); + return MulAdd( + MulAdd(Set(df_, 2), t_b_3, MulAdd(Set(df_, -3), t_b_2, Set(df_, 1))), + ks, + MulAdd(Add(t_b_3, MulAdd(Set(df_, -2), t_b_2, t_b)), + Sub(Set(df_, 1), ks), + Mul(MulAdd(Set(df_, -2), t_b_3, Mul(Set(df_, 3), t_b_2)), + max_lum))); + } + + D df_; + const std::pair source_range_; + const std::pair target_range_; + const float red_Y_; + const float green_Y_; + const float blue_Y_; + + const float pq_mastering_min_ = InvEOTF(source_range_.first); + const float pq_mastering_max_ = InvEOTF(source_range_.second); + const float pq_mastering_range_ = pq_mastering_max_ - pq_mastering_min_; + const float inv_pq_mastering_range_ = 1.0f / pq_mastering_range_; + // TODO(eustas): divide instead of inverse-multiply? + const float min_lum_ = (InvEOTF(target_range_.first) - pq_mastering_min_) * + inv_pq_mastering_range_; + // TODO(eustas): divide instead of inverse-multiply? 
+ const float max_lum_ = (InvEOTF(target_range_.second) - pq_mastering_min_) * + inv_pq_mastering_range_; + const float ks_ = 1.5f * max_lum_ - 0.5f; + + const float inv_one_minus_ks_ = 1.0f / std::max(1e-6f, 1.0f - ks_); + + const float normalizer_ = source_range_.second / target_range_.second; + const float inv_target_peak_ = 1.f / target_range_.second; +}; + +class HlgOOTF { + public: + explicit HlgOOTF(float source_luminance, float target_luminance, + const float primaries_luminances[3]) + : HlgOOTF(/*gamma=*/std::pow( + 1.111f, std::log2(target_luminance / source_luminance)), + primaries_luminances) {} + + static HlgOOTF FromSceneLight(float display_luminance, + const float primaries_luminances[3]) { + return HlgOOTF(/*gamma=*/1.2f * + std::pow(1.111f, std::log2(display_luminance / 1000.f)), + primaries_luminances); + } + + static HlgOOTF ToSceneLight(float display_luminance, + const float primaries_luminances[3]) { + return HlgOOTF( + /*gamma=*/(1 / 1.2f) * + std::pow(1.111f, -std::log2(display_luminance / 1000.f)), + primaries_luminances); + } + + template + void Apply(V* red, V* green, V* blue) const { + hwy::HWY_NAMESPACE::DFromV df; + if (!apply_ootf_) return; + const V luminance = + MulAdd(Set(df, red_Y_), *red, + MulAdd(Set(df, green_Y_), *green, Mul(Set(df, blue_Y_), *blue))); + const V ratio = + Min(FastPowf(df, luminance, Set(df, exponent_)), Set(df, 1e9)); + *red = Mul(*red, ratio); + *green = Mul(*green, ratio); + *blue = Mul(*blue, ratio); + } + + bool WarrantsGamutMapping() const { return apply_ootf_ && exponent_ < 0; } + + private: + explicit HlgOOTF(float gamma, const float luminances[3]) + : exponent_(gamma - 1), + red_Y_(luminances[0]), + green_Y_(luminances[1]), + blue_Y_(luminances[2]) {} + const float exponent_; + const bool apply_ootf_ = exponent_ < -0.01f || 0.01f < exponent_; + const float red_Y_; + const float green_Y_; + const float blue_Y_; +}; + +template +void GamutMap(V* red, V* green, V* blue, const float primaries_luminances[3], 
+ float preserve_saturation = 0.1f) { + hwy::HWY_NAMESPACE::DFromV df; + const V luminance = + MulAdd(Set(df, primaries_luminances[0]), *red, + MulAdd(Set(df, primaries_luminances[1]), *green, + Mul(Set(df, primaries_luminances[2]), *blue))); + + // Desaturate out-of-gamut pixels. This is done by mixing each pixel + // with just enough gray of the target luminance to make all + // components non-negative. + // - For saturation preservation, if a component is still larger than + // 1 then the pixel is normalized to have a maximum component of 1. + // That will reduce its luminance. + // - For luminance preservation, getting all components below 1 is + // done by mixing in yet more gray. That will desaturate it further. + const V zero = Zero(df); + const V one = Set(df, 1); + V gray_mix_saturation = zero; + V gray_mix_luminance = zero; + for (const V* ch : {red, green, blue}) { + const V& val = *ch; + const V val_minus_gray = Sub(val, luminance); + const V inv_val_minus_gray = + Div(one, IfThenElse(Eq(val_minus_gray, zero), one, val_minus_gray)); + const V val_over_val_minus_gray = Mul(val, inv_val_minus_gray); + gray_mix_saturation = + IfThenElse(Ge(val_minus_gray, zero), gray_mix_saturation, + Max(gray_mix_saturation, val_over_val_minus_gray)); + gray_mix_luminance = + Max(gray_mix_luminance, + IfThenElse(Le(val_minus_gray, zero), gray_mix_saturation, + Sub(val_over_val_minus_gray, inv_val_minus_gray))); + } + const V gray_mix = Clamp( + MulAdd(Set(df, preserve_saturation), + Sub(gray_mix_saturation, gray_mix_luminance), gray_mix_luminance), + zero, one); + for (V* const ch : {red, green, blue}) { + V& val = *ch; + val = MulAdd(gray_mix, Sub(luminance, val), val); + } + const V max_clr = Max(Max(one, *red), Max(*green, *blue)); + const V normalizer = Div(one, max_clr); + for (V* const ch : {red, green, blue}) { + V& val = *ch; + val = Mul(val, normalizer); + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace 
HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_DEC_TONE_MAPPING_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_transforms-inl.h b/third-party/libjxl/libjxl/lib/jxl/dec_transforms-inl.h new file mode 100644 index 0000000000..c2267d75e7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_transforms-inl.h @@ -0,0 +1,827 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_ +#undef LIB_JXL_DEC_TRANSFORMS_INL_H_ +#else +#define LIB_JXL_DEC_TRANSFORMS_INL_H_ +#endif + +#include + +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/dct-inl.h" +#include "lib/jxl/dct_scales.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::MulAdd; + +// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which +// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the +// input block. 
+template +JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride, + float* output, const size_t output_stride, + float* JXL_RESTRICT block, + float* JXL_RESTRICT scratch_space) { + static_assert(LF_ROWS == ROWS, + "ReinterpretingDCT should only be called with LF == N"); + static_assert(LF_COLS == COLS, + "ReinterpretingDCT should only be called with LF == N"); + ComputeScaledDCT()(DCTFrom(input, input_stride), block, + scratch_space); + if (ROWS < COLS) { + for (size_t y = 0; y < LF_ROWS; y++) { + for (size_t x = 0; x < LF_COLS; x++) { + output[y * output_stride + x] = + block[y * COLS + x] * DCTTotalResampleScale(y) * + DCTTotalResampleScale(x); + } + } + } else { + for (size_t y = 0; y < LF_COLS; y++) { + for (size_t x = 0; x < LF_ROWS; x++) { + output[y * output_stride + x] = + block[y * ROWS + x] * DCTTotalResampleScale(y) * + DCTTotalResampleScale(x); + } + } + } +} + +template +void IDCT2TopBlock(const float* block, size_t stride_out, float* out) { + static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); + static_assert(S % 2 == 0, "S should be even"); + float temp[kDCTBlockSize]; + constexpr size_t num_2x2 = S / 2; + for (size_t y = 0; y < num_2x2; y++) { + for (size_t x = 0; x < num_2x2; x++) { + float c00 = block[y * kBlockDim + x]; + float c01 = block[y * kBlockDim + num_2x2 + x]; + float c10 = block[(y + num_2x2) * kBlockDim + x]; + float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x]; + float r00 = c00 + c01 + c10 + c11; + float r01 = c00 + c01 - c10 - c11; + float r10 = c00 - c01 + c10 - c11; + float r11 = c00 - c01 - c10 + c11; + temp[y * 2 * kBlockDim + x * 2] = r00; + temp[y * 2 * kBlockDim + x * 2 + 1] = r01; + temp[(y * 2 + 1) * kBlockDim + x * 2] = r10; + temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11; + } + } + for (size_t y = 0; y < S; y++) { + for (size_t x = 0; x < S; x++) { + out[y * stride_out + x] = temp[y * kBlockDim + x]; + } + } +} + +void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, 
float* JXL_RESTRICT pixels) { + HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = { + { + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + 0.25, + }, + { + 0.876902929799142f, + 0.2206518106944235f, + -0.10140050393753763f, + -0.1014005039375375f, + 0.2206518106944236f, + -0.10140050393753777f, + -0.10140050393753772f, + -0.10140050393753763f, + -0.10140050393753758f, + -0.10140050393753769f, + -0.1014005039375375f, + -0.10140050393753768f, + -0.10140050393753768f, + -0.10140050393753759f, + -0.10140050393753763f, + -0.10140050393753741f, + }, + { + 0.0, + 0.0, + 0.40670075830260755f, + 0.44444816619734445f, + 0.0, + 0.0, + 0.19574399372042936f, + 0.2929100136981264f, + -0.40670075830260716f, + -0.19574399372042872f, + 0.0, + 0.11379074460448091f, + -0.44444816619734384f, + -0.29291001369812636f, + -0.1137907446044814f, + 0.0, + }, + { + 0.0, + 0.0, + -0.21255748058288748f, + 0.3085497062849767f, + 0.0, + 0.4706702258572536f, + -0.1621205195722993f, + 0.0, + -0.21255748058287047f, + -0.16212051957228327f, + -0.47067022585725277f, + -0.1464291867126764f, + 0.3085497062849487f, + 0.0, + -0.14642918671266536f, + 0.4251149611657548f, + }, + { + 0.0, + -0.7071067811865474f, + 0.0, + 0.0, + 0.7071067811865476f, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + }, + { + -0.4105377591765233f, + 0.6235485373547691f, + -0.06435071657946274f, + -0.06435071657946266f, + 0.6235485373547694f, + -0.06435071657946284f, + -0.0643507165794628f, + -0.06435071657946274f, + -0.06435071657946272f, + -0.06435071657946279f, + -0.06435071657946266f, + -0.06435071657946277f, + -0.06435071657946277f, + -0.06435071657946273f, + -0.06435071657946274f, + -0.0643507165794626f, + }, + { + 0.0, + 0.0, + -0.4517556589999482f, + 0.15854503551840063f, + 0.0, + -0.04038515160822202f, + 0.0074182263792423875f, + 0.39351034269210167f, + -0.45175565899994635f, + 0.007418226379244351f, + 
0.1107416575309343f, + 0.08298163094882051f, + 0.15854503551839705f, + 0.3935103426921022f, + 0.0829816309488214f, + -0.45175565899994796f, + }, + { + 0.0, + 0.0, + -0.304684750724869f, + 0.5112616136591823f, + 0.0, + 0.0, + -0.290480129728998f, + -0.06578701549142804f, + 0.304684750724884f, + 0.2904801297290076f, + 0.0, + -0.23889773523344604f, + -0.5112616136592012f, + 0.06578701549142545f, + 0.23889773523345467f, + 0.0, + }, + { + 0.0, + 0.0, + 0.3017929516615495f, + 0.25792362796341184f, + 0.0, + 0.16272340142866204f, + 0.09520022653475037f, + 0.0, + 0.3017929516615503f, + 0.09520022653475055f, + -0.16272340142866173f, + -0.35312385449816297f, + 0.25792362796341295f, + 0.0, + -0.3531238544981624f, + -0.6035859033230976f, + }, + { + 0.0, + 0.0, + 0.40824829046386274f, + 0.0, + 0.0, + 0.0, + 0.0, + -0.4082482904638628f, + -0.4082482904638635f, + 0.0, + 0.0, + -0.40824829046386296f, + 0.0, + 0.4082482904638634f, + 0.408248290463863f, + 0.0, + }, + { + 0.0, + 0.0, + 0.1747866975480809f, + 0.0812611176717539f, + 0.0, + 0.0, + -0.3675398009862027f, + -0.307882213957909f, + -0.17478669754808135f, + 0.3675398009862011f, + 0.0, + 0.4826689115059883f, + -0.08126111767175039f, + 0.30788221395790305f, + -0.48266891150598584f, + 0.0, + }, + { + 0.0, + 0.0, + -0.21105601049335784f, + 0.18567180916109802f, + 0.0, + 0.0, + 0.49215859013738733f, + -0.38525013709251915f, + 0.21105601049335806f, + -0.49215859013738905f, + 0.0, + 0.17419412659916217f, + -0.18567180916109904f, + 0.3852501370925211f, + -0.1741941265991621f, + 0.0, + }, + { + 0.0, + 0.0, + -0.14266084808807264f, + -0.3416446842253372f, + 0.0, + 0.7367497537172237f, + 0.24627107722075148f, + -0.08574019035519306f, + -0.14266084808807344f, + 0.24627107722075137f, + 0.14883399227113567f, + -0.04768680350229251f, + -0.3416446842253373f, + -0.08574019035519267f, + -0.047686803502292804f, + -0.14266084808807242f, + }, + { + 0.0, + 0.0, + -0.13813540350758585f, + 0.3302282550303788f, + 0.0, + 0.08755115000587084f, + 
-0.07946706605909573f, + -0.4613374887461511f, + -0.13813540350758294f, + -0.07946706605910261f, + 0.49724647109535086f, + 0.12538059448563663f, + 0.3302282550303805f, + -0.4613374887461554f, + 0.12538059448564315f, + -0.13813540350758452f, + }, + { + 0.0, + 0.0, + -0.17437602599651067f, + 0.0702790691196284f, + 0.0, + -0.2921026642334881f, + 0.3623817333531167f, + 0.0, + -0.1743760259965108f, + 0.36238173335311646f, + 0.29210266423348785f, + -0.4326608024727445f, + 0.07027906911962818f, + 0.0, + -0.4326608024727457f, + 0.34875205199302267f, + }, + { + 0.0, + 0.0, + 0.11354987314994337f, + -0.07417504595810355f, + 0.0, + 0.19402893032594343f, + -0.435190496523228f, + 0.21918684838857466f, + 0.11354987314994257f, + -0.4351904965232251f, + 0.5550443808910661f, + -0.25468277124066463f, + -0.07417504595810233f, + 0.2191868483885728f, + -0.25468277124066413f, + 0.1135498731499429f, + }, + }; + + const HWY_CAPPED(float, 16) d; + for (size_t i = 0; i < 16; i += Lanes(d)) { + auto pixel = Zero(d); + for (size_t j = 0; j < 16; j++) { + auto cf = Set(d, coeffs[j]); + auto basis = Load(d, k4x4AFVBasis[j] + i); + pixel = MulAdd(cf, basis, pixel); + } + Store(pixel, d, pixels + i); + } +} + +template +void AFVTransformToPixels(const float* JXL_RESTRICT coefficients, + float* JXL_RESTRICT pixels, size_t pixels_stride) { + HWY_ALIGN float scratch_space[4 * 8]; + size_t afv_x = afv_kind & 1; + size_t afv_y = afv_kind / 2; + float dcs[3] = {}; + float block00 = coefficients[0]; + float block01 = coefficients[1]; + float block10 = coefficients[8]; + dcs[0] = (block00 + block10 + block01) * 4.0f; + dcs[1] = (block00 + block10 - block01); + dcs[2] = block00 - block10; + // IAFV: (even, even) positions. 
+ HWY_ALIGN float coeff[4 * 4]; + coeff[0] = dcs[0]; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + if (ix == 0 && iy == 0) continue; + coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2]; + } + } + HWY_ALIGN float block[4 * 8]; + AFVIDCT4x4(coeff, block); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] = + block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)]; + } + } + // IDCT4x4 in (odd, even) positions. + block[0] = dcs[1]; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + if (ix == 0 && iy == 0) continue; + block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1]; + } + } + ComputeScaledIDCT<4, 4>()( + block, + DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), + pixels_stride), + scratch_space); + // IDCT4x8. + block[0] = dcs[2]; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 8; ix++) { + if (ix == 0 && iy == 0) continue; + block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix]; + } + } + ComputeScaledIDCT<4, 8>()( + block, + DCTTo(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), + scratch_space); +} + +HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategy::Type strategy, + float* JXL_RESTRICT coefficients, + float* JXL_RESTRICT pixels, + size_t pixels_stride, + float* scratch_space) { + using Type = AcStrategy::Type; + switch (strategy) { + case Type::IDENTITY: { + float dcs[4] = {}; + float block00 = coefficients[0]; + float block01 = coefficients[1]; + float block10 = coefficients[8]; + float block11 = coefficients[9]; + dcs[0] = block00 + block01 + block10 + block11; + dcs[1] = block00 + block01 - block10 - block11; + dcs[2] = block00 - block01 + block10 - block11; + dcs[3] = block00 - block01 - block10 + block11; + for (size_t y = 0; y < 2; y++) { + for (size_t x = 0; x < 2; x++) { + float block_dc = dcs[y * 2 + x]; + float residual_sum = 0; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + if (ix == 0 && iy == 0) continue; + residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2]; + } + } + pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] = + block_dc - residual_sum * (1.0f / 16); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + if (ix == 1 && iy == 1) continue; + pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] = + coefficients[(y + iy * 2) * 8 + x + ix * 2] + + pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; + } + } + pixels[y * 4 * pixels_stride + x * 4] = + coefficients[(y + 2) * 8 + x + 2] + + pixels[(4 * y + 1) * pixels_stride + 4 * x + 1]; + } + } + break; + } + case Type::DCT8X4: { + float dcs[2] = {}; + float block0 = coefficients[0]; + float block1 = coefficients[8]; + dcs[0] = block0 + block1; + dcs[1] = block0 - block1; + for (size_t x = 0; x < 2; x++) { + HWY_ALIGN float block[4 * 8]; + block[0] = dcs[x]; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 8; ix++) { + if (ix == 0 && iy == 0) continue; + block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix]; + } + } + ComputeScaledIDCT<8, 4>()(block, 
DCTTo(pixels + x * 4, pixels_stride), + scratch_space); + } + break; + } + case Type::DCT4X8: { + float dcs[2] = {}; + float block0 = coefficients[0]; + float block1 = coefficients[8]; + dcs[0] = block0 + block1; + dcs[1] = block0 - block1; + for (size_t y = 0; y < 2; y++) { + HWY_ALIGN float block[4 * 8]; + block[0] = dcs[y]; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 8; ix++) { + if (ix == 0 && iy == 0) continue; + block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix]; + } + } + ComputeScaledIDCT<4, 8>()( + block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride), + scratch_space); + } + break; + } + case Type::DCT4X4: { + float dcs[4] = {}; + float block00 = coefficients[0]; + float block01 = coefficients[1]; + float block10 = coefficients[8]; + float block11 = coefficients[9]; + dcs[0] = block00 + block01 + block10 + block11; + dcs[1] = block00 + block01 - block10 - block11; + dcs[2] = block00 - block01 + block10 - block11; + dcs[3] = block00 - block01 - block10 + block11; + for (size_t y = 0; y < 2; y++) { + for (size_t x = 0; x < 2; x++) { + HWY_ALIGN float block[4 * 4]; + block[0] = dcs[y * 2 + x]; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + if (ix == 0 && iy == 0) continue; + block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2]; + } + } + ComputeScaledIDCT<4, 4>()( + block, + DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), + scratch_space); + } + } + break; + } + case Type::DCT2X2: { + HWY_ALIGN float coeffs[kDCTBlockSize]; + memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize); + IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs); + IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs); + IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs); + for (size_t y = 0; y < kBlockDim; y++) { + for (size_t x = 0; x < kBlockDim; x++) { + pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x]; + } + } + break; + } + case Type::DCT16X16: { + ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, 
pixels_stride), + scratch_space); + break; + } + case Type::DCT16X8: { + ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT8X16: { + ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT32X8: { + ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT8X32: { + ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT32X16: { + ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT16X32: { + ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT32X32: { + ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT: { + ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::AFV0: { + AFVTransformToPixels<0>(coefficients, pixels, pixels_stride); + break; + } + case Type::AFV1: { + AFVTransformToPixels<1>(coefficients, pixels, pixels_stride); + break; + } + case Type::AFV2: { + AFVTransformToPixels<2>(coefficients, pixels, pixels_stride); + break; + } + case Type::AFV3: { + AFVTransformToPixels<3>(coefficients, pixels, pixels_stride); + break; + } + case Type::DCT64X32: { + ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT32X64: { + ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT64X64: { + ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT128X64: { + ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case 
Type::DCT64X128: { + ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT128X128: { + ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT256X128: { + ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT128X256: { + ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::DCT256X256: { + ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride), + scratch_space); + break; + } + case Type::kNumValidStrategies: + JXL_UNREACHABLE("Invalid strategy"); + } +} + +HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategy::Type strategy, + const float* dc, size_t dc_stride, + float* llf, + float* JXL_RESTRICT scratch) { + using Type = AcStrategy::Type; + HWY_ALIGN float warm_block[4 * 4]; + HWY_ALIGN float warm_scratch_space[4 * 4]; + switch (strategy) { + case Type::DCT16X8: { + ReinterpretingDCT( + dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT8X16: { + ReinterpretingDCT( + dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT16X16: { + ReinterpretingDCT( + dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT32X8: { + ReinterpretingDCT( + dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT8X32: { + ReinterpretingDCT( + dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT32X16: { + ReinterpretingDCT( + dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT16X32: { + ReinterpretingDCT( + dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT32X32: { + ReinterpretingDCT( + dc, 
dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space); + break; + } + case Type::DCT64X32: { + ReinterpretingDCT( + dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4); + break; + } + case Type::DCT32X64: { + ReinterpretingDCT( + dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8); + break; + } + case Type::DCT64X64: { + ReinterpretingDCT( + dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8); + break; + } + case Type::DCT128X64: { + ReinterpretingDCT( + dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8); + break; + } + case Type::DCT64X128: { + ReinterpretingDCT( + dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16); + break; + } + case Type::DCT128X128: { + ReinterpretingDCT< + /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, + /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( + dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16); + break; + } + case Type::DCT256X128: { + ReinterpretingDCT< + /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, + /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( + dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16); + break; + } + case Type::DCT128X256: { + ReinterpretingDCT< + /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, + /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( + dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32); + break; + } + case Type::DCT256X256: { + ReinterpretingDCT< + /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, + /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( + dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32); + break; + } + case Type::DCT: + case Type::DCT2X2: + case Type::DCT4X4: + case Type::DCT4X8: + case Type::DCT8X4: + case Type::AFV0: + case Type::AFV1: + case Type::AFV2: + case Type::AFV3: + case Type::IDENTITY: + llf[0] = dc[0]; + break; + case Type::kNumValidStrategies: + JXL_UNREACHABLE("Invalid strategy"); + }; 
+} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_DEC_TRANSFORMS_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.cc b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.cc new file mode 100644 index 0000000000..2d40740262 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.cc @@ -0,0 +1,42 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_transforms_testonly.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/dec_transforms_testonly.cc" +#include +#include + +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/dec_transforms-inl.h" + +namespace jxl { + +#if HWY_ONCE +HWY_EXPORT(TransformToPixels); +void TransformToPixels(AcStrategy::Type strategy, + float* JXL_RESTRICT coefficients, + float* JXL_RESTRICT pixels, size_t pixels_stride, + float* scratch_space) { + return HWY_DYNAMIC_DISPATCH(TransformToPixels)(strategy, coefficients, pixels, + pixels_stride, scratch_space); +} + +HWY_EXPORT(LowestFrequenciesFromDC); +void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy, + const float* dc, size_t dc_stride, float* llf, + float* JXL_RESTRICT scratch) { + return HWY_DYNAMIC_DISPATCH(LowestFrequenciesFromDC)(strategy, dc, dc_stride, + llf, scratch); +} + +HWY_EXPORT(AFVIDCT4x4); +void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) { + return HWY_DYNAMIC_DISPATCH(AFVIDCT4x4)(coeffs, pixels); +} +#endif // HWY_ONCE + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.h b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.h new file mode 100644 index 0000000000..f68481fda9 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/dec_transforms_testonly.h @@ -0,0 +1,33 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_ +#define LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_ + +// Facade for (non-inlined) inverse integral transforms. + +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" + +namespace jxl { + +void TransformToPixels(AcStrategy::Type strategy, + float* JXL_RESTRICT coefficients, + float* JXL_RESTRICT pixels, size_t pixels_stride, + float* JXL_RESTRICT scratch_space); + +// Equivalent of the above for DC image. +void LowestFrequenciesFromDC(const jxl::AcStrategy::Type strategy, + const float* dc, size_t dc_stride, float* llf, + float* JXL_RESTRICT scratch); + +void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels); + +} // namespace jxl + +#endif // LIB_JXL_DEC_TRANSFORMS_TESTONLY_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_xyb-inl.h b/third-party/libjxl/libjxl/lib/jxl/dec_xyb-inl.h new file mode 100644 index 0000000000..495693b257 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_xyb-inl.h @@ -0,0 +1,346 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// XYB -> linear sRGB helper function. + +#if defined(LIB_JXL_DEC_XYB_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_DEC_XYB_INL_H_ +#undef LIB_JXL_DEC_XYB_INL_H_ +#else +#define LIB_JXL_DEC_XYB_INL_H_ +#endif + +#include + +#include "lib/jxl/dec_xyb.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Broadcast; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Sub; + +// Inverts the pixel-wise RGB->XYB conversion in OpsinDynamicsImage() (including +// the gamma mixing and simple gamma). Avoids clamping to [0, 1] - out of (sRGB) +// gamut values may be in-gamut after transforming to a wider space. +// "inverse_matrix" points to 9 broadcasted vectors, which are the 3x3 entries +// of the (row-major) opsin absorbance matrix inverse. Pre-multiplying its +// entries by c is equivalent to multiplying linear_* by c afterwards. +template +HWY_INLINE HWY_MAYBE_UNUSED void XybToRgb(D d, const V opsin_x, const V opsin_y, + const V opsin_b, + const OpsinParams& opsin_params, + V* const HWY_RESTRICT linear_r, + V* const HWY_RESTRICT linear_g, + V* const HWY_RESTRICT linear_b) { +#if HWY_TARGET == HWY_SCALAR + const auto neg_bias_r = Set(d, opsin_params.opsin_biases[0]); + const auto neg_bias_g = Set(d, opsin_params.opsin_biases[1]); + const auto neg_bias_b = Set(d, opsin_params.opsin_biases[2]); +#else + const auto neg_bias_rgb = LoadDup128(d, opsin_params.opsin_biases); + const auto neg_bias_r = Broadcast<0>(neg_bias_rgb); + const auto neg_bias_g = Broadcast<1>(neg_bias_rgb); + const auto neg_bias_b = Broadcast<2>(neg_bias_rgb); +#endif + + // Color space: XYB -> RGB + auto gamma_r = Add(opsin_y, opsin_x); + auto gamma_g = Sub(opsin_y, opsin_x); + auto gamma_b = opsin_b; + + gamma_r = Sub(gamma_r, Set(d, opsin_params.opsin_biases_cbrt[0])); + gamma_g = Sub(gamma_g, Set(d, opsin_params.opsin_biases_cbrt[1])); + gamma_b = Sub(gamma_b, Set(d, opsin_params.opsin_biases_cbrt[2])); + + // Undo gamma compression: linear = gamma^3 for efficiency. 
+ const auto gamma_r2 = Mul(gamma_r, gamma_r); + const auto gamma_g2 = Mul(gamma_g, gamma_g); + const auto gamma_b2 = Mul(gamma_b, gamma_b); + const auto mixed_r = MulAdd(gamma_r2, gamma_r, neg_bias_r); + const auto mixed_g = MulAdd(gamma_g2, gamma_g, neg_bias_g); + const auto mixed_b = MulAdd(gamma_b2, gamma_b, neg_bias_b); + + const float* HWY_RESTRICT inverse_matrix = opsin_params.inverse_opsin_matrix; + + // Unmix (multiply by 3x3 inverse_matrix) + // TODO(eustas): ref would be more readable than pointer + *linear_r = Mul(LoadDup128(d, &inverse_matrix[0 * 4]), mixed_r); + *linear_g = Mul(LoadDup128(d, &inverse_matrix[3 * 4]), mixed_r); + *linear_b = Mul(LoadDup128(d, &inverse_matrix[6 * 4]), mixed_r); + *linear_r = MulAdd(LoadDup128(d, &inverse_matrix[1 * 4]), mixed_g, *linear_r); + *linear_g = MulAdd(LoadDup128(d, &inverse_matrix[4 * 4]), mixed_g, *linear_g); + *linear_b = MulAdd(LoadDup128(d, &inverse_matrix[7 * 4]), mixed_g, *linear_b); + *linear_r = MulAdd(LoadDup128(d, &inverse_matrix[2 * 4]), mixed_b, *linear_r); + *linear_g = MulAdd(LoadDup128(d, &inverse_matrix[5 * 4]), mixed_b, *linear_g); + *linear_b = MulAdd(LoadDup128(d, &inverse_matrix[8 * 4]), mixed_b, *linear_b); +} + +static inline HWY_MAYBE_UNUSED bool HasFastXYBTosRGB8() { +#if HWY_TARGET == HWY_NEON + return true; +#else + return false; +#endif +} + +static inline HWY_MAYBE_UNUSED void FastXYBTosRGB8(const float* input[4], + uint8_t* output, + bool is_rgba, size_t xsize) { + // This function is very NEON-specific. As such, it uses intrinsics directly. +#if HWY_TARGET == HWY_NEON + // WARNING: doing fixed point arithmetic correctly is very complicated. + // Changes to this function should be thoroughly tested. + + // Note that the input is assumed to have 13 bits of mantissa, and the output + // will have 14 bits. + auto srgb_tf = [&](int16x8_t v16) { + int16x8_t clz = vclzq_s16(v16); + // Convert to [0.25, 0.5) range. 
+ int16x8_t v025_05_16 = vqshlq_s16(v16, vqsubq_s16(clz, vdupq_n_s16(2))); + + // third degree polynomial approximation between 0.25 and 0.5 + // of 1.055/2^(7/2.4) * x^(1/2.4) / 32. + // poly ~ ((0.95x-1.75)*x+1.72)*x+0.29 + // We actually compute ~ ((0.47x-0.87)*x+0.86)*(2x)+0.29 as 1.75 and 1.72 + // overflow our fixed point representation. + + int16x8_t twov = vqaddq_s16(v025_05_16, v025_05_16); + + // 0.47 * x + int16x8_t step1 = vqrdmulhq_n_s16(v025_05_16, 15706); + // - 0.87 + int16x8_t step2 = vsubq_s16(step1, vdupq_n_s16(28546)); + // * x + int16x8_t step3 = vqrdmulhq_s16(step2, v025_05_16); + // + 0.86 + int16x8_t step4 = vaddq_s16(step3, vdupq_n_s16(28302)); + // * 2x + int16x8_t step5 = vqrdmulhq_s16(step4, twov); + // + 0.29 + int16x8_t mul16 = vaddq_s16(step5, vdupq_n_s16(9485)); + + int16x8_t exp16 = vsubq_s16(vdupq_n_s16(11), clz); + // Compute 2**(1/2.4*exp16)/32. Values of exp16 that would overflow are + // capped to 1. + // Generated with the following Python script: + // a = [] + // b = [] + // + // for i in range(0, 16): + // v = 2**(5/12.*i) + // v /= 16 + // v *= 256 * 128 + // v = int(v) + // a.append(v // 256) + // b.append(v % 256) + // + // print(", ".join("0x%02x" % x for x in a)) + // + // print(", ".join("0x%02x" % x for x in b)) + + HWY_ALIGN constexpr uint8_t k2to512powersm1div32_high[16] = { + 0x08, 0x0a, 0x0e, 0x13, 0x19, 0x21, 0x2d, 0x3c, + 0x50, 0x6b, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f, 0x8f, + }; + HWY_ALIGN constexpr uint8_t k2to512powersm1div32_low[16] = { + 0x00, 0xad, 0x41, 0x06, 0x65, 0xe7, 0x41, 0x68, + 0xa2, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + // Using the highway implementation here since vqtbl1q is aarch64-only. 
+ using hwy::HWY_NAMESPACE::Vec128; + uint8x16_t pow_low = + TableLookupBytes( + Vec128(vld1q_u8(k2to512powersm1div32_low)), + Vec128(vreinterpretq_u8_s16(exp16))) + .raw; + uint8x16_t pow_high = + TableLookupBytes( + Vec128(vld1q_u8(k2to512powersm1div32_high)), + Vec128(vreinterpretq_u8_s16(exp16))) + .raw; + int16x8_t pow16 = vreinterpretq_s16_u16(vsliq_n_u16( + vreinterpretq_u16_u8(pow_low), vreinterpretq_u16_u8(pow_high), 8)); + + // approximation of v * 12.92, divided by 2 + // Note that our input is using 13 mantissa bits instead of 15. + int16x8_t v16_linear = vrshrq_n_s16(vmulq_n_s16(v16, 826), 5); + // 1.055*pow(v, 1/2.4) - 0.055, divided by 2 + auto v16_pow = vsubq_s16(vqrdmulhq_s16(mul16, pow16), vdupq_n_s16(901)); + // > 0.0031308f (note that v16 has 13 mantissa bits) + return vbslq_s16(vcgeq_s16(v16, vdupq_n_s16(26)), v16_pow, v16_linear); + }; + + const float* JXL_RESTRICT row_in_x = input[0]; + const float* JXL_RESTRICT row_in_y = input[1]; + const float* JXL_RESTRICT row_in_b = input[2]; + const float* JXL_RESTRICT row_in_a = input[3]; + for (size_t x = 0; x < xsize; x += 8) { + // Normal ranges for xyb for in-gamut sRGB colors: + // x: -0.015386 0.028100 + // y: 0.000000 0.845308 + // b: 0.000000 0.845308 + + // We actually want x * 8 to have some extra precision. + // TODO(veluca): consider different approaches here, like vld1q_f32_x2. + float32x4_t opsin_x_left = vld1q_f32(row_in_x + x); + int16x4_t opsin_x16_times8_left = + vqmovn_s32(vcvtq_n_s32_f32(opsin_x_left, 18)); + float32x4_t opsin_x_right = + vld1q_f32(row_in_x + x + (x + 4 < xsize ? 4 : 0)); + int16x4_t opsin_x16_times8_right = + vqmovn_s32(vcvtq_n_s32_f32(opsin_x_right, 18)); + int16x8_t opsin_x16_times8 = + vcombine_s16(opsin_x16_times8_left, opsin_x16_times8_right); + + float32x4_t opsin_y_left = vld1q_f32(row_in_y + x); + int16x4_t opsin_y16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_y_left, 15)); + float32x4_t opsin_y_right = + vld1q_f32(row_in_y + x + (x + 4 < xsize ? 
4 : 0)); + int16x4_t opsin_y16_right = vqmovn_s32(vcvtq_n_s32_f32(opsin_y_right, 15)); + int16x8_t opsin_y16 = vcombine_s16(opsin_y16_left, opsin_y16_right); + + float32x4_t opsin_b_left = vld1q_f32(row_in_b + x); + int16x4_t opsin_b16_left = vqmovn_s32(vcvtq_n_s32_f32(opsin_b_left, 15)); + float32x4_t opsin_b_right = + vld1q_f32(row_in_b + x + (x + 4 < xsize ? 4 : 0)); + int16x4_t opsin_b16_right = vqmovn_s32(vcvtq_n_s32_f32(opsin_b_right, 15)); + int16x8_t opsin_b16 = vcombine_s16(opsin_b16_left, opsin_b16_right); + + int16x8_t neg_bias16 = vdupq_n_s16(-124); // -0.0037930732552754493 + int16x8_t neg_bias_cbrt16 = vdupq_n_s16(-5110); // -0.155954201 + int16x8_t neg_bias_half16 = vdupq_n_s16(-62); + + // Color space: XYB -> RGB + // Compute ((y+x-bias_cbrt)^3-(y-x-bias_cbrt)^3)/2, + // ((y+x-bias_cbrt)^3+(y-x-bias_cbrt)^3)/2+bias, (b-bias_cbrt)^3+bias. + // Note that ignoring x2 in the formulas below (as x << y) results in + // errors of at least 3 in the final sRGB values. + int16x8_t opsin_yp16 = vqsubq_s16(opsin_y16, neg_bias_cbrt16); + int16x8_t ysq16 = vqrdmulhq_s16(opsin_yp16, opsin_yp16); + int16x8_t twentyfourx16 = vmulq_n_s16(opsin_x16_times8, 3); + int16x8_t twentyfourxy16 = vqrdmulhq_s16(opsin_yp16, twentyfourx16); + int16x8_t threexsq16 = + vrshrq_n_s16(vqrdmulhq_s16(opsin_x16_times8, twentyfourx16), 6); + + // We can ignore x^3 here. Note that this is multiplied by 8. + int16x8_t mixed_rmg16 = vqrdmulhq_s16(twentyfourxy16, opsin_yp16); + + int16x8_t mixed_rpg_sos_half = vhaddq_s16(ysq16, threexsq16); + int16x8_t mixed_rpg16 = vhaddq_s16( + vqrdmulhq_s16(opsin_yp16, mixed_rpg_sos_half), neg_bias_half16); + + int16x8_t gamma_b16 = vqsubq_s16(opsin_b16, neg_bias_cbrt16); + int16x8_t gamma_bsq16 = vqrdmulhq_s16(gamma_b16, gamma_b16); + int16x8_t gamma_bcb16 = vqrdmulhq_s16(gamma_bsq16, gamma_b16); + int16x8_t mixed_b16 = vqaddq_s16(gamma_bcb16, neg_bias16); + // mixed_rpg and mixed_b are in 0-1 range. 
+ // mixed_rmg has a smaller range (-0.035 to 0.035 for valid sRGB). Note + // that at this point it is already multiplied by 8. + + // We multiply all the mixed values by 1/4 (i.e. shift them to 13-bit + // fixed point) to ensure intermediate quantities are in range. Note that + // r-g is not shifted, and was x8 before here; this corresponds to a x32 + // overall multiplicative factor and ensures that all the matrix constants + // are in 0-1 range. + // Similarly, mixed_rpg16 is already multiplied by 1/4 because of the two + // vhadd + using neg_bias_half. + mixed_b16 = vshrq_n_s16(mixed_b16, 2); + + // Unmix (multiply by 3x3 inverse_matrix) + // For increased precision, we use a matrix for converting from + // ((mixed_r - mixed_g)/2, (mixed_r + mixed_g)/2, mixed_b) to rgb. This + // avoids cancellation effects when computing (y+x)^3-(y-x)^3. + // We compute mixed_rpg - mixed_b because the (1+c)*mixed_rpg - c * + // mixed_b pattern is repeated frequently in the code below. This allows + // us to save a multiply per channel, and removes the presence of + // some constants above 1. Moreover, mixed_rmg - mixed_b is in (-1, 1) + // range, so the subtraction is safe. + // All the magic-looking constants here are derived by computing the + // inverse opsin matrix for the transformation modified as described + // above. + + // Precomputation common to multiple color values. 
+ int16x8_t mixed_rpgmb16 = vqsubq_s16(mixed_rpg16, mixed_b16); + int16x8_t mixed_rpgmb_times_016 = vqrdmulhq_n_s16(mixed_rpgmb16, 5394); + int16x8_t mixed_rg16 = vqaddq_s16(mixed_rpgmb_times_016, mixed_rpg16); + + // R + int16x8_t linear_r16 = + vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, 21400)); + + // G + int16x8_t linear_g16 = + vqaddq_s16(mixed_rg16, vqrdmulhq_n_s16(mixed_rmg16, -7857)); + + // B + int16x8_t linear_b16 = vqrdmulhq_n_s16(mixed_rpgmb16, -30996); + linear_b16 = vqaddq_s16(linear_b16, mixed_b16); + linear_b16 = vqaddq_s16(linear_b16, vqrdmulhq_n_s16(mixed_rmg16, -6525)); + + // Apply SRGB transfer function. + int16x8_t r = srgb_tf(linear_r16); + int16x8_t g = srgb_tf(linear_g16); + int16x8_t b = srgb_tf(linear_b16); + + uint8x8_t r8 = + vqmovun_s16(vrshrq_n_s16(vsubq_s16(r, vshrq_n_s16(r, 8)), 6)); + uint8x8_t g8 = + vqmovun_s16(vrshrq_n_s16(vsubq_s16(g, vshrq_n_s16(g, 8)), 6)); + uint8x8_t b8 = + vqmovun_s16(vrshrq_n_s16(vsubq_s16(b, vshrq_n_s16(b, 8)), 6)); + + size_t n = xsize - x; + if (is_rgba) { + float32x4_t a_f32_left = + row_in_a ? vld1q_f32(row_in_a + x) : vdupq_n_f32(1.0f); + float32x4_t a_f32_right = + row_in_a ? vld1q_f32(row_in_a + x + (x + 4 < xsize ? 
4 : 0)) + : vdupq_n_f32(1.0f); + int16x4_t a16_left = vqmovn_s32(vcvtq_n_s32_f32(a_f32_left, 8)); + int16x4_t a16_right = vqmovn_s32(vcvtq_n_s32_f32(a_f32_right, 8)); + uint8x8_t a8 = vqmovun_s16(vcombine_s16(a16_left, a16_right)); + uint8_t* buf = output + 4 * x; + uint8x8x4_t data = {r8, g8, b8, a8}; + if (n >= 8) { + vst4_u8(buf, data); + } else { + uint8_t tmp[8 * 4]; + vst4_u8(tmp, data); + memcpy(buf, tmp, n * 4); + } + } else { + uint8_t* buf = output + 3 * x; + uint8x8x3_t data = {r8, g8, b8}; + if (n >= 8) { + vst3_u8(buf, data); + } else { + uint8_t tmp[8 * 3]; + vst3_u8(tmp, data); + memcpy(buf, tmp, n * 3); + } + } + } +#else + (void)input; + (void)output; + (void)is_rgba; + (void)xsize; + JXL_UNREACHABLE("Unreachable"); +#endif +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_DEC_XYB_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_xyb.cc b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.cc new file mode 100644 index 0000000000..bbd373f239 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.cc @@ -0,0 +1,325 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/dec_xyb.h" + +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/dec_xyb.cc" +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_group_border.h" +#include "lib/jxl/dec_xyb-inl.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image.h" +#include "lib/jxl/matrix_ops.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/quantizer.h" +#include "lib/jxl/sanitizers.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::MulAdd; + +void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool, + const OpsinParams& opsin_params) { + JXL_CHECK_IMAGE_INITIALIZED(*inout, Rect(*inout)); + + const size_t xsize = inout->xsize(); // not padded + JXL_CHECK(RunOnPool( + pool, 0, inout->ysize(), ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + + // Faster than adding via ByteOffset at end of loop. + float* JXL_RESTRICT row0 = inout->PlaneRow(0, y); + float* JXL_RESTRICT row1 = inout->PlaneRow(1, y); + float* JXL_RESTRICT row2 = inout->PlaneRow(2, y); + + const HWY_FULL(float) d; + + for (size_t x = 0; x < xsize; x += Lanes(d)) { + const auto in_opsin_x = Load(d, row0 + x); + const auto in_opsin_y = Load(d, row1 + x); + const auto in_opsin_b = Load(d, row2 + x); + auto linear_r = Undefined(d); + auto linear_g = Undefined(d); + auto linear_b = Undefined(d); + XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params, + &linear_r, &linear_g, &linear_b); + + Store(linear_r, d, row0 + x); + Store(linear_g, d, row1 + x); + Store(linear_b, d, row2 + x); + } + }, + "OpsinToLinear")); +} + +// Same, but not in-place. +void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool, + Image3F* JXL_RESTRICT linear, + const OpsinParams& opsin_params) { + JXL_ASSERT(SameSize(rect, *linear)); + JXL_CHECK_IMAGE_INITIALIZED(opsin, rect); + + JXL_CHECK(RunOnPool( + pool, 0, static_cast(rect.ysize()), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const size_t y = static_cast(task); + + // Faster than adding via ByteOffset at end of loop. 
+ const float* JXL_RESTRICT row_opsin_0 = rect.ConstPlaneRow(opsin, 0, y); + const float* JXL_RESTRICT row_opsin_1 = rect.ConstPlaneRow(opsin, 1, y); + const float* JXL_RESTRICT row_opsin_2 = rect.ConstPlaneRow(opsin, 2, y); + float* JXL_RESTRICT row_linear_0 = linear->PlaneRow(0, y); + float* JXL_RESTRICT row_linear_1 = linear->PlaneRow(1, y); + float* JXL_RESTRICT row_linear_2 = linear->PlaneRow(2, y); + + const HWY_FULL(float) d; + + for (size_t x = 0; x < rect.xsize(); x += Lanes(d)) { + const auto in_opsin_x = Load(d, row_opsin_0 + x); + const auto in_opsin_y = Load(d, row_opsin_1 + x); + const auto in_opsin_b = Load(d, row_opsin_2 + x); + auto linear_r = Undefined(d); + auto linear_g = Undefined(d); + auto linear_b = Undefined(d); + XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params, + &linear_r, &linear_g, &linear_b); + + Store(linear_r, d, row_linear_0 + x); + Store(linear_g, d, row_linear_1 + x); + Store(linear_b, d, row_linear_2 + x); + } + }, + "OpsinToLinear(Rect)")); + JXL_CHECK_IMAGE_INITIALIZED(*linear, rect); +} + +// Transform YCbCr to RGB. +// Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B). +void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) { + JXL_CHECK_IMAGE_INITIALIZED(ycbcr, rect); + const HWY_CAPPED(float, kBlockDim) df; + const size_t S = Lanes(df); // Step. 
+ + const size_t xsize = rect.xsize(); + const size_t ysize = rect.ysize(); + if ((xsize == 0) || (ysize == 0)) return; + + // Full-range BT.601 as defined by JFIF Clause 7: + // https://www.itu.int/rec/T-REC-T.871-201105-I/en + const auto c128 = Set(df, 128.0f / 255); + const auto crcr = Set(df, 1.402f); + const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f); + const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f); + const auto cbcb = Set(df, 1.772f); + + for (size_t y = 0; y < ysize; y++) { + const float* y_row = rect.ConstPlaneRow(ycbcr, 1, y); + const float* cb_row = rect.ConstPlaneRow(ycbcr, 0, y); + const float* cr_row = rect.ConstPlaneRow(ycbcr, 2, y); + float* r_row = rect.PlaneRow(rgb, 0, y); + float* g_row = rect.PlaneRow(rgb, 1, y); + float* b_row = rect.PlaneRow(rgb, 2, y); + for (size_t x = 0; x < xsize; x += S) { + const auto y_vec = Add(Load(df, y_row + x), c128); + const auto cb_vec = Load(df, cb_row + x); + const auto cr_vec = Load(df, cr_row + x); + const auto r_vec = MulAdd(crcr, cr_vec, y_vec); + const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec)); + const auto b_vec = MulAdd(cbcb, cb_vec, y_vec); + Store(r_vec, df, r_row + x); + Store(g_vec, df, g_row + x); + Store(b_vec, df, b_row + x); + } + } + JXL_CHECK_IMAGE_INITIALIZED(*rgb, rect); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(OpsinToLinearInplace); +void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool, + const OpsinParams& opsin_params) { + return HWY_DYNAMIC_DISPATCH(OpsinToLinearInplace)(inout, pool, opsin_params); +} + +HWY_EXPORT(OpsinToLinear); +void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool, + Image3F* JXL_RESTRICT linear, + const OpsinParams& opsin_params) { + return HWY_DYNAMIC_DISPATCH(OpsinToLinear)(opsin, rect, pool, linear, + opsin_params); +} + +HWY_EXPORT(YcbcrToRgb); +void 
YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect) { + return HWY_DYNAMIC_DISPATCH(YcbcrToRgb)(ycbcr, rgb, rect); +} + +HWY_EXPORT(HasFastXYBTosRGB8); +bool HasFastXYBTosRGB8() { return HWY_DYNAMIC_DISPATCH(HasFastXYBTosRGB8)(); } + +HWY_EXPORT(FastXYBTosRGB8); +void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba, + size_t xsize) { + return HWY_DYNAMIC_DISPATCH(FastXYBTosRGB8)(input, output, is_rgba, xsize); +} + +void OpsinParams::Init(float intensity_target) { + InitSIMDInverseMatrix(GetOpsinAbsorbanceInverseMatrix(), inverse_opsin_matrix, + intensity_target); + memcpy(opsin_biases, kNegOpsinAbsorbanceBiasRGB, + sizeof(kNegOpsinAbsorbanceBiasRGB)); + memcpy(quant_biases, kDefaultQuantBias, sizeof(kDefaultQuantBias)); + for (size_t c = 0; c < 4; c++) { + opsin_biases_cbrt[c] = cbrtf(opsin_biases[c]); + } +} + +bool CanOutputToColorEncoding(const ColorEncoding& c_desired) { + if (!c_desired.HaveFields()) { + return false; + } + // TODO(veluca): keep in sync with dec_reconstruct.cc + if (!c_desired.tf.IsPQ() && !c_desired.tf.IsSRGB() && + !c_desired.tf.IsGamma() && !c_desired.tf.IsLinear() && + !c_desired.tf.IsHLG() && !c_desired.tf.IsDCI() && !c_desired.tf.Is709()) { + return false; + } + if (c_desired.IsGray() && c_desired.white_point != WhitePoint::kD65) { + // TODO(veluca): figure out what should happen here. 
+ return false; + } + return true; +} + +Status OutputEncodingInfo::SetFromMetadata(const CodecMetadata& metadata) { + orig_color_encoding = metadata.m.color_encoding; + orig_intensity_target = metadata.m.IntensityTarget(); + desired_intensity_target = orig_intensity_target; + const auto& im = metadata.transform_data.opsin_inverse_matrix; + memcpy(orig_inverse_matrix, im.inverse_matrix, sizeof(orig_inverse_matrix)); + default_transform = im.all_default; + xyb_encoded = metadata.m.xyb_encoded; + std::copy(std::begin(im.opsin_biases), std::end(im.opsin_biases), + opsin_params.opsin_biases); + for (int i = 0; i < 3; ++i) { + opsin_params.opsin_biases_cbrt[i] = cbrtf(opsin_params.opsin_biases[i]); + } + opsin_params.opsin_biases_cbrt[3] = opsin_params.opsin_biases[3] = 1; + std::copy(std::begin(im.quant_biases), std::end(im.quant_biases), + opsin_params.quant_biases); + bool orig_ok = CanOutputToColorEncoding(orig_color_encoding); + bool orig_grey = orig_color_encoding.IsGray(); + return SetColorEncoding(!xyb_encoded || orig_ok + ? orig_color_encoding + : ColorEncoding::LinearSRGB(orig_grey)); +} + +Status OutputEncodingInfo::MaybeSetColorEncoding( + const ColorEncoding& c_desired) { + if (c_desired.GetColorSpace() == ColorSpace::kXYB && + ((color_encoding.GetColorSpace() == ColorSpace::kRGB && + color_encoding.primaries != Primaries::kSRGB) || + color_encoding.tf.IsPQ())) { + return false; + } + if (!xyb_encoded && !CanOutputToColorEncoding(c_desired)) { + return false; + } + return SetColorEncoding(c_desired); +} + +Status OutputEncodingInfo::SetColorEncoding(const ColorEncoding& c_desired) { + color_encoding = c_desired; + color_encoding_is_original = orig_color_encoding.SameColorEncoding(c_desired); + + // Compute the opsin inverse matrix and luminances based on primaries and + // white point. 
+ float inverse_matrix[9]; + bool inverse_matrix_is_default = default_transform; + memcpy(inverse_matrix, orig_inverse_matrix, sizeof(inverse_matrix)); + constexpr float kSRGBLuminances[3] = {0.2126, 0.7152, 0.0722}; + memcpy(luminances, kSRGBLuminances, sizeof(luminances)); + if ((c_desired.primaries != Primaries::kSRGB || + c_desired.white_point != WhitePoint::kD65) && + !c_desired.IsGray()) { + float srgb_to_xyzd50[9]; + const auto& srgb = ColorEncoding::SRGB(/*is_gray=*/false); + JXL_CHECK(PrimariesToXYZD50( + srgb.GetPrimaries().r.x, srgb.GetPrimaries().r.y, + srgb.GetPrimaries().g.x, srgb.GetPrimaries().g.y, + srgb.GetPrimaries().b.x, srgb.GetPrimaries().b.y, + srgb.GetWhitePoint().x, srgb.GetWhitePoint().y, srgb_to_xyzd50)); + float original_to_xyz[3][3]; + JXL_RETURN_IF_ERROR(PrimariesToXYZ( + c_desired.GetPrimaries().r.x, c_desired.GetPrimaries().r.y, + c_desired.GetPrimaries().g.x, c_desired.GetPrimaries().g.y, + c_desired.GetPrimaries().b.x, c_desired.GetPrimaries().b.y, + c_desired.GetWhitePoint().x, c_desired.GetWhitePoint().y, + &original_to_xyz[0][0])); + memcpy(luminances, original_to_xyz[1], sizeof luminances); + if (xyb_encoded) { + float adapt_to_d50[9]; + JXL_RETURN_IF_ERROR(AdaptToXYZD50(c_desired.GetWhitePoint().x, + c_desired.GetWhitePoint().y, + adapt_to_d50)); + float xyzd50_to_original[9]; + Mul3x3Matrix(adapt_to_d50, &original_to_xyz[0][0], xyzd50_to_original); + JXL_RETURN_IF_ERROR(Inv3x3Matrix(xyzd50_to_original)); + float srgb_to_original[9]; + Mul3x3Matrix(xyzd50_to_original, srgb_to_xyzd50, srgb_to_original); + Mul3x3Matrix(srgb_to_original, orig_inverse_matrix, inverse_matrix); + inverse_matrix_is_default = false; + } + } + + if (c_desired.IsGray()) { + float tmp_inv_matrix[9]; + memcpy(tmp_inv_matrix, inverse_matrix, sizeof(inverse_matrix)); + float srgb_to_luma[9]; + memcpy(&srgb_to_luma[0], luminances, sizeof(luminances)); + memcpy(&srgb_to_luma[3], luminances, sizeof(luminances)); + memcpy(&srgb_to_luma[6], luminances, 
sizeof(luminances)); + Mul3x3Matrix(srgb_to_luma, tmp_inv_matrix, inverse_matrix); + } + + // The internal XYB color space uses absolute luminance, so we scale back the + // opsin inverse matrix to relative luminance where 1.0 corresponds to the + // original intensity target, or to absolute luminance for PQ, where 1.0 + // corresponds to 10000 nits. + if (xyb_encoded) { + float intensity_target = + (c_desired.tf.IsPQ() ? 10000 : orig_intensity_target); + InitSIMDInverseMatrix(inverse_matrix, opsin_params.inverse_opsin_matrix, + intensity_target); + all_default_opsin = (std::abs(intensity_target - 255.0) <= 0.1f && + inverse_matrix_is_default); + } + + // Set the inverse gamma based on color space transfer function. + inverse_gamma = (c_desired.tf.IsGamma() ? c_desired.tf.GetGamma() + : c_desired.tf.IsDCI() ? 1.0f / 2.6f + : 1.0); + return true; +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/dec_xyb.h b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.h new file mode 100644 index 0000000000..ebaae9a176 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/dec_xyb.h @@ -0,0 +1,89 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_DEC_XYB_H_ +#define LIB_JXL_DEC_XYB_H_ + +// XYB -> linear sRGB. + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_metadata.h" +#include "lib/jxl/opsin_params.h" + +namespace jxl { + +// Parameters for XYB->sRGB conversion. 
+struct OpsinParams { + float inverse_opsin_matrix[9 * 4]; + float opsin_biases[4]; + float opsin_biases_cbrt[4]; + float quant_biases[4]; + void Init(float intensity_target); +}; + +struct OutputEncodingInfo { + // + // Fields depending only on image metadata + // + ColorEncoding orig_color_encoding; + // Used for the HLG OOTF and PQ tone mapping. + float orig_intensity_target; + // Opsin inverse matrix taken from the metadata. + float orig_inverse_matrix[9]; + bool default_transform; + bool xyb_encoded; + // + // Fields depending on output color encoding + // + ColorEncoding color_encoding; + bool color_encoding_is_original; + // Contains an opsin matrix that converts to the primaries of the output + // encoding. + OpsinParams opsin_params; + bool all_default_opsin; + // Used for Gamma and DCI transfer functions. + float inverse_gamma; + // Luminances of color_encoding's primaries, used for the HLG inverse OOTF and + // for PQ tone mapping. + // Default to sRGB's. + float luminances[3]; + // Used for the HLG inverse OOTF and PQ tone mapping. + float desired_intensity_target; + + Status SetFromMetadata(const CodecMetadata& metadata); + Status MaybeSetColorEncoding(const ColorEncoding& c_desired); + + private: + Status SetColorEncoding(const ColorEncoding& c_desired); +}; + +// Converts `inout` (not padded) from opsin to linear sRGB in-place. Called from +// per-pass postprocessing, hence parallelized. +void OpsinToLinearInplace(Image3F* JXL_RESTRICT inout, ThreadPool* pool, + const OpsinParams& opsin_params); + +// Converts `opsin:rect` (opsin may be padded, rect.x0 must be vector-aligned) +// to linear sRGB. Called from whole-frame encoder, hence parallelized. +void OpsinToLinear(const Image3F& opsin, const Rect& rect, ThreadPool* pool, + Image3F* JXL_RESTRICT linear, + const OpsinParams& opsin_params); + +// Bt.601 to match JPEG/JFIF. 
Inputs are _signed_ YCbCr values suitable for DCT, +// see F.1.1.3 of T.81 (because our data type is float, there is no need to add +// a bias to make the values unsigned). +void YcbcrToRgb(const Image3F& ycbcr, Image3F* rgb, const Rect& rect); + +bool HasFastXYBTosRGB8(); +void FastXYBTosRGB8(const float* input[4], uint8_t* output, bool is_rgba, + size_t xsize); + +} // namespace jxl + +#endif // LIB_JXL_DEC_XYB_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/decode.cc b/third-party/libjxl/libjxl/lib/jxl/decode.cc new file mode 100644 index 0000000000..16ddcbcb1b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/decode.cc @@ -0,0 +1,2790 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#if JPEGXL_ENABLE_BOXES || JPEGXL_ENABLE_TRANSCODE_JPEG +#include "lib/jxl/box_content_decoder.h" +#endif +#include "lib/jxl/dec_external_image.h" +#include "lib/jxl/dec_frame.h" +#include "lib/jxl/dec_modular.h" +#if JPEGXL_ENABLE_TRANSCODE_JPEG +#include "lib/jxl/decode_to_jpeg.h" +#endif +#include "lib/jxl/fields.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/icc_codec.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/memory_manager_internal.h" +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/toc.h" + +namespace { + +// Checks if a + b > size, taking possible integer overflow into account. 
+bool OutOfBounds(size_t a, size_t b, size_t size) { + size_t pos = a + b; + if (pos > size) return true; + if (pos < a) return true; // overflow happened + return false; +} + +JXL_INLINE size_t InitialBasicInfoSizeHint() { + // Amount of bytes before the start of the codestream in the container format, + // assuming that the codestream is the first box after the signature and + // filetype boxes. 12 bytes signature box + 20 bytes filetype box + 16 bytes + // codestream box length + name + optional XLBox length. + const size_t container_header_size = 48; + + // Worst-case amount of bytes for basic info of the JPEG XL codestream header, + // that is all information up to and including extra_channel_bits. Up to + // around 2 bytes signature + 8 bytes SizeHeader + 31 bytes ColorEncoding + 4 + // bytes rest of ImageMetadata + 5 bytes part of ImageMetadata2. + // TODO(lode): recompute and update this value when alpha_bits is moved to + // extra channels info. + const size_t max_codestream_basic_info_size = 50; + + return container_header_size + max_codestream_basic_info_size; +} + +// Debug-printing failure macro similar to JXL_FAILURE, but for the status code +// JXL_DEC_ERROR +#ifdef JXL_CRASH_ON_ERROR +#define JXL_API_ERROR(format, ...) \ + (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \ + ::jxl::Abort(), JXL_DEC_ERROR) +#else // JXL_CRASH_ON_ERROR +#define JXL_API_ERROR(format, ...) \ + (((JXL_DEBUG_ON_ERROR) && \ + ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \ + JXL_DEC_ERROR) +#endif // JXL_CRASH_ON_ERROR + +// Error caused by bad input (invalid file) rather than incorrect API usage. +// For now there is no way to distinguish these two types of errors yet. +#define JXL_INPUT_ERROR(format, ...) JXL_API_ERROR(format, ##__VA_ARGS__) + +JxlDecoderStatus ConvertStatus(JxlDecoderStatus status) { return status; } + +JxlDecoderStatus ConvertStatus(jxl::Status status) { + return status ? 
JXL_DEC_SUCCESS : JXL_DEC_ERROR; +} + +#define JXL_API_RETURN_IF_ERROR(expr) \ + { \ + JxlDecoderStatus status_ = ConvertStatus(expr); \ + if (status_ != JXL_DEC_SUCCESS) return status_; \ + } + +JxlSignature ReadSignature(const uint8_t* buf, size_t len, size_t* pos) { + if (*pos >= len) return JXL_SIG_NOT_ENOUGH_BYTES; + + buf += *pos; + len -= *pos; + + // JPEG XL codestream: 0xff 0x0a + if (len >= 1 && buf[0] == 0xff) { + if (len < 2) { + return JXL_SIG_NOT_ENOUGH_BYTES; + } else if (buf[1] == jxl::kCodestreamMarker) { + *pos += 2; + return JXL_SIG_CODESTREAM; + } else { + return JXL_SIG_INVALID; + } + } + + // JPEG XL container + if (len >= 1 && buf[0] == 0) { + if (len < 12) { + return JXL_SIG_NOT_ENOUGH_BYTES; + } else if (buf[1] == 0 && buf[2] == 0 && buf[3] == 0xC && buf[4] == 'J' && + buf[5] == 'X' && buf[6] == 'L' && buf[7] == ' ' && + buf[8] == 0xD && buf[9] == 0xA && buf[10] == 0x87 && + buf[11] == 0xA) { + *pos += 12; + return JXL_SIG_CONTAINER; + } else { + return JXL_SIG_INVALID; + } + } + + return JXL_SIG_INVALID; +} + +} // namespace + +uint32_t JxlDecoderVersion(void) { + return JPEGXL_MAJOR_VERSION * 1000000 + JPEGXL_MINOR_VERSION * 1000 + + JPEGXL_PATCH_VERSION; +} + +JxlSignature JxlSignatureCheck(const uint8_t* buf, size_t len) { + size_t pos = 0; + return ReadSignature(buf, len, &pos); +} + +namespace { + +size_t BitsPerChannel(JxlDataType data_type) { + switch (data_type) { + case JXL_TYPE_UINT8: + return 8; + case JXL_TYPE_UINT16: + return 16; + case JXL_TYPE_FLOAT: + return 32; + case JXL_TYPE_FLOAT16: + return 16; + default: + return 0; // signals unhandled JxlDataType + } +} + +template +uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata, + JxlPixelFormat format) { + if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) { + return BitsPerChannel(format.data_type); + } else if (bit_depth.type == JXL_BIT_DEPTH_FROM_CODESTREAM) { + return metadata.bit_depth.bits_per_sample; + } else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) { 
+ return bit_depth.bits_per_sample; + } + return 0; +} + +enum class DecoderStage : uint32_t { + kInited, // Decoder created, no JxlDecoderProcessInput called yet + kStarted, // Running JxlDecoderProcessInput calls + kCodestreamFinished, // Codestream done, but other boxes could still occur. + // This stage can also occur before having seen the + // entire codestream if the user didn't subscribe to any + // codestream events at all, e.g. only to box events, + // or, the user only subscribed to basic info, and only + // the header of the codestream was parsed. + kError, // Error occurred, decoder object no longer usable +}; + +enum class FrameStage : uint32_t { + kHeader, // Must parse frame header. + kTOC, // Must parse TOC + kFull, // Must parse full pixels +}; + +enum class BoxStage : uint32_t { + kHeader, // Parsing box header of the next box, or start of non-container + // stream + kFtyp, // The ftyp box + kSkip, // Box whose contents are skipped + kCodestream, // Handling codestream box contents, or non-container stream + kPartialCodestream, // Handling the extra header of partial codestream box + kJpegRecon, // Handling jpeg reconstruction box +}; + +enum class JpegReconStage : uint32_t { + kNone, // Not outputting + kSettingMetadata, // Ready to output, must set metadata to the jpeg_data + kOutputting, // Currently outputting the JPEG bytes + kFinished, // JPEG reconstruction fully handled +}; + +/* +Given list of frame references to storage slots, and storage slots in which this +frame is saved, computes which frames are required to decode the frame at the +given index and any frames after it. The frames on which this depends are +returned as a vector of their indices, in no particular order. The given index +must be smaller than saved_as.size(), and references.size() must equal +saved_as.size(). Any frames beyond saved_as and references are considered +unknown future frames and must be treated as if something depends on them. 
+*/ +std::vector GetFrameDependencies(size_t index, + const std::vector& saved_as, + const std::vector& references) { + JXL_ASSERT(references.size() == saved_as.size()); + JXL_ASSERT(index < references.size()); + + std::vector result; + + constexpr size_t kNumStorage = 8; + + // value which indicates nothing is stored in this storage slot + const size_t invalid = references.size(); + // for each of the 8 storage slots, a vector that translates frame index to + // frame stored in this storage slot at this point, that is, the last + // frame that was stored in this slot before or at this index. + std::array, kNumStorage> storage; + for (size_t s = 0; s < kNumStorage; ++s) { + storage[s].resize(saved_as.size()); + int mask = 1 << s; + size_t id = invalid; + for (size_t i = 0; i < saved_as.size(); ++i) { + if (saved_as[i] & mask) { + id = i; + } + storage[s][i] = id; + } + } + + std::vector seen(index + 1, 0); + std::vector stack; + stack.push_back(index); + seen[index] = 1; + + // For frames after index, assume they can depend on any of the 8 storage + // slots, so push the frame for each stored reference to the stack and result. + // All frames after index are treated as having unknown references and with + // the possibility that there are more frames after the last known. + // TODO(lode): take values of saved_as and references after index, and a + // input flag indicating if they are all frames of the image, to further + // optimize this. 
+ for (size_t s = 0; s < kNumStorage; ++s) { + size_t frame_ref = storage[s][index]; + if (frame_ref == invalid) continue; + if (seen[frame_ref]) continue; + stack.push_back(frame_ref); + seen[frame_ref] = 1; + result.push_back(frame_ref); + } + + while (!stack.empty()) { + size_t frame_index = stack.back(); + stack.pop_back(); + if (frame_index == 0) continue; // first frame cannot have references + for (size_t s = 0; s < kNumStorage; ++s) { + int mask = 1 << s; + if (!(references[frame_index] & mask)) continue; + size_t frame_ref = storage[s][frame_index - 1]; + if (frame_ref == invalid) continue; + if (seen[frame_ref]) continue; + stack.push_back(frame_ref); + seen[frame_ref] = 1; + result.push_back(frame_ref); + } + } + + return result; +} + +// Parameters for user-requested extra channel output. +struct ExtraChannelOutput { + JxlPixelFormat format; + void* buffer; + size_t buffer_size; +}; + +} // namespace + +namespace jxl { + +typedef struct JxlDecoderFrameIndexBoxEntryStruct { + // OFFi: offset of start byte of this frame compared to start + // byte of previous frame from this index in the JPEG XL codestream. For the + // first frame, this is the offset from the first byte of the JPEG XL + // codestream. + uint64_t OFFi; + // Ti: duration in ticks between the start of this frame and + // the start of the next frame in the index. If this is the last frame in the + // index, this is the duration in ticks between the start of this frame and + // the end of the stream. A tick lasts TNUM / TDEN seconds. + uint32_t Ti; + // Fi: amount of frames the next frame in the index occurs + // after this frame. If this is the last frame in the index, this is the + // amount of frames after this frame in the remainder of the stream. 
Only + // frames that are presented by the decoder are counted for this purpose, this + // excludes frames that are not intended for display but for compositing with + // other frames, such as frames that aren't the last frame with a duration of + // 0 ticks. + uint32_t Fi; +} JxlDecoderFrameIndexBoxEntry; + +typedef struct JxlDecoderFrameIndexBoxStruct { + int64_t NF() const { return entries.size(); } + int32_t TNUM = 1; + int32_t TDEN = 1000; + + std::vector entries; + + // That way we can ensure that every index box will have the first frame. + // If the API user decides to mark it as an indexed frame, we call + // the AddFrame again, this time with requested. + void AddFrame(uint64_t OFFi, uint32_t Ti, uint32_t Fi) { + JxlDecoderFrameIndexBoxEntry e; + e.OFFi = OFFi; + e.Ti = Ti; + e.Fi = Fi; + entries.push_back(e); + } +} JxlDecoderFrameIndexBox; + +} // namespace jxl + +// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) +struct JxlDecoderStruct { + JxlDecoderStruct() = default; + + JxlMemoryManager memory_manager; + std::unique_ptr thread_pool; + + DecoderStage stage; + + // Status of progression, internal. + bool got_signature; + // Indicates we know that we've seen the last codestream box: either this + // was a jxlc box, or a jxlp box that has its index indicated as last by + // having its most significant bit set, or no boxes are used at all. This + // does not indicate the full codestream has already been seen, only the + // last box of it has been initiated. + bool last_codestream_seen; + bool got_codestream_signature; + bool got_basic_info; + bool got_transform_data; // To skip everything before ICC. + bool got_all_headers; // Codestream metadata headers. + bool post_headers; // Already decoding pixels. + jxl::ICCReader icc_reader; + jxl::JxlDecoderFrameIndexBox frame_index_box; + // This means either we actually got the preview image, or determined we + // cannot get it or there is none. 
+ bool got_preview_image; + bool preview_frame; + + // Position of next_in in the original file including box format if present + // (as opposed to position in the codestream) + size_t file_pos; + + size_t box_contents_begin; + size_t box_contents_end; + size_t box_contents_size; + size_t box_size; + size_t header_size; + // Either a final box that runs until EOF, or the case of no container format + // at all. + bool box_contents_unbounded; + + JxlBoxType box_type; + JxlBoxType box_decoded_type; // Underlying type for brob boxes + // Set to true right after a JXL_DEC_BOX event only. + bool box_event; + bool decompress_boxes; + + bool box_out_buffer_set; + // Whether the out buffer is set for the current box, if the user did not yet + // release the buffer while the next box is encountered, this will be set to + // false. If this is false, no JXL_DEC_NEED_MORE_INPUT is emitted + // (irrespective of the value of box_out_buffer_set), because not setting + // output indicates the user does not wish the data of this box. + bool box_out_buffer_set_current_box; + uint8_t* box_out_buffer; + size_t box_out_buffer_size; + // which byte of the full box content the start of the out buffer points to + size_t box_out_buffer_begin; + // which byte of box_out_buffer to write to next + size_t box_out_buffer_pos; + + // Settings + bool keep_orientation; + bool unpremul_alpha; + bool render_spotcolors; + bool coalescing; + float desired_intensity_target; + + // Bitfield, for which informative events (JXL_DEC_BASIC_INFO, etc...) the + // decoder returns a status. By default, do not return for any of the events, + // only return when the decoder cannot continue because it needs more input or + // output data. + int events_wanted; + int orig_events_wanted; + + // Fields for reading the basic info from the header. + size_t basic_info_size_hint; + bool have_container; + size_t box_count; + + // The level of progressive detail in frame decoding. 
+ JxlProgressiveDetail prog_detail = kDC; + // The progressive detail of the current frame. + JxlProgressiveDetail frame_prog_detail; + // The intended downsampling ratio for the current progression step. + size_t downsampling_target; + + // Set to true if either an image out buffer or an image out callback was set. + bool image_out_buffer_set; + + // Owned by the caller, buffer for preview or full resolution image. + void* image_out_buffer; + JxlImageOutInitCallback image_out_init_callback; + JxlImageOutRunCallback image_out_run_callback; + JxlImageOutDestroyCallback image_out_destroy_callback; + void* image_out_init_opaque; + struct SimpleImageOutCallback { + JxlImageOutCallback callback; + void* opaque; + }; + SimpleImageOutCallback simple_image_out_callback; + + size_t image_out_size; + + JxlPixelFormat image_out_format; + JxlBitDepth image_out_bit_depth; + + // For extra channels. Empty if no extra channels are requested, and they are + // reset each frame + std::vector extra_channel_output; + + jxl::CodecMetadata metadata; + // Same as metadata.m, except for the color_encoding, which is set to the + // output encoding. + jxl::ImageMetadata image_metadata; + std::unique_ptr ib; + + std::unique_ptr passes_state; + std::unique_ptr frame_dec; + size_t next_section; + std::vector section_processed; + + // headers and TOC for the current frame. When got_toc is true, this is + // always the frame header of the last frame of the current still series, + // that is, the displayed frame. + std::unique_ptr frame_header; + + size_t remaining_frame_size; + FrameStage frame_stage; + bool dc_frame_progression_done; + // The currently processed frame is the last of the current composite still, + // and so must be returned as pixels + bool is_last_of_still; + // The currently processed frame is the last of the codestream + bool is_last_total; + // How many frames to skip. + size_t skip_frames; + // Skipping the current frame. 
May be false if skip_frames was just set to + // a positive value while already processing a current frame, then + // skipping_frame will be enabled only for the next frame. + bool skipping_frame; + + // Amount of internal frames and external frames started. External frames are + // user-visible frames, internal frames includes all external frames and + // also invisible frames such as patches, blending-only and dc_level frames. + size_t internal_frames; + size_t external_frames; + + // For each internal frame, which storage locations it references, and which + // storage locations it is stored in, using the bit mask as defined in + // FrameDecoder::References and FrameDecoder::SaveAs. + std::vector frame_references; + std::vector frame_saved_as; + + // Translates external frame index to internal frame index. The external + // index is the index of user-visible frames. The internal index can be larger + // since non-visible frames (such as frames with patches, ...) are included. + std::vector frame_external_to_internal; + + // Whether the frame with internal index is required to decode the frame + // being skipped to or any frames after that. If no skipping is active, + // this vector is ignored. If the current internal frame index is beyond this + // vector, it must be treated as a required frame. + std::vector frame_required; + + // Codestream input data is copied here temporarily when the decoder needs + // more input bytes to process the next part of the stream. We copy the input + // data in order to be able to release it all through the API it when + // returning JXL_DEC_NEED_MORE_INPUT. + std::vector codestream_copy; + // Number of bytes at the end of codestream_copy that were not yet consumed + // by calling AdvanceInput(). + size_t codestream_unconsumed; + // Position in the codestream_copy vector that the decoder already finished + // processing. 
It can be greater than the current size of codestream_copy in + // case where the decoder skips some parts of the frame that were not yet + // provided. + size_t codestream_pos; + // Number of bits after codestream_pos that were already processed. + size_t codestream_bits_ahead; + + BoxStage box_stage; + +#if JPEGXL_ENABLE_BOXES + jxl::JxlBoxContentDecoder box_content_decoder; +#endif +#if JPEGXL_ENABLE_TRANSCODE_JPEG + jxl::JxlToJpegDecoder jpeg_decoder; + // Decodes Exif or XMP metadata for JPEG reconstruction + jxl::JxlBoxContentDecoder metadata_decoder; + std::vector exif_metadata; + std::vector xmp_metadata; + // must store JPEG reconstruction metadata from the current box + // 0 = not stored, 1 = currently storing, 2 = finished + int store_exif; + int store_xmp; + size_t recon_out_buffer_pos; + size_t recon_exif_size; // Expected exif size as read from the jbrd box + size_t recon_xmp_size; // Expected exif size as read from the jbrd box + JpegReconStage recon_output_jpeg; + + bool JbrdNeedMoreBoxes() const { + // jbrd box wants exif but exif box not yet seen + if (store_exif < 2 && recon_exif_size > 0) return true; + // jbrd box wants xmp but xmp box not yet seen + if (store_xmp < 2 && recon_xmp_size > 0) return true; + return false; + } +#endif + + const uint8_t* next_in; + size_t avail_in; + bool input_closed; + + void AdvanceInput(size_t size) { + JXL_DASSERT(avail_in >= size); + next_in += size; + avail_in -= size; + file_pos += size; + } + + size_t AvailableCodestream() const { + size_t avail_codestream = avail_in; + if (!box_contents_unbounded) { + avail_codestream = + std::min(avail_codestream, box_contents_end - file_pos); + } + return avail_codestream; + } + + void AdvanceCodestream(size_t size) { + size_t avail_codestream = AvailableCodestream(); + if (codestream_copy.empty()) { + if (size <= avail_codestream) { + AdvanceInput(size); + } else { + codestream_pos = size - avail_codestream; + AdvanceInput(avail_codestream); + } + } else { + 
codestream_pos += size; + if (codestream_pos + codestream_unconsumed >= codestream_copy.size()) { + size_t advance = std::min( + codestream_unconsumed, + codestream_unconsumed + codestream_pos - codestream_copy.size()); + AdvanceInput(advance); + codestream_pos -= std::min(codestream_pos, codestream_copy.size()); + codestream_unconsumed = 0; + codestream_copy.clear(); + } + } + } + + JxlDecoderStatus RequestMoreInput() { + if (codestream_copy.empty()) { + size_t avail_codestream = AvailableCodestream(); + codestream_copy.insert(codestream_copy.end(), next_in, + next_in + avail_codestream); + AdvanceInput(avail_codestream); + } else { + AdvanceInput(codestream_unconsumed); + codestream_unconsumed = 0; + } + return JXL_DEC_NEED_MORE_INPUT; + } + + JxlDecoderStatus GetCodestreamInput(jxl::Span* span) { + if (codestream_copy.empty() && codestream_pos > 0) { + size_t avail_codestream = AvailableCodestream(); + size_t skip = std::min(codestream_pos, avail_codestream); + AdvanceInput(skip); + codestream_pos -= skip; + if (codestream_pos > 0) { + return RequestMoreInput(); + } + } + JXL_ASSERT(codestream_pos <= codestream_copy.size()); + JXL_ASSERT(codestream_unconsumed <= codestream_copy.size()); + size_t avail_codestream = AvailableCodestream(); + if (codestream_copy.empty()) { + if (avail_codestream == 0) { + return RequestMoreInput(); + } + *span = jxl::Span(next_in, avail_codestream); + return JXL_DEC_SUCCESS; + } else { + codestream_copy.insert(codestream_copy.end(), + next_in + codestream_unconsumed, + next_in + avail_codestream); + codestream_unconsumed = avail_codestream; + *span = jxl::Span(codestream_copy.data() + codestream_pos, + codestream_copy.size() - codestream_pos); + return JXL_DEC_SUCCESS; + } + } + + // Whether the decoder can use more codestream input for a purpose it needs. + // This returns false if the user didn't subscribe to any events that + // require the codestream (e.g. 
only subscribed to metadata boxes), or all + // parts of the codestream that are subscribed to (e.g. only basic info) have + // already occurred. + bool CanUseMoreCodestreamInput() const { + // The decoder can set this to finished early if all relevant events were + // processed, so this check works. + return stage != DecoderStage::kCodestreamFinished; + } + + // If set then some operations will fail, if those would require + // allocating large objects. Actual memory usage might be two orders of + // magnitude bigger. + // TODO(eustas): remove once there is working API for memory / CPU limit. + size_t memory_limit_base = 0; + size_t cpu_limit_base = 0; + size_t used_cpu_base = 0; +}; + +namespace { + +bool CheckSizeLimit(JxlDecoder* dec, size_t xsize, size_t ysize) { + if (!dec->memory_limit_base) return true; + if (xsize == 0 || ysize == 0) return true; + if (xsize >= dec->memory_limit_base || ysize >= dec->memory_limit_base) { + return false; + } + // Rough estimate of real row length. 
+ xsize = jxl::DivCeil(xsize, 32) * 32; + size_t num_pixels = xsize * ysize; + if (num_pixels / xsize != ysize) return false; // overflow + if (num_pixels > dec->memory_limit_base) return false; + return true; +} + +} // namespace + +// Resets the state that must be reset for both Rewind and Reset +void JxlDecoderRewindDecodingState(JxlDecoder* dec) { + dec->stage = DecoderStage::kInited; + dec->got_signature = false; + dec->last_codestream_seen = false; + dec->got_codestream_signature = false; + dec->got_basic_info = false; + dec->got_transform_data = false; + dec->got_all_headers = false; + dec->post_headers = false; + dec->icc_reader.Reset(); + dec->got_preview_image = false; + dec->preview_frame = false; + dec->file_pos = 0; + dec->box_contents_begin = 0; + dec->box_contents_end = 0; + dec->box_contents_size = 0; + dec->box_size = 0; + dec->header_size = 0; + dec->box_contents_unbounded = false; + memset(dec->box_type, 0, sizeof(dec->box_type)); + memset(dec->box_decoded_type, 0, sizeof(dec->box_decoded_type)); + dec->box_event = false; + dec->box_stage = BoxStage::kHeader; + dec->box_out_buffer_set = false; + dec->box_out_buffer_set_current_box = false; + dec->box_out_buffer = nullptr; + dec->box_out_buffer_size = 0; + dec->box_out_buffer_begin = 0; + dec->box_out_buffer_pos = 0; + +#if JPEGXL_ENABLE_TRANSCODE_JPEG + dec->exif_metadata.clear(); + dec->xmp_metadata.clear(); + dec->store_exif = 0; + dec->store_xmp = 0; + dec->recon_out_buffer_pos = 0; + dec->recon_exif_size = 0; + dec->recon_xmp_size = 0; + dec->recon_output_jpeg = JpegReconStage::kNone; +#endif + + dec->events_wanted = dec->orig_events_wanted; + dec->basic_info_size_hint = InitialBasicInfoSizeHint(); + dec->have_container = 0; + dec->box_count = 0; + dec->downsampling_target = 8; + dec->image_out_buffer_set = false; + dec->image_out_buffer = nullptr; + dec->image_out_init_callback = nullptr; + dec->image_out_run_callback = nullptr; + dec->image_out_destroy_callback = nullptr; + 
dec->image_out_init_opaque = nullptr; + dec->image_out_size = 0; + dec->image_out_bit_depth.type = JXL_BIT_DEPTH_FROM_PIXEL_FORMAT; + dec->extra_channel_output.clear(); + dec->next_in = 0; + dec->avail_in = 0; + dec->input_closed = false; + + dec->passes_state.reset(nullptr); + dec->frame_dec.reset(nullptr); + dec->next_section = 0; + dec->section_processed.clear(); + + dec->ib.reset(); + dec->metadata = jxl::CodecMetadata(); + dec->image_metadata = dec->metadata.m; + dec->frame_header.reset(new jxl::FrameHeader(&dec->metadata)); + + dec->codestream_copy.clear(); + dec->codestream_unconsumed = 0; + dec->codestream_pos = 0; + dec->codestream_bits_ahead = 0; + + dec->frame_stage = FrameStage::kHeader; + dec->remaining_frame_size = 0; + dec->is_last_of_still = false; + dec->is_last_total = false; + dec->skip_frames = 0; + dec->skipping_frame = false; + dec->internal_frames = 0; + dec->external_frames = 0; +} + +void JxlDecoderReset(JxlDecoder* dec) { + JxlDecoderRewindDecodingState(dec); + + dec->thread_pool.reset(); + dec->keep_orientation = false; + dec->unpremul_alpha = false; + dec->render_spotcolors = true; + dec->coalescing = true; + dec->desired_intensity_target = 0; + dec->orig_events_wanted = 0; + dec->events_wanted = 0; + dec->frame_references.clear(); + dec->frame_saved_as.clear(); + dec->frame_external_to_internal.clear(); + dec->frame_required.clear(); + dec->decompress_boxes = false; +} + +JxlDecoder* JxlDecoderCreate(const JxlMemoryManager* memory_manager) { + JxlMemoryManager local_memory_manager; + if (!jxl::MemoryManagerInit(&local_memory_manager, memory_manager)) + return nullptr; + + void* alloc = + jxl::MemoryManagerAlloc(&local_memory_manager, sizeof(JxlDecoder)); + if (!alloc) return nullptr; + // Placement new constructor on allocated memory + JxlDecoder* dec = new (alloc) JxlDecoder(); + dec->memory_manager = local_memory_manager; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (!memory_manager) { + dec->memory_limit_base = 53 << 16; + 
// Allow 5 x max_image_size processing units; every frame is accounted + // as W x H CPU processing units, so there could be numerous small frames + // or few larger ones. + dec->cpu_limit_base = 5 * dec->memory_limit_base; + } +#endif + + JxlDecoderReset(dec); + + return dec; +} + +void JxlDecoderDestroy(JxlDecoder* dec) { + if (dec) { + JxlMemoryManager local_memory_manager = dec->memory_manager; + // Call destructor directly since custom free function is used. + dec->~JxlDecoder(); + jxl::MemoryManagerFree(&local_memory_manager, dec); + } +} + +void JxlDecoderRewind(JxlDecoder* dec) { JxlDecoderRewindDecodingState(dec); } + +void JxlDecoderSkipFrames(JxlDecoder* dec, size_t amount) { + // Increment amount, rather than set it: making the amount smaller is + // impossible because the decoder may already have skipped frames required to + // decode earlier frames, and making the amount larger compared to an existing + // amount is impossible because if JxlDecoderSkipFrames is called in the + // middle of already skipping frames, the user cannot know how many frames + // have already been skipped internally so far so an absolute value cannot + // be defined. 
+ dec->skip_frames += amount; + + dec->frame_required.clear(); + size_t next_frame = dec->external_frames + dec->skip_frames; + + // A frame that has been seen before a rewind + if (next_frame < dec->frame_external_to_internal.size()) { + size_t internal_index = dec->frame_external_to_internal[next_frame]; + if (internal_index < dec->frame_saved_as.size()) { + std::vector deps = GetFrameDependencies( + internal_index, dec->frame_saved_as, dec->frame_references); + + dec->frame_required.resize(internal_index + 1, 0); + for (size_t i = 0; i < deps.size(); i++) { + JXL_ASSERT(deps[i] < dec->frame_required.size()); + dec->frame_required[deps[i]] = 1; + } + } + } +} + +JxlDecoderStatus JxlDecoderSkipCurrentFrame(JxlDecoder* dec) { + if (dec->frame_stage != FrameStage::kFull) { + return JXL_API_ERROR("JxlDecoderSkipCurrentFrame called at the wrong time"); + } + JXL_DASSERT(dec->frame_dec); + dec->frame_stage = FrameStage::kHeader; + dec->AdvanceCodestream(dec->remaining_frame_size); + if (dec->is_last_of_still) { + dec->image_out_buffer_set = false; + } + return JXL_DEC_SUCCESS; +} + +JXL_EXPORT JxlDecoderStatus +JxlDecoderSetParallelRunner(JxlDecoder* dec, JxlParallelRunner parallel_runner, + void* parallel_runner_opaque) { + if (dec->stage != DecoderStage::kInited) { + return JXL_API_ERROR( + "JxlDecoderSetParallelRunner must be called before starting"); + } + dec->thread_pool.reset( + new jxl::ThreadPool(parallel_runner, parallel_runner_opaque)); + return JXL_DEC_SUCCESS; +} + +size_t JxlDecoderSizeHintBasicInfo(const JxlDecoder* dec) { + if (dec->got_basic_info) return 0; + return dec->basic_info_size_hint; +} + +JxlDecoderStatus JxlDecoderSubscribeEvents(JxlDecoder* dec, int events_wanted) { + if (dec->stage != DecoderStage::kInited) { + return JXL_DEC_ERROR; // Cannot subscribe to events after having started. + } + if (events_wanted & 63) { + return JXL_DEC_ERROR; // Can only subscribe to informative events. 
+ } + dec->events_wanted = events_wanted; + dec->orig_events_wanted = events_wanted; + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetKeepOrientation(JxlDecoder* dec, + JXL_BOOL skip_reorientation) { + if (dec->stage != DecoderStage::kInited) { + return JXL_API_ERROR("Must set keep_orientation option before starting"); + } + dec->keep_orientation = !!skip_reorientation; + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetUnpremultiplyAlpha(JxlDecoder* dec, + JXL_BOOL unpremul_alpha) { + if (dec->stage != DecoderStage::kInited) { + return JXL_API_ERROR("Must set unpremul_alpha option before starting"); + } + dec->unpremul_alpha = !!unpremul_alpha; + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetRenderSpotcolors(JxlDecoder* dec, + JXL_BOOL render_spotcolors) { + if (dec->stage != DecoderStage::kInited) { + return JXL_API_ERROR("Must set render_spotcolors option before starting"); + } + dec->render_spotcolors = !!render_spotcolors; + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetCoalescing(JxlDecoder* dec, JXL_BOOL coalescing) { + if (dec->stage != DecoderStage::kInited) { + return JXL_API_ERROR("Must set coalescing option before starting"); + } + dec->coalescing = !!coalescing; + return JXL_DEC_SUCCESS; +} + +namespace { +// helper function to get the dimensions of the current image buffer +void GetCurrentDimensions(const JxlDecoder* dec, size_t& xsize, size_t& ysize) { + if (dec->frame_header->nonserialized_is_preview) { + xsize = dec->metadata.oriented_preview_xsize(dec->keep_orientation); + ysize = dec->metadata.oriented_preview_ysize(dec->keep_orientation); + return; + } + xsize = dec->metadata.oriented_xsize(dec->keep_orientation); + ysize = dec->metadata.oriented_ysize(dec->keep_orientation); + if (!dec->coalescing) { + const auto frame_dim = dec->frame_header->ToFrameDimensions(); + xsize = frame_dim.xsize_upsampled; + ysize = frame_dim.ysize_upsampled; + if (!dec->keep_orientation && + 
static_cast(dec->metadata.m.GetOrientation()) > 4) { + std::swap(xsize, ysize); + } + } +} +} // namespace + +namespace jxl { +namespace { + +template +bool CanRead(Span data, BitReader* reader, T* JXL_RESTRICT t) { + // Use a copy of the bit reader because CanRead advances bits. + BitReader reader2(data); + reader2.SkipBits(reader->TotalBitsConsumed()); + bool result = Bundle::CanRead(&reader2, t); + JXL_ASSERT(reader2.Close()); + return result; +} + +// Returns JXL_DEC_SUCCESS if the full bundle was successfully read, status +// indicating either error or need more input otherwise. +template +JxlDecoderStatus ReadBundle(JxlDecoder* dec, Span data, + BitReader* reader, T* JXL_RESTRICT t) { + if (!CanRead(data, reader, t)) { + return dec->RequestMoreInput(); + } + if (!Bundle::Read(reader, t)) { + return JXL_DEC_ERROR; + } + return JXL_DEC_SUCCESS; +} + +std::unique_ptr> GetBitReader( + Span span) { + BitReader* reader = new BitReader(span); + return std::unique_ptr>( + reader, [](BitReader* reader) { + // We can't allow Close to abort the program if the reader is out of + // bounds, or all return paths in the code, even those that already + // return failure, would have to manually call AllReadsWithinBounds(). + // Invalid JXL codestream should not cause program to quit. 
+ (void)reader->AllReadsWithinBounds(); + (void)reader->Close(); + delete reader; + }); +} + +JxlDecoderStatus JxlDecoderReadBasicInfo(JxlDecoder* dec) { + if (!dec->got_codestream_signature) { + // Check and skip the codestream signature + Span span; + JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span)); + if (span.size() < 2) { + return dec->RequestMoreInput(); + } + if (span.data()[0] != 0xff || span.data()[1] != jxl::kCodestreamMarker) { + return JXL_INPUT_ERROR("invalid signature"); + } + dec->got_codestream_signature = true; + dec->AdvanceCodestream(2); + } + + Span span; + JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span)); + auto reader = GetBitReader(span); + JXL_API_RETURN_IF_ERROR( + ReadBundle(dec, span, reader.get(), &dec->metadata.size)); + JXL_API_RETURN_IF_ERROR( + ReadBundle(dec, span, reader.get(), &dec->metadata.m)); + size_t total_bits = reader->TotalBitsConsumed(); + dec->AdvanceCodestream(total_bits / jxl::kBitsPerByte); + dec->codestream_bits_ahead = total_bits % jxl::kBitsPerByte; + dec->got_basic_info = true; + dec->basic_info_size_hint = 0; + dec->image_metadata = dec->metadata.m; + JXL_DEBUG_V(2, "Decoded BasicInfo: %s", dec->metadata.DebugString().c_str()); + + if (!CheckSizeLimit(dec, dec->metadata.size.xsize(), + dec->metadata.size.ysize())) { + return JXL_INPUT_ERROR("image is too large"); + } + + return JXL_DEC_SUCCESS; +} + +// Reads all codestream headers (but not frame headers) +JxlDecoderStatus JxlDecoderReadAllHeaders(JxlDecoder* dec) { + if (!dec->got_transform_data) { + Span span; + JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span)); + auto reader = GetBitReader(span); + reader->SkipBits(dec->codestream_bits_ahead); + dec->metadata.transform_data.nonserialized_xyb_encoded = + dec->metadata.m.xyb_encoded; + JXL_API_RETURN_IF_ERROR( + ReadBundle(dec, span, reader.get(), &dec->metadata.transform_data)); + size_t total_bits = reader->TotalBitsConsumed(); + dec->AdvanceCodestream(total_bits / jxl::kBitsPerByte); + 
dec->codestream_bits_ahead = total_bits % jxl::kBitsPerByte; + dec->got_transform_data = true; + } + + Span span; + JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span)); + auto reader = GetBitReader(span); + reader->SkipBits(dec->codestream_bits_ahead); + + if (dec->metadata.m.color_encoding.WantICC()) { + jxl::Status status = + dec->icc_reader.Init(reader.get(), dec->memory_limit_base); + // Always check AllReadsWithinBounds, not all the C++ decoder implementation + // handles reader out of bounds correctly yet (e.g. context map). Not + // checking AllReadsWithinBounds can cause reader->Close() to trigger an + // assert, but we don't want library to quit program for invalid codestream. + if (!reader->AllReadsWithinBounds() || + status.code() == StatusCode::kNotEnoughBytes) { + return dec->RequestMoreInput(); + } + if (!status) { + // Other non-successful status is an error + return JXL_DEC_ERROR; + } + PaddedBytes icc; + status = dec->icc_reader.Process(reader.get(), &icc); + if (status.code() == StatusCode::kNotEnoughBytes) { + return dec->RequestMoreInput(); + } + if (!status) { + // Other non-successful status is an error + return JXL_DEC_ERROR; + } + if (!dec->metadata.m.color_encoding.SetICCRaw(std::move(icc))) { + return JXL_DEC_ERROR; + } + } + + dec->got_all_headers = true; + JXL_API_RETURN_IF_ERROR(reader->JumpToByteBoundary()); + + dec->AdvanceCodestream(reader->TotalBitsConsumed() / jxl::kBitsPerByte); + dec->codestream_bits_ahead = 0; + + if (!dec->passes_state) { + dec->passes_state.reset(new jxl::PassesDecoderState()); + } + + JXL_API_RETURN_IF_ERROR( + dec->passes_state->output_encoding_info.SetFromMetadata(dec->metadata)); + if (dec->desired_intensity_target > 0) { + dec->passes_state->output_encoding_info.desired_intensity_target = + dec->desired_intensity_target; + } + dec->image_metadata = dec->metadata.m; + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderProcessSections(JxlDecoder* dec) { + Span span; + 
JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span)); + const auto& toc = dec->frame_dec->Toc(); + size_t pos = 0; + std::vector section_info; + std::vector section_status; + for (size_t i = dec->next_section; i < toc.size(); ++i) { + if (dec->section_processed[i]) { + pos += toc[i].size; + continue; + } + size_t id = toc[i].id; + size_t size = toc[i].size; + if (OutOfBounds(pos, size, span.size())) { + break; + } + auto br = + new jxl::BitReader(jxl::Span(span.data() + pos, size)); + section_info.emplace_back(jxl::FrameDecoder::SectionInfo{br, id, i}); + section_status.emplace_back(); + pos += size; + } + jxl::Status status = dec->frame_dec->ProcessSections( + section_info.data(), section_info.size(), section_status.data()); + bool out_of_bounds = false; + for (const auto& info : section_info) { + if (!info.br->AllReadsWithinBounds()) { + // Mark out of bounds section, but keep closing and deleting the next + // ones as well. + out_of_bounds = true; + } + JXL_ASSERT(info.br->Close()); + delete info.br; + } + if (out_of_bounds) { + // If any bit reader indicates out of bounds, it's an error, not just + // needing more input, since we ensure only bit readers containing + // a complete section are provided to the FrameDecoder. 
+ return JXL_INPUT_ERROR("frame out of bounds"); + } + if (!status) { + return JXL_INPUT_ERROR("frame processing failed"); + } + for (size_t i = 0; i < section_status.size(); ++i) { + auto status = section_status[i]; + if (status == jxl::FrameDecoder::kDone) { + dec->section_processed[section_info[i].index] = 1; + } else if (status != jxl::FrameDecoder::kSkipped) { + return JXL_INPUT_ERROR("unexpected section status"); + } + } + size_t completed_prefix_bytes = 0; + while (dec->next_section < dec->section_processed.size() && + dec->section_processed[dec->next_section] == 1) { + completed_prefix_bytes += toc[dec->next_section].size; + ++dec->next_section; + } + dec->remaining_frame_size -= completed_prefix_bytes; + dec->AdvanceCodestream(completed_prefix_bytes); + return JXL_DEC_SUCCESS; +} + +// TODO(eustas): no CodecInOut -> no image size reinforcement -> possible OOM. +JxlDecoderStatus JxlDecoderProcessCodestream(JxlDecoder* dec) { + // If no parallel runner is set, use the default + // TODO(lode): move this initialization to an appropriate location once the + // runner is used to decode pixels. + if (!dec->thread_pool) { + dec->thread_pool.reset(new jxl::ThreadPool(nullptr, nullptr)); + } + + // No matter what events are wanted, the basic info is always required. 
+ if (!dec->got_basic_info) { + JxlDecoderStatus status = JxlDecoderReadBasicInfo(dec); + if (status != JXL_DEC_SUCCESS) return status; + } + + if (dec->events_wanted & JXL_DEC_BASIC_INFO) { + dec->events_wanted &= ~JXL_DEC_BASIC_INFO; + return JXL_DEC_BASIC_INFO; + } + + if (!dec->events_wanted) { + dec->stage = DecoderStage::kCodestreamFinished; + return JXL_DEC_SUCCESS; + } + + if (!dec->got_all_headers) { + JxlDecoderStatus status = JxlDecoderReadAllHeaders(dec); + if (status != JXL_DEC_SUCCESS) return status; + } + + if (dec->events_wanted & JXL_DEC_COLOR_ENCODING) { + dec->events_wanted &= ~JXL_DEC_COLOR_ENCODING; + return JXL_DEC_COLOR_ENCODING; + } + + if (!dec->events_wanted) { + dec->stage = DecoderStage::kCodestreamFinished; + return JXL_DEC_SUCCESS; + } + + dec->post_headers = true; + + if (!dec->got_preview_image && dec->metadata.m.have_preview) { + dec->preview_frame = true; + } + + // Handle frames + for (;;) { + bool parse_frames = + (dec->events_wanted & + (JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + if (!parse_frames) { + break; + } + if (dec->frame_stage == FrameStage::kHeader && dec->is_last_total) { + break; + } + if (dec->frame_stage == FrameStage::kHeader) { +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->recon_output_jpeg == JpegReconStage::kSettingMetadata || + dec->recon_output_jpeg == JpegReconStage::kOutputting) { + // The image bundle contains the JPEG reconstruction frame, but the + // decoder is still waiting to decode an EXIF or XMP box. It's not + // implemented to decode additional frames during this, and a JPEG + // reconstruction image should have only one frame. + return JXL_API_ERROR( + "cannot decode a next frame after JPEG reconstruction frame"); + } +#endif + if (!dec->ib) { + dec->ib.reset(new jxl::ImageBundle(&dec->image_metadata)); + } +#if JPEGXL_ENABLE_TRANSCODE_JPEG + // If JPEG reconstruction is wanted and possible, set the jpeg_data of + // the ImageBundle. 
+ if (!dec->jpeg_decoder.SetImageBundleJpegData(dec->ib.get())) + return JXL_DEC_ERROR; +#endif + dec->frame_dec.reset(new FrameDecoder( + dec->passes_state.get(), dec->metadata, dec->thread_pool.get(), + /*use_slow_rendering_pipeline=*/false)); + dec->frame_header.reset(new FrameHeader(&dec->metadata)); + Span span; + JXL_API_RETURN_IF_ERROR(dec->GetCodestreamInput(&span)); + auto reader = GetBitReader(span); + jxl::Status status = dec->frame_dec->InitFrame( + reader.get(), dec->ib.get(), dec->preview_frame); + if (!reader->AllReadsWithinBounds() || + status.code() == StatusCode::kNotEnoughBytes) { + return dec->RequestMoreInput(); + } else if (!status) { + return JXL_INPUT_ERROR("invalid frame header"); + } + dec->AdvanceCodestream(reader->TotalBitsConsumed() / kBitsPerByte); + *dec->frame_header = dec->frame_dec->GetFrameHeader(); + jxl::FrameDimensions frame_dim = dec->frame_header->ToFrameDimensions(); + if (!CheckSizeLimit(dec, frame_dim.xsize_upsampled_padded, + frame_dim.ysize_upsampled_padded)) { + return JXL_INPUT_ERROR("frame is too large"); + } + bool output_needed = + (dec->preview_frame ? (dec->events_wanted & JXL_DEC_PREVIEW_IMAGE) + : (dec->events_wanted & JXL_DEC_FULL_IMAGE)); + if (output_needed) { + JXL_API_RETURN_IF_ERROR(dec->frame_dec->InitFrameOutput()); + } + if (dec->cpu_limit_base != 0) { + // No overflow, checked in CheckSizeLimit. 
+ size_t num_pixels = frame_dim.xsize * frame_dim.ysize; + if (dec->used_cpu_base + num_pixels < dec->used_cpu_base) { + return JXL_INPUT_ERROR("image too large"); + } + dec->used_cpu_base += num_pixels; + if (dec->used_cpu_base > dec->cpu_limit_base) { + return JXL_INPUT_ERROR("image too large"); + } + } + dec->remaining_frame_size = dec->frame_dec->SumSectionSizes(); + + dec->frame_stage = FrameStage::kTOC; + if (dec->preview_frame) { + if (!(dec->events_wanted & JXL_DEC_PREVIEW_IMAGE)) { + dec->frame_stage = FrameStage::kHeader; + dec->AdvanceCodestream(dec->remaining_frame_size); + dec->got_preview_image = true; + dec->preview_frame = false; + } + continue; + } + + int saved_as = FrameDecoder::SavedAs(*dec->frame_header); + // is last in entire codestream + dec->is_last_total = dec->frame_header->is_last; + // is last of current still + dec->is_last_of_still = + dec->is_last_total || dec->frame_header->animation_frame.duration > 0; + // is kRegularFrame and coalescing is disabled + dec->is_last_of_still |= + (!dec->coalescing && + dec->frame_header->frame_type == FrameType::kRegularFrame); + const size_t internal_frame_index = dec->internal_frames; + const size_t external_frame_index = dec->external_frames; + if (dec->is_last_of_still) dec->external_frames++; + dec->internal_frames++; + + if (dec->skip_frames > 0) { + dec->skipping_frame = true; + if (dec->is_last_of_still) { + dec->skip_frames--; + } + } else { + dec->skipping_frame = false; + } + + if (external_frame_index >= dec->frame_external_to_internal.size()) { + dec->frame_external_to_internal.push_back(internal_frame_index); + JXL_ASSERT(dec->frame_external_to_internal.size() == + external_frame_index + 1); + } + + if (internal_frame_index >= dec->frame_saved_as.size()) { + dec->frame_saved_as.push_back(saved_as); + JXL_ASSERT(dec->frame_saved_as.size() == internal_frame_index + 1); + + // add the value 0xff (which means all references) to new slots: we only + // know the references of the frame at 
FinalizeFrame, and fill in the + // correct values there. As long as this information is not known, the + // worst case where the frame depends on all storage slots is assumed. + dec->frame_references.push_back(0xff); + JXL_ASSERT(dec->frame_references.size() == internal_frame_index + 1); + } + + if (dec->skipping_frame) { + // Whether this frame could be referenced by any future frame: either + // because it's a frame saved for blending or patches, or because it's + // a DC frame. + bool referenceable = + dec->frame_header->CanBeReferenced() || + dec->frame_header->frame_type == FrameType::kDCFrame; + if (internal_frame_index < dec->frame_required.size() && + !dec->frame_required[internal_frame_index]) { + referenceable = false; + } + if (!referenceable) { + // Skip all decoding for this frame, since the user is skipping this + // frame and no future frames can reference it. + dec->frame_stage = FrameStage::kHeader; + dec->AdvanceCodestream(dec->remaining_frame_size); + continue; + } + } + + if ((dec->events_wanted & JXL_DEC_FRAME) && dec->is_last_of_still) { + // Only return this for the last of a series of stills: patches frames + // etc... before this one do not contain the correct information such + // as animation timing, ... + if (!dec->skipping_frame) { + return JXL_DEC_FRAME; + } + } + } + + if (dec->frame_stage == FrameStage::kTOC) { + dec->frame_dec->SetRenderSpotcolors(dec->render_spotcolors); + dec->frame_dec->SetCoalescing(dec->coalescing); + + if (!dec->preview_frame && + (dec->events_wanted & JXL_DEC_FRAME_PROGRESSION)) { + dec->frame_prog_detail = + dec->frame_dec->SetPauseAtProgressive(dec->prog_detail); + } else { + dec->frame_prog_detail = JxlProgressiveDetail::kFrames; + } + dec->dc_frame_progression_done = 0; + + dec->next_section = 0; + dec->section_processed.clear(); + dec->section_processed.resize(dec->frame_dec->Toc().size(), 0); + + // If we don't need pixels, we can skip actually decoding the frames. 
+ if (dec->preview_frame || (dec->events_wanted & JXL_DEC_FULL_IMAGE)) { + dec->frame_stage = FrameStage::kFull; + } else if (!dec->is_last_total) { + dec->frame_stage = FrameStage::kHeader; + dec->AdvanceCodestream(dec->remaining_frame_size); + continue; + } else { + break; + } + } + + if (dec->frame_stage == FrameStage::kFull) { + if (!dec->image_out_buffer_set) { + if (dec->preview_frame) { + return JXL_DEC_NEED_PREVIEW_OUT_BUFFER; + } + if ( +#if JPEGXL_ENABLE_TRANSCODE_JPEG + (!dec->jpeg_decoder.IsOutputSet() || + dec->ib->jpeg_data == nullptr) && +#endif + dec->is_last_of_still && !dec->skipping_frame) { + // TODO(lode): remove the dec->is_last_of_still condition if the + // frame decoder needs the image buffer as working space for decoding + // non-visible or blending frames too + return JXL_DEC_NEED_IMAGE_OUT_BUFFER; + } + } + + if (dec->image_out_buffer_set) { + size_t xsize, ysize; + GetCurrentDimensions(dec, xsize, ysize); + size_t bits_per_sample = GetBitDepth( + dec->image_out_bit_depth, dec->metadata.m, dec->image_out_format); + dec->frame_dec->SetImageOutput( + PixelCallback{ + dec->image_out_init_callback, dec->image_out_run_callback, + dec->image_out_destroy_callback, dec->image_out_init_opaque}, + reinterpret_cast(dec->image_out_buffer), + dec->image_out_size, xsize, ysize, dec->image_out_format, + bits_per_sample, dec->unpremul_alpha, !dec->keep_orientation); + for (size_t i = 0; i < dec->extra_channel_output.size(); ++i) { + const auto& extra = dec->extra_channel_output[i]; + size_t ec_bits_per_sample = + GetBitDepth(dec->image_out_bit_depth, + dec->metadata.m.extra_channel_info[i], extra.format); + dec->frame_dec->AddExtraChannelOutput(extra.buffer, extra.buffer_size, + xsize, extra.format, + ec_bits_per_sample); + } + } + + size_t next_num_passes_to_pause = dec->frame_dec->NextNumPassesToPause(); + + JXL_API_RETURN_IF_ERROR(JxlDecoderProcessSections(dec)); + + bool all_sections_done = dec->frame_dec->HasDecodedAll(); + bool got_dc_only = 
!all_sections_done && dec->frame_dec->HasDecodedDC(); + + if (dec->frame_prog_detail >= JxlProgressiveDetail::kDC && + !dec->dc_frame_progression_done && got_dc_only) { + dec->dc_frame_progression_done = true; + dec->downsampling_target = 8; + return JXL_DEC_FRAME_PROGRESSION; + } + + bool new_progression_step_done = + dec->frame_dec->NumCompletePasses() >= next_num_passes_to_pause; + + if (!all_sections_done && + dec->frame_prog_detail >= JxlProgressiveDetail::kLastPasses && + new_progression_step_done) { + dec->downsampling_target = + dec->frame_header->passes.GetDownsamplingTargetForCompletedPasses( + dec->frame_dec->NumCompletePasses()); + return JXL_DEC_FRAME_PROGRESSION; + } + + if (!all_sections_done) { + // Not all sections have been processed yet + return dec->RequestMoreInput(); + } + + if (!dec->preview_frame) { + size_t internal_index = dec->internal_frames - 1; + JXL_ASSERT(dec->frame_references.size() > internal_index); + // Always fill this in, even if it was already written, it could be that + // this frame was skipped before and set to 255, while only now we know + // the true value. + dec->frame_references[internal_index] = dec->frame_dec->References(); + } + + if (!dec->frame_dec->FinalizeFrame()) { + return JXL_INPUT_ERROR("decoding frame failed"); + } +#if JPEGXL_ENABLE_TRANSCODE_JPEG + // If jpeg output was requested, we merely return the JXL_DEC_FULL_IMAGE + // status without outputting pixels. + if (dec->jpeg_decoder.IsOutputSet() && dec->ib->jpeg_data != nullptr) { + dec->frame_stage = FrameStage::kHeader; + dec->recon_output_jpeg = JpegReconStage::kSettingMetadata; + return JXL_DEC_FULL_IMAGE; + } +#endif + if (dec->preview_frame || dec->is_last_of_still) { + dec->image_out_buffer_set = false; + dec->extra_channel_output.clear(); + } + } + + dec->frame_stage = FrameStage::kHeader; + + // The pixels have been output or are not needed, do not keep them in + // memory here. 
+ dec->ib.reset(); + if (dec->preview_frame) { + dec->got_preview_image = true; + dec->preview_frame = false; + dec->events_wanted &= ~JXL_DEC_PREVIEW_IMAGE; + return JXL_DEC_PREVIEW_IMAGE; + } else if (dec->is_last_of_still && + (dec->events_wanted & JXL_DEC_FULL_IMAGE) && + !dec->skipping_frame) { + return JXL_DEC_FULL_IMAGE; + } + } + + dec->stage = DecoderStage::kCodestreamFinished; + // Return success, this means there is nothing more to do. + return JXL_DEC_SUCCESS; +} + +} // namespace +} // namespace jxl + +JxlDecoderStatus JxlDecoderSetInput(JxlDecoder* dec, const uint8_t* data, + size_t size) { + if (dec->next_in) { + return JXL_API_ERROR("already set input, use JxlDecoderReleaseInput first"); + } + if (dec->input_closed) { + return JXL_API_ERROR("input already closed"); + } + + dec->next_in = data; + dec->avail_in = size; + return JXL_DEC_SUCCESS; +} + +size_t JxlDecoderReleaseInput(JxlDecoder* dec) { + size_t result = dec->avail_in; + dec->next_in = nullptr; + dec->avail_in = 0; + return result; +} + +void JxlDecoderCloseInput(JxlDecoder* dec) { dec->input_closed = true; } + +JxlDecoderStatus JxlDecoderSetJPEGBuffer(JxlDecoder* dec, uint8_t* data, + size_t size) { +#if JPEGXL_ENABLE_TRANSCODE_JPEG + // JPEG reconstruction buffer can only set and updated before or during the + // first frame, the reconstruction box refers to the first frame and in + // theory multi-frame images should not be used with a jbrd box. 
+ if (dec->internal_frames > 1) { + return JXL_API_ERROR("JPEG reconstruction only works for the first frame"); + } + if (dec->jpeg_decoder.IsOutputSet()) { + return JXL_API_ERROR("Already set JPEG buffer"); + } + return dec->jpeg_decoder.SetOutputBuffer(data, size); +#else + return JXL_API_ERROR("JPEG reconstruction is not supported."); +#endif +} + +size_t JxlDecoderReleaseJPEGBuffer(JxlDecoder* dec) { +#if JPEGXL_ENABLE_TRANSCODE_JPEG + return dec->jpeg_decoder.ReleaseOutputBuffer(); +#else + return JXL_API_ERROR("JPEG reconstruction is not supported."); +#endif +} + +// Parses the header of the box, outputting the 4-character type and the box +// size, including header size, as stored in the box header. +// @param in current input bytes. +// @param size available input size. +// @param pos position in the input, must begin at the header of the box. +// @param file_pos position of pos since the start of the JXL file, rather than +// the current input, used for integer overflow checking. +// @param type the output box type. +// @param box_size output the total box size, including header, in bytes, or 0 +// if it's a final unbounded box. +// @param header_size output size of the box header. +// @return JXL_DEC_SUCCESS if the box header was fully parsed. In that case the +// parsing position must be incremented by header_size bytes. +// JXL_DEC_NEED_MORE_INPUT if not enough input bytes available, in that case +// header_size indicates a lower bound for the known size the header has to be +// at least. JXL_DEC_ERROR if the box header is invalid. +static JxlDecoderStatus ParseBoxHeader(const uint8_t* in, size_t size, + size_t pos, size_t file_pos, + JxlBoxType type, uint64_t* box_size, + uint64_t* header_size) { + if (OutOfBounds(pos, 8, size)) { + *header_size = 8; + return JXL_DEC_NEED_MORE_INPUT; + } + size_t box_start = pos; + // Box size, including this header itself. 
+ *box_size = LoadBE32(in + pos); + pos += 4; + if (*box_size == 1) { + *header_size = 16; + if (OutOfBounds(pos, 12, size)) return JXL_DEC_NEED_MORE_INPUT; + *box_size = LoadBE64(in + pos); + pos += 8; + } + memcpy(type, in + pos, 4); + pos += 4; + *header_size = pos - box_start; + if (*box_size > 0 && *box_size < *header_size) { + return JXL_INPUT_ERROR("invalid box size"); + } + if (file_pos + *box_size < file_pos) { + return JXL_INPUT_ERROR("Box size overflow"); + } + return JXL_DEC_SUCCESS; +} + +// This includes handling the codestream if it is not a box-based jxl file. +static JxlDecoderStatus HandleBoxes(JxlDecoder* dec) { + // Box handling loop + for (;;) { + if (dec->box_stage != BoxStage::kHeader) { + dec->AdvanceInput(dec->header_size); + dec->header_size = 0; +#if JPEGXL_ENABLE_BOXES + if ((dec->events_wanted & JXL_DEC_BOX) && + dec->box_out_buffer_set_current_box) { + uint8_t* next_out = dec->box_out_buffer + dec->box_out_buffer_pos; + size_t avail_out = dec->box_out_buffer_size - dec->box_out_buffer_pos; + + JxlDecoderStatus box_result = dec->box_content_decoder.Process( + dec->next_in, dec->avail_in, + dec->file_pos - dec->box_contents_begin, &next_out, &avail_out); + size_t produced = + next_out - (dec->box_out_buffer + dec->box_out_buffer_pos); + dec->box_out_buffer_pos += produced; + + // Don't return JXL_DEC_NEED_MORE_INPUT: the box stages below, instead, + // handle the input progression, and the above only outputs the part of + // the box seen so far. + if (box_result != JXL_DEC_SUCCESS && + box_result != JXL_DEC_NEED_MORE_INPUT) { + return box_result; + } + } +#endif +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->store_exif == 1 || dec->store_xmp == 1) { + std::vector& metadata = + (dec->store_exif == 1) ? 
dec->exif_metadata : dec->xmp_metadata; + for (;;) { + if (metadata.empty()) metadata.resize(64); + uint8_t* orig_next_out = metadata.data() + dec->recon_out_buffer_pos; + uint8_t* next_out = orig_next_out; + size_t avail_out = metadata.size() - dec->recon_out_buffer_pos; + JxlDecoderStatus box_result = dec->metadata_decoder.Process( + dec->next_in, dec->avail_in, + dec->file_pos - dec->box_contents_begin, &next_out, &avail_out); + size_t produced = next_out - orig_next_out; + dec->recon_out_buffer_pos += produced; + if (box_result == JXL_DEC_BOX_NEED_MORE_OUTPUT) { + metadata.resize(metadata.size() * 2); + } else if (box_result == JXL_DEC_NEED_MORE_INPUT) { + break; // box stage handling below will handle this instead + } else if (box_result == JXL_DEC_SUCCESS) { + size_t needed_size = (dec->store_exif == 1) ? dec->recon_exif_size + : dec->recon_xmp_size; + if (dec->box_contents_unbounded && + dec->recon_out_buffer_pos < needed_size) { + // Unbounded box, but we know the expected size due to the jbrd + // box's data. Treat this as the JXL_DEC_NEED_MORE_INPUT case. 
+ break; + } else { + metadata.resize(dec->recon_out_buffer_pos); + if (dec->store_exif == 1) dec->store_exif = 2; + if (dec->store_xmp == 1) dec->store_xmp = 2; + break; + } + } else { + // error + return box_result; + } + } + } +#endif + } +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->recon_output_jpeg == JpegReconStage::kSettingMetadata && + !dec->JbrdNeedMoreBoxes()) { + jxl::jpeg::JPEGData* jpeg_data = dec->ib->jpeg_data.get(); + if (dec->recon_exif_size) { + JxlDecoderStatus status = jxl::JxlToJpegDecoder::SetExif( + dec->exif_metadata.data(), dec->exif_metadata.size(), jpeg_data); + if (status != JXL_DEC_SUCCESS) return status; + } + if (dec->recon_xmp_size) { + JxlDecoderStatus status = jxl::JxlToJpegDecoder::SetXmp( + dec->xmp_metadata.data(), dec->xmp_metadata.size(), jpeg_data); + if (status != JXL_DEC_SUCCESS) return status; + } + dec->recon_output_jpeg = JpegReconStage::kOutputting; + } + + if (dec->recon_output_jpeg == JpegReconStage::kOutputting && + !dec->JbrdNeedMoreBoxes()) { + JxlDecoderStatus status = + dec->jpeg_decoder.WriteOutput(*dec->ib->jpeg_data); + if (status != JXL_DEC_SUCCESS) return status; + dec->recon_output_jpeg = JpegReconStage::kFinished; + dec->ib.reset(); + if (dec->events_wanted & JXL_DEC_FULL_IMAGE) { + // Return the full image event here now, this may be delayed if this + // could only be done after decoding an exif or xmp box after the + // codestream. + return JXL_DEC_FULL_IMAGE; + } + } +#endif + + if (dec->box_stage == BoxStage::kHeader) { + if (!dec->have_container) { + if (dec->stage == DecoderStage::kCodestreamFinished) + return JXL_DEC_SUCCESS; + dec->box_stage = BoxStage::kCodestream; + dec->box_contents_unbounded = true; + continue; + } + if (dec->avail_in == 0) { + if (dec->stage != DecoderStage::kCodestreamFinished) { + // Not yet seen (all) codestream boxes. 
+ return JXL_DEC_NEED_MORE_INPUT; + } +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->JbrdNeedMoreBoxes()) { + return JXL_DEC_NEED_MORE_INPUT; + } +#endif + if (dec->input_closed) { + return JXL_DEC_SUCCESS; + } + if (!(dec->events_wanted & JXL_DEC_BOX)) { + // All codestream and jbrd metadata boxes finished, and no individual + // boxes requested by user, so no need to request any more input. + // This returns success for backwards compatibility, when + // JxlDecoderCloseInput and JXL_DEC_BOX did not exist, as well + // as for efficiency. + return JXL_DEC_SUCCESS; + } + // Even though we are exactly at a box end, there still may be more + // boxes. The user may call JxlDecoderCloseInput to indicate the input + // is finished and get success instead. + return JXL_DEC_NEED_MORE_INPUT; + } + + bool boxed_codestream_done = + ((dec->events_wanted & JXL_DEC_BOX) && + dec->stage == DecoderStage::kCodestreamFinished && +#if JPEGXL_ENABLE_TRANSCODE_JPEG + !dec->JbrdNeedMoreBoxes() && +#endif + dec->last_codestream_seen); + if (boxed_codestream_done && dec->avail_in >= 2 && + dec->next_in[0] == 0xff && + dec->next_in[1] == jxl::kCodestreamMarker) { + // We detected the start of the next naked codestream, so we can return + // success here. 
+ return JXL_DEC_SUCCESS; + } + + uint64_t box_size, header_size; + JxlDecoderStatus status = + ParseBoxHeader(dec->next_in, dec->avail_in, 0, dec->file_pos, + dec->box_type, &box_size, &header_size); + if (status != JXL_DEC_SUCCESS) { + if (status == JXL_DEC_NEED_MORE_INPUT) { + dec->basic_info_size_hint = + InitialBasicInfoSizeHint() + header_size - dec->file_pos; + } + return status; + } + if (memcmp(dec->box_type, "brob", 4) == 0) { + if (dec->avail_in < header_size + 4) { + return JXL_DEC_NEED_MORE_INPUT; + } + memcpy(dec->box_decoded_type, dec->next_in + header_size, + sizeof(dec->box_decoded_type)); + } else { + memcpy(dec->box_decoded_type, dec->box_type, + sizeof(dec->box_decoded_type)); + } + + // Box order validity checks + // The signature box at box_count == 1 is not checked here since that's + // already done at the beginning. + dec->box_count++; + if (boxed_codestream_done && memcmp(dec->box_type, "JXL ", 4) == 0) { + // We detected the start of the next boxed stream, so we can return + // success here. + return JXL_DEC_SUCCESS; + } + if (dec->box_count == 2 && memcmp(dec->box_type, "ftyp", 4) != 0) { + return JXL_INPUT_ERROR("the second box must be the ftyp box"); + } + if (memcmp(dec->box_type, "ftyp", 4) == 0 && dec->box_count != 2) { + return JXL_INPUT_ERROR("the ftyp box must come second"); + } + + dec->box_contents_unbounded = (box_size == 0); + dec->box_contents_begin = dec->file_pos + header_size; + dec->box_contents_end = + dec->box_contents_unbounded ? 0 : (dec->file_pos + box_size); + dec->box_contents_size = + dec->box_contents_unbounded ? 
0 : (box_size - header_size); + dec->box_size = box_size; + dec->header_size = header_size; +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) { + // Initiate storing of Exif or XMP data for JPEG reconstruction + if (dec->store_exif == 0 && + memcmp(dec->box_decoded_type, "Exif", 4) == 0) { + dec->store_exif = 1; + dec->recon_out_buffer_pos = 0; + } + if (dec->store_xmp == 0 && + memcmp(dec->box_decoded_type, "xml ", 4) == 0) { + dec->store_xmp = 1; + dec->recon_out_buffer_pos = 0; + } + } +#endif +#if JPEGXL_ENABLE_BOXES + if (dec->events_wanted & JXL_DEC_BOX) { + bool decompress = + dec->decompress_boxes && memcmp(dec->box_type, "brob", 4) == 0; + dec->box_content_decoder.StartBox( + decompress, dec->box_contents_unbounded, dec->box_contents_size); + } +#endif +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->store_exif == 1 || dec->store_xmp == 1) { + bool brob = memcmp(dec->box_type, "brob", 4) == 0; + dec->metadata_decoder.StartBox(brob, dec->box_contents_unbounded, + dec->box_contents_size); + } +#endif + if (memcmp(dec->box_type, "ftyp", 4) == 0) { + dec->box_stage = BoxStage::kFtyp; + } else if (memcmp(dec->box_type, "jxlc", 4) == 0) { + if (dec->last_codestream_seen) { + return JXL_INPUT_ERROR("there can only be one jxlc box"); + } + dec->last_codestream_seen = true; + dec->box_stage = BoxStage::kCodestream; + } else if (memcmp(dec->box_type, "jxlp", 4) == 0) { + dec->box_stage = BoxStage::kPartialCodestream; +#if JPEGXL_ENABLE_TRANSCODE_JPEG + } else if ((dec->orig_events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) && + memcmp(dec->box_type, "jbrd", 4) == 0) { + if (!(dec->events_wanted & JXL_DEC_JPEG_RECONSTRUCTION)) { + return JXL_INPUT_ERROR( + "multiple JPEG reconstruction boxes not supported"); + } + dec->box_stage = BoxStage::kJpegRecon; +#endif + } else { + dec->box_stage = BoxStage::kSkip; + } + + if (dec->events_wanted & JXL_DEC_BOX) { + dec->box_event = true; + dec->box_out_buffer_set_current_box = false; + 
return JXL_DEC_BOX; + } + } else if (dec->box_stage == BoxStage::kFtyp) { + if (dec->box_contents_size < 12) { + return JXL_INPUT_ERROR("file type box too small"); + } + if (dec->avail_in < 4) return JXL_DEC_NEED_MORE_INPUT; + if (memcmp(dec->next_in, "jxl ", 4) != 0) { + return JXL_INPUT_ERROR("file type box major brand must be \"jxl \""); + } + dec->AdvanceInput(4); + dec->box_stage = BoxStage::kSkip; + } else if (dec->box_stage == BoxStage::kPartialCodestream) { + if (dec->last_codestream_seen) { + return JXL_INPUT_ERROR("cannot have jxlp box after last jxlp box"); + } + // TODO(lode): error if box is unbounded but last bit not set + if (dec->avail_in < 4) return JXL_DEC_NEED_MORE_INPUT; + if (!dec->box_contents_unbounded && dec->box_contents_size < 4) { + return JXL_INPUT_ERROR("jxlp box too small to contain index"); + } + size_t jxlp_index = LoadBE32(dec->next_in); + // The high bit of jxlp_index indicates whether this is the last + // jxlp box. + if (jxlp_index & 0x80000000) { + dec->last_codestream_seen = true; + } + dec->AdvanceInput(4); + dec->box_stage = BoxStage::kCodestream; + } else if (dec->box_stage == BoxStage::kCodestream) { + JxlDecoderStatus status = jxl::JxlDecoderProcessCodestream(dec); +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (status == JXL_DEC_FULL_IMAGE) { + if (dec->recon_output_jpeg != JpegReconStage::kNone) { + continue; + } + } +#endif + if (status == JXL_DEC_NEED_MORE_INPUT) { + if (dec->file_pos == dec->box_contents_end && + !dec->box_contents_unbounded) { + dec->box_stage = BoxStage::kHeader; + continue; + } + } + + if (status == JXL_DEC_SUCCESS) { +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->JbrdNeedMoreBoxes()) { + dec->box_stage = BoxStage::kSkip; + continue; + } +#endif + if (dec->box_contents_unbounded) { + // Last box reached and codestream done, nothing more to do. + break; + } + if (dec->events_wanted & JXL_DEC_BOX) { + // Codestream done, but there may be more other boxes. 
+ dec->box_stage = BoxStage::kSkip; + continue; + } + } + return status; +#if JPEGXL_ENABLE_TRANSCODE_JPEG + } else if (dec->box_stage == BoxStage::kJpegRecon) { + if (!dec->jpeg_decoder.IsParsingBox()) { + // This is a new JPEG reconstruction metadata box. + dec->jpeg_decoder.StartBox(dec->box_contents_unbounded, + dec->box_contents_size); + } + const uint8_t* next_in = dec->next_in; + size_t avail_in = dec->avail_in; + JxlDecoderStatus recon_result = + dec->jpeg_decoder.Process(&next_in, &avail_in); + size_t consumed = next_in - dec->next_in; + dec->AdvanceInput(consumed); + if (recon_result == JXL_DEC_JPEG_RECONSTRUCTION) { + jxl::jpeg::JPEGData* jpeg_data = dec->jpeg_decoder.GetJpegData(); + size_t num_exif = jxl::JxlToJpegDecoder::NumExifMarkers(*jpeg_data); + size_t num_xmp = jxl::JxlToJpegDecoder::NumXmpMarkers(*jpeg_data); + if (num_exif) { + if (num_exif > 1) { + return JXL_INPUT_ERROR( + "multiple exif markers for JPEG reconstruction not supported"); + } + if (JXL_DEC_SUCCESS != jxl::JxlToJpegDecoder::ExifBoxContentSize( + *jpeg_data, &dec->recon_exif_size)) { + return JXL_INPUT_ERROR("invalid jbrd exif size"); + } + } + if (num_xmp) { + if (num_xmp > 1) { + return JXL_INPUT_ERROR( + "multiple XMP markers for JPEG reconstruction not supported"); + } + if (JXL_DEC_SUCCESS != jxl::JxlToJpegDecoder::XmlBoxContentSize( + *jpeg_data, &dec->recon_xmp_size)) { + return JXL_INPUT_ERROR("invalid jbrd XMP size"); + } + } + + dec->box_stage = BoxStage::kHeader; + // If successful JPEG reconstruction, return the success if the user + // cares about it, otherwise continue. + if (dec->events_wanted & JXL_DEC_JPEG_RECONSTRUCTION) { + dec->events_wanted &= ~JXL_DEC_JPEG_RECONSTRUCTION; + return JXL_DEC_JPEG_RECONSTRUCTION; + } + } else { + // If anything else, return the result. 
+ return recon_result; + } +#endif + } else if (dec->box_stage == BoxStage::kSkip) { + if (dec->box_contents_unbounded) { + if (dec->input_closed) { + return JXL_DEC_SUCCESS; + } + if (!(dec->box_out_buffer_set)) { + // An unbounded box is always the last box. Not requesting box data, + // so return success even if JxlDecoderCloseInput was not called for + // backwards compatibility as well as efficiency since this box is + // being skipped. + return JXL_DEC_SUCCESS; + } + // Arbitrarily more bytes may follow, only JxlDecoderCloseInput can + // mark the end. + dec->AdvanceInput(dec->avail_in); + return JXL_DEC_NEED_MORE_INPUT; + } + // Amount of remaining bytes in the box that is being skipped. + size_t remaining = dec->box_contents_end - dec->file_pos; + if (dec->avail_in < remaining) { + // Indicate how many more bytes needed starting from next_in. + dec->basic_info_size_hint = + InitialBasicInfoSizeHint() + dec->box_contents_end - dec->file_pos; + // Don't have the full box yet, skip all we have so far + dec->AdvanceInput(dec->avail_in); + return JXL_DEC_NEED_MORE_INPUT; + } else { + // Full box available, skip all its remaining bytes + dec->AdvanceInput(remaining); + dec->box_stage = BoxStage::kHeader; + } + } else { + JXL_DASSERT(false); // unknown box stage + } + } + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderProcessInput(JxlDecoder* dec) { + if (dec->stage == DecoderStage::kInited) { + dec->stage = DecoderStage::kStarted; + } + if (dec->stage == DecoderStage::kError) { + return JXL_API_ERROR( + "Cannot keep using decoder after it encountered an error, use " + "JxlDecoderReset to reset it"); + } + + if (!dec->got_signature) { + JxlSignature sig = JxlSignatureCheck(dec->next_in, dec->avail_in); + if (sig == JXL_SIG_INVALID) return JXL_INPUT_ERROR("invalid signature"); + if (sig == JXL_SIG_NOT_ENOUGH_BYTES) { + if (dec->input_closed) { + return JXL_INPUT_ERROR("file too small for signature"); + } + return JXL_DEC_NEED_MORE_INPUT; + } + + 
dec->got_signature = true; + + if (sig == JXL_SIG_CONTAINER) { + dec->have_container = 1; + } else { + dec->last_codestream_seen = true; + } + } + + JxlDecoderStatus status = HandleBoxes(dec); + + if (status == JXL_DEC_NEED_MORE_INPUT && dec->input_closed) { + return JXL_INPUT_ERROR("premature end of input"); + } + + // Even if the box handling returns success, certain types of + // data may be missing. + if (status == JXL_DEC_SUCCESS) { + if (dec->CanUseMoreCodestreamInput()) { + return JXL_INPUT_ERROR("codestream never finished"); + } +#if JPEGXL_ENABLE_TRANSCODE_JPEG + if (dec->JbrdNeedMoreBoxes()) { + return JXL_INPUT_ERROR("missing metadata boxes for jpeg reconstruction"); + } +#endif + } + + return status; +} + +// To ensure ABI forward-compatibility, this struct has a constant size. +static_assert(sizeof(JxlBasicInfo) == 204, + "JxlBasicInfo struct size should remain constant"); + +JxlDecoderStatus JxlDecoderGetBasicInfo(const JxlDecoder* dec, + JxlBasicInfo* info) { + if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT; + + if (info) { + memset(info, 0, sizeof(*info)); + + const jxl::ImageMetadata& meta = dec->metadata.m; + + info->have_container = dec->have_container; + info->xsize = dec->metadata.size.xsize(); + info->ysize = dec->metadata.size.ysize(); + info->uses_original_profile = !meta.xyb_encoded; + + info->bits_per_sample = meta.bit_depth.bits_per_sample; + info->exponent_bits_per_sample = meta.bit_depth.exponent_bits_per_sample; + + info->have_preview = meta.have_preview; + info->have_animation = meta.have_animation; + info->orientation = static_cast(meta.orientation); + + if (!dec->keep_orientation) { + if (info->orientation >= JXL_ORIENT_TRANSPOSE) { + std::swap(info->xsize, info->ysize); + } + info->orientation = JXL_ORIENT_IDENTITY; + } + + info->intensity_target = meta.IntensityTarget(); + if (dec->desired_intensity_target > 0) { + info->intensity_target = dec->desired_intensity_target; + } + info->min_nits = meta.tone_mapping.min_nits; 
+ info->relative_to_max_display = meta.tone_mapping.relative_to_max_display; + info->linear_below = meta.tone_mapping.linear_below; + + const jxl::ExtraChannelInfo* alpha = meta.Find(jxl::ExtraChannel::kAlpha); + if (alpha != nullptr) { + info->alpha_bits = alpha->bit_depth.bits_per_sample; + info->alpha_exponent_bits = alpha->bit_depth.exponent_bits_per_sample; + info->alpha_premultiplied = alpha->alpha_associated; + } else { + info->alpha_bits = 0; + info->alpha_exponent_bits = 0; + info->alpha_premultiplied = 0; + } + + info->num_color_channels = + meta.color_encoding.GetColorSpace() == jxl::ColorSpace::kGray ? 1 : 3; + + info->num_extra_channels = meta.num_extra_channels; + + if (info->have_preview) { + info->preview.xsize = dec->metadata.m.preview_size.xsize(); + info->preview.ysize = dec->metadata.m.preview_size.ysize(); + } + + if (info->have_animation) { + info->animation.tps_numerator = dec->metadata.m.animation.tps_numerator; + info->animation.tps_denominator = + dec->metadata.m.animation.tps_denominator; + info->animation.num_loops = dec->metadata.m.animation.num_loops; + info->animation.have_timecodes = dec->metadata.m.animation.have_timecodes; + } + + if (meta.have_intrinsic_size) { + info->intrinsic_xsize = dec->metadata.m.intrinsic_size.xsize(); + info->intrinsic_ysize = dec->metadata.m.intrinsic_size.ysize(); + } else { + info->intrinsic_xsize = info->xsize; + info->intrinsic_ysize = info->ysize; + } + } + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetExtraChannelInfo(const JxlDecoder* dec, + size_t index, + JxlExtraChannelInfo* info) { + if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT; + + const std::vector& channels = + dec->metadata.m.extra_channel_info; + + if (index >= channels.size()) return JXL_DEC_ERROR; // out of bounds + const jxl::ExtraChannelInfo& channel = channels[index]; + + info->type = static_cast(channel.type); + info->bits_per_sample = channel.bit_depth.bits_per_sample; + info->exponent_bits_per_sample = + 
channel.bit_depth.floating_point_sample + ? channel.bit_depth.exponent_bits_per_sample + : 0; + info->dim_shift = channel.dim_shift; + info->name_length = channel.name.size(); + info->alpha_premultiplied = channel.alpha_associated; + info->spot_color[0] = channel.spot_color[0]; + info->spot_color[1] = channel.spot_color[1]; + info->spot_color[2] = channel.spot_color[2]; + info->spot_color[3] = channel.spot_color[3]; + info->cfa_channel = channel.cfa_channel; + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetExtraChannelName(const JxlDecoder* dec, + size_t index, char* name, + size_t size) { + if (!dec->got_basic_info) return JXL_DEC_NEED_MORE_INPUT; + + const std::vector& channels = + dec->metadata.m.extra_channel_info; + + if (index >= channels.size()) return JXL_DEC_ERROR; // out of bounds + const jxl::ExtraChannelInfo& channel = channels[index]; + + // Also need null-termination character + if (channel.name.size() + 1 > size) return JXL_DEC_ERROR; + + memcpy(name, channel.name.c_str(), channel.name.size() + 1); + + return JXL_DEC_SUCCESS; +} + +namespace { + +// Gets the jxl::ColorEncoding for the desired target, and checks errors. +// Returns the object regardless of whether the actual color space is in ICC, +// but ensures that if the color encoding is not the encoding from the +// codestream header metadata, it cannot require ICC profile. 
+JxlDecoderStatus GetColorEncodingForTarget( + const JxlDecoder* dec, JxlColorProfileTarget target, + const jxl::ColorEncoding** encoding) { + if (!dec->got_all_headers) return JXL_DEC_NEED_MORE_INPUT; + *encoding = nullptr; + if (target == JXL_COLOR_PROFILE_TARGET_DATA && dec->metadata.m.xyb_encoded) { + *encoding = &dec->passes_state->output_encoding_info.color_encoding; + } else { + *encoding = &dec->metadata.m.color_encoding; + } + return JXL_DEC_SUCCESS; +} +} // namespace + +JxlDecoderStatus JxlDecoderGetColorAsEncodedProfile( + const JxlDecoder* dec, JxlColorProfileTarget target, + JxlColorEncoding* color_encoding) { + const jxl::ColorEncoding* jxl_color_encoding = nullptr; + JxlDecoderStatus status = + GetColorEncodingForTarget(dec, target, &jxl_color_encoding); + if (status) return status; + + if (jxl_color_encoding->WantICC()) + return JXL_DEC_ERROR; // Indicate no encoded profile available. + + if (color_encoding) { + ConvertInternalToExternalColorEncoding(*jxl_color_encoding, color_encoding); + } + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetICCProfileSize(const JxlDecoder* dec, + JxlColorProfileTarget target, + size_t* size) { + const jxl::ColorEncoding* jxl_color_encoding = nullptr; + JxlDecoderStatus status = + GetColorEncodingForTarget(dec, target, &jxl_color_encoding); + if (status != JXL_DEC_SUCCESS) return status; + + if (jxl_color_encoding->WantICC()) { + jxl::ColorSpace color_space = + dec->metadata.m.color_encoding.GetColorSpace(); + if (color_space == jxl::ColorSpace::kUnknown || + color_space == jxl::ColorSpace::kXYB) { + // This indicates there's no ICC profile available + // TODO(lode): for the XYB case, do we want to craft an ICC profile that + // represents XYB as an RGB profile? It may be possible, but not with + // only 1D transfer functions. 
+ return JXL_DEC_ERROR; + } + } + + if (size) { + *size = jxl_color_encoding->ICC().size(); + } + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetColorAsICCProfile(const JxlDecoder* dec, + JxlColorProfileTarget target, + uint8_t* icc_profile, + size_t size) { + size_t wanted_size; + // This also checks the NEED_MORE_INPUT and the unknown/xyb cases + JxlDecoderStatus status = + JxlDecoderGetICCProfileSize(dec, target, &wanted_size); + if (status != JXL_DEC_SUCCESS) return status; + if (size < wanted_size) return JXL_API_ERROR("ICC profile output too small"); + + const jxl::ColorEncoding* jxl_color_encoding = nullptr; + status = GetColorEncodingForTarget(dec, target, &jxl_color_encoding); + if (status != JXL_DEC_SUCCESS) return status; + + memcpy(icc_profile, jxl_color_encoding->ICC().data(), + jxl_color_encoding->ICC().size()); + + return JXL_DEC_SUCCESS; +} + +namespace { + +// Returns the amount of bits needed for getting memory buffer size, and does +// all error checking required for size checking and format validity. +JxlDecoderStatus PrepareSizeCheck(const JxlDecoder* dec, + const JxlPixelFormat* format, size_t* bits) { + if (!dec->got_basic_info) { + // Don't know image dimensions yet, cannot check for valid size. 
+ return JXL_DEC_NEED_MORE_INPUT; + } + if (!dec->coalescing && + (!dec->frame_header || dec->frame_stage == FrameStage::kHeader)) { + return JXL_API_ERROR("Don't know frame dimensions yet"); + } + if (format->num_channels > 4) { + return JXL_API_ERROR("More than 4 channels not supported"); + } + + *bits = BitsPerChannel(format->data_type); + + if (*bits == 0) { + return JXL_API_ERROR("Invalid/unsupported data type"); + } + + return JXL_DEC_SUCCESS; +} + +} // namespace + +size_t JxlDecoderGetIntendedDownsamplingRatio(JxlDecoder* dec) { + return dec->downsampling_target; +} + +JxlDecoderStatus JxlDecoderFlushImage(JxlDecoder* dec) { + if (!dec->image_out_buffer_set) return JXL_DEC_ERROR; + if (dec->frame_stage != FrameStage::kFull) { + return JXL_DEC_ERROR; + } + JXL_DASSERT(dec->frame_dec); + if (!dec->frame_dec->HasDecodedDC()) { + // FrameDecoder::Flush currently requires DC to have been decoded already + // to work correctly. + return JXL_DEC_ERROR; + } + + if (!dec->frame_dec->Flush()) { + return JXL_DEC_ERROR; + } + + return JXL_DEC_SUCCESS; +} + +JXL_EXPORT JxlDecoderStatus JxlDecoderPreviewOutBufferSize( + const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) { + size_t bits; + JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits); + if (status != JXL_DEC_SUCCESS) return status; + if (format->num_channels < 3 && + !dec->image_metadata.color_encoding.IsGray()) { + return JXL_API_ERROR("Number of channels is too low for color output"); + } + + size_t xsize = dec->metadata.oriented_preview_xsize(dec->keep_orientation); + size_t ysize = dec->metadata.oriented_preview_ysize(dec->keep_orientation); + + size_t row_size = + jxl::DivCeil(xsize * format->num_channels * bits, jxl::kBitsPerByte); + size_t last_row_size = row_size; + if (format->align > 1) { + row_size = jxl::DivCeil(row_size, format->align) * format->align; + } + *size = row_size * (ysize - 1) + last_row_size; + return JXL_DEC_SUCCESS; +} + +JXL_EXPORT JxlDecoderStatus 
JxlDecoderSetPreviewOutBuffer( + JxlDecoder* dec, const JxlPixelFormat* format, void* buffer, size_t size) { + if (!dec->got_basic_info || !dec->metadata.m.have_preview || + !(dec->orig_events_wanted & JXL_DEC_PREVIEW_IMAGE)) { + return JXL_API_ERROR("No preview out buffer needed at this time"); + } + if (format->num_channels < 3 && + !dec->image_metadata.color_encoding.IsGray()) { + return JXL_API_ERROR("Number of channels is too low for color output"); + } + + size_t min_size; + // This also checks whether the format is valid and supported and basic info + // is available. + JxlDecoderStatus status = + JxlDecoderPreviewOutBufferSize(dec, format, &min_size); + if (status != JXL_DEC_SUCCESS) return status; + + if (size < min_size) return JXL_DEC_ERROR; + + dec->image_out_buffer_set = true; + dec->image_out_buffer = buffer; + dec->image_out_size = size; + dec->image_out_format = *format; + + return JXL_DEC_SUCCESS; +} + +JXL_EXPORT JxlDecoderStatus JxlDecoderImageOutBufferSize( + const JxlDecoder* dec, const JxlPixelFormat* format, size_t* size) { + size_t bits; + JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits); + if (status != JXL_DEC_SUCCESS) return status; + if (format->num_channels < 3 && + !dec->image_metadata.color_encoding.IsGray()) { + return JXL_API_ERROR("Number of channels is too low for color output"); + } + size_t xsize, ysize; + GetCurrentDimensions(dec, xsize, ysize); + size_t row_size = + jxl::DivCeil(xsize * format->num_channels * bits, jxl::kBitsPerByte); + if (format->align > 1) { + row_size = jxl::DivCeil(row_size, format->align) * format->align; + } + *size = row_size * ysize; + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetImageOutBuffer(JxlDecoder* dec, + const JxlPixelFormat* format, + void* buffer, size_t size) { + if (!dec->got_basic_info || !(dec->orig_events_wanted & JXL_DEC_FULL_IMAGE)) { + return JXL_API_ERROR("No image out buffer needed at this time"); + } + if (dec->image_out_buffer_set && 
!!dec->image_out_run_callback) { + return JXL_API_ERROR( + "Cannot change from image out callback to image out buffer"); + } + if (format->num_channels < 3 && + !dec->image_metadata.color_encoding.IsGray()) { + return JXL_API_ERROR("Number of channels is too low for color output"); + } + size_t min_size; + // This also checks whether the format is valid and supported and basic info + // is available. + JxlDecoderStatus status = + JxlDecoderImageOutBufferSize(dec, format, &min_size); + if (status != JXL_DEC_SUCCESS) return status; + + if (size < min_size) return JXL_DEC_ERROR; + + dec->image_out_buffer_set = true; + dec->image_out_buffer = buffer; + dec->image_out_size = size; + dec->image_out_format = *format; + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderExtraChannelBufferSize(const JxlDecoder* dec, + const JxlPixelFormat* format, + size_t* size, + uint32_t index) { + if (!dec->got_basic_info || !(dec->orig_events_wanted & JXL_DEC_FULL_IMAGE)) { + return JXL_API_ERROR("No extra channel buffer needed at this time"); + } + + if (index >= dec->metadata.m.num_extra_channels) { + return JXL_API_ERROR("Invalid extra channel index"); + } + + size_t num_channels = 1; // Do not use format's num_channels + + size_t bits; + JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits); + if (status != JXL_DEC_SUCCESS) return status; + + size_t xsize, ysize; + GetCurrentDimensions(dec, xsize, ysize); + size_t row_size = + jxl::DivCeil(xsize * num_channels * bits, jxl::kBitsPerByte); + if (format->align > 1) { + row_size = jxl::DivCeil(row_size, format->align) * format->align; + } + *size = row_size * ysize; + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetExtraChannelBuffer(JxlDecoder* dec, + const JxlPixelFormat* format, + void* buffer, size_t size, + uint32_t index) { + size_t min_size; + // This also checks whether the format and index are valid and supported and + // basic info is available. 
+ JxlDecoderStatus status = + JxlDecoderExtraChannelBufferSize(dec, format, &min_size, index); + if (status != JXL_DEC_SUCCESS) return status; + + if (size < min_size) return JXL_DEC_ERROR; + + if (dec->extra_channel_output.size() <= index) { + dec->extra_channel_output.resize(dec->metadata.m.num_extra_channels, + {{}, nullptr, 0}); + } + // Guaranteed correct thanks to check in JxlDecoderExtraChannelBufferSize. + JXL_ASSERT(index < dec->extra_channel_output.size()); + + dec->extra_channel_output[index].format = *format; + dec->extra_channel_output[index].format.num_channels = 1; + dec->extra_channel_output[index].buffer = buffer; + dec->extra_channel_output[index].buffer_size = size; + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetImageOutCallback(JxlDecoder* dec, + const JxlPixelFormat* format, + JxlImageOutCallback callback, + void* opaque) { + dec->simple_image_out_callback.callback = callback; + dec->simple_image_out_callback.opaque = opaque; + const auto init_callback = + +[](void* init_opaque, size_t num_threads, size_t num_pixels_per_thread) { + // No initialization to do, just reuse init_opaque as run_opaque. 
+ return init_opaque; + }; + const auto run_callback = + +[](void* run_opaque, size_t thread_id, size_t x, size_t y, + size_t num_pixels, const void* pixels) { + const auto* const simple_callback = + static_cast(run_opaque); + simple_callback->callback(simple_callback->opaque, x, y, num_pixels, + pixels); + }; + const auto destroy_callback = +[](void* run_opaque) {}; + return JxlDecoderSetMultithreadedImageOutCallback( + dec, format, init_callback, run_callback, + /*destroy_callback=*/destroy_callback, &dec->simple_image_out_callback); +} + +JxlDecoderStatus JxlDecoderSetMultithreadedImageOutCallback( + JxlDecoder* dec, const JxlPixelFormat* format, + JxlImageOutInitCallback init_callback, JxlImageOutRunCallback run_callback, + JxlImageOutDestroyCallback destroy_callback, void* init_opaque) { + if (dec->image_out_buffer_set && !!dec->image_out_buffer) { + return JXL_API_ERROR( + "Cannot change from image out buffer to image out callback"); + } + + if (init_callback == nullptr || run_callback == nullptr || + destroy_callback == nullptr) { + return JXL_API_ERROR("All callbacks are required"); + } + + // Perform error checking for invalid format. 
+ size_t bits_dummy; + JxlDecoderStatus status = PrepareSizeCheck(dec, format, &bits_dummy); + if (status != JXL_DEC_SUCCESS) return status; + + dec->image_out_buffer_set = true; + dec->image_out_init_callback = init_callback; + dec->image_out_run_callback = run_callback; + dec->image_out_destroy_callback = destroy_callback; + dec->image_out_init_opaque = init_opaque; + dec->image_out_format = *format; + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetFrameHeader(const JxlDecoder* dec, + JxlFrameHeader* header) { + if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) { + return JXL_API_ERROR("no frame header available"); + } + const auto& metadata = dec->metadata.m; + memset(header, 0, sizeof(*header)); + if (metadata.have_animation) { + header->duration = dec->frame_header->animation_frame.duration; + if (metadata.animation.have_timecodes) { + header->timecode = dec->frame_header->animation_frame.timecode; + } + } + header->name_length = dec->frame_header->name.size(); + header->is_last = dec->frame_header->is_last; + size_t xsize, ysize; + GetCurrentDimensions(dec, xsize, ysize); + header->layer_info.xsize = xsize; + header->layer_info.ysize = ysize; + if (!dec->coalescing && dec->frame_header->custom_size_or_origin) { + header->layer_info.crop_x0 = dec->frame_header->frame_origin.x0; + header->layer_info.crop_y0 = dec->frame_header->frame_origin.y0; + header->layer_info.have_crop = JXL_TRUE; + } else { + header->layer_info.crop_x0 = 0; + header->layer_info.crop_y0 = 0; + header->layer_info.have_crop = JXL_FALSE; + } + if (!dec->keep_orientation && !dec->coalescing) { + // orient the crop offset + size_t W = dec->metadata.oriented_xsize(false); + size_t H = dec->metadata.oriented_ysize(false); + if (metadata.orientation > 4) { + std::swap(header->layer_info.crop_x0, header->layer_info.crop_y0); + } + size_t o = (metadata.orientation - 1) & 3; + if (o > 0 && o < 3) { + header->layer_info.crop_x0 = W - xsize - header->layer_info.crop_x0; 
+ } + if (o > 1) { + header->layer_info.crop_y0 = H - ysize - header->layer_info.crop_y0; + } + } + if (dec->coalescing) { + header->layer_info.blend_info.blendmode = JXL_BLEND_REPLACE; + header->layer_info.blend_info.source = 0; + header->layer_info.blend_info.alpha = 0; + header->layer_info.blend_info.clamp = JXL_FALSE; + header->layer_info.save_as_reference = 0; + } else { + header->layer_info.blend_info.blendmode = + static_cast(dec->frame_header->blending_info.mode); + header->layer_info.blend_info.source = + dec->frame_header->blending_info.source; + header->layer_info.blend_info.alpha = + dec->frame_header->blending_info.alpha_channel; + header->layer_info.blend_info.clamp = + dec->frame_header->blending_info.clamp; + header->layer_info.save_as_reference = dec->frame_header->save_as_reference; + } + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetExtraChannelBlendInfo(const JxlDecoder* dec, + size_t index, + JxlBlendInfo* blend_info) { + if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) { + return JXL_API_ERROR("no frame header available"); + } + const auto& metadata = dec->metadata.m; + if (index >= metadata.num_extra_channels) { + return JXL_API_ERROR("Invalid extra channel index"); + } + blend_info->blendmode = static_cast( + dec->frame_header->extra_channel_blending_info[index].mode); + blend_info->source = + dec->frame_header->extra_channel_blending_info[index].source; + blend_info->alpha = + dec->frame_header->extra_channel_blending_info[index].alpha_channel; + blend_info->clamp = + dec->frame_header->extra_channel_blending_info[index].clamp; + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetFrameName(const JxlDecoder* dec, char* name, + size_t size) { + if (!dec->frame_header || dec->frame_stage == FrameStage::kHeader) { + return JXL_API_ERROR("no frame header available"); + } + if (size < dec->frame_header->name.size() + 1) { + return JXL_API_ERROR("too small frame name output buffer"); + } + memcpy(name, 
dec->frame_header->name.c_str(), + dec->frame_header->name.size() + 1); + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetPreferredColorProfile( + JxlDecoder* dec, const JxlColorEncoding* color_encoding) { + if (!dec->got_all_headers) { + return JXL_API_ERROR("color info not yet available"); + } + if (dec->post_headers) { + return JXL_API_ERROR("too late to set the color encoding"); + } + if (dec->image_metadata.color_encoding.IsGray() && + color_encoding->color_space != JXL_COLOR_SPACE_GRAY && + dec->image_out_buffer_set && dec->image_out_format.num_channels < 3) { + return JXL_API_ERROR("Number of channels is too low for color output"); + } + if (color_encoding->color_space == JXL_COLOR_SPACE_UNKNOWN) { + return JXL_API_ERROR("Unknown output colorspace"); + } + jxl::ColorEncoding c_out; + JXL_API_RETURN_IF_ERROR( + ConvertExternalToInternalColorEncoding(*color_encoding, &c_out)); + JXL_API_RETURN_IF_ERROR(!c_out.ICC().empty()); + auto& output_encoding = dec->passes_state->output_encoding_info; + if (!c_out.SameColorEncoding(output_encoding.color_encoding)) { + JXL_API_RETURN_IF_ERROR(output_encoding.MaybeSetColorEncoding(c_out)); + dec->image_metadata.color_encoding = output_encoding.color_encoding; + } + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetDesiredIntensityTarget( + JxlDecoder* dec, float desired_intensity_target) { + if (desired_intensity_target < 0) { + return JXL_API_ERROR("negative intensity target requested"); + } + dec->desired_intensity_target = desired_intensity_target; + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetBoxBuffer(JxlDecoder* dec, uint8_t* data, + size_t size) { + if (dec->box_out_buffer_set) { + return JXL_API_ERROR("must release box buffer before setting it again"); + } + if (!dec->box_event) { + return JXL_API_ERROR("can only set box buffer after box event"); + } + + dec->box_out_buffer_set = true; + dec->box_out_buffer_set_current_box = true; + dec->box_out_buffer = data; + 
dec->box_out_buffer_size = size; + dec->box_out_buffer_pos = 0; + return JXL_DEC_SUCCESS; +} + +size_t JxlDecoderReleaseBoxBuffer(JxlDecoder* dec) { + if (!dec->box_out_buffer_set) { + return 0; + } + size_t result = dec->box_out_buffer_size - dec->box_out_buffer_pos; + dec->box_out_buffer_set = false; + dec->box_out_buffer = nullptr; + dec->box_out_buffer_size = 0; + if (!dec->box_out_buffer_set_current_box) { + dec->box_out_buffer_begin = 0; + } else { + dec->box_out_buffer_begin += dec->box_out_buffer_pos; + } + dec->box_out_buffer_set_current_box = false; + return result; +} + +JxlDecoderStatus JxlDecoderSetDecompressBoxes(JxlDecoder* dec, + JXL_BOOL decompress) { + // TODO(lode): return error if libbrotli is not compiled in the jxl decoding + // library + dec->decompress_boxes = decompress; + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetBoxType(JxlDecoder* dec, JxlBoxType type, + JXL_BOOL decompressed) { + if (!dec->box_event) { + return JXL_API_ERROR("can only get box info after JXL_DEC_BOX event"); + } + if (decompressed) { + memcpy(type, dec->box_decoded_type, sizeof(dec->box_decoded_type)); + } else { + memcpy(type, dec->box_type, sizeof(dec->box_type)); + } + + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderGetBoxSizeRaw(const JxlDecoder* dec, + uint64_t* size) { + if (!dec->box_event) { + return JXL_API_ERROR("can only get box info after JXL_DEC_BOX event"); + } + if (size) { + *size = dec->box_size; + } + return JXL_DEC_SUCCESS; +} + +JxlDecoderStatus JxlDecoderSetProgressiveDetail(JxlDecoder* dec, + JxlProgressiveDetail detail) { + if (detail != kDC && detail != kLastPasses && detail != kPasses) { + return JXL_API_ERROR( + "Values other than kDC (%d), kLastPasses (%d) and kPasses (%d), " + "like %d are not implemented.", + kDC, kLastPasses, kPasses, detail); + } + dec->prog_detail = detail; + return JXL_DEC_SUCCESS; +} + +namespace { + +template +JxlDecoderStatus VerifyOutputBitDepth(JxlBitDepth bit_depth, const T& metadata, + 
JxlPixelFormat format) { + uint32_t bits_per_sample = GetBitDepth(bit_depth, metadata, format); + if (bits_per_sample == 0) return JXL_API_ERROR("Invalid output bit depth"); + if (format.data_type == JXL_TYPE_UINT8 && bits_per_sample > 8) { + return JXL_API_ERROR("Invalid bit depth %u for uint8 output", + bits_per_sample); + } else if (format.data_type == JXL_TYPE_UINT16 && bits_per_sample > 16) { + return JXL_API_ERROR("Invalid bit depth %u for uint16 output", + bits_per_sample); + } + return JXL_DEC_SUCCESS; +} + +} // namespace + +JxlDecoderStatus JxlDecoderSetImageOutBitDepth(JxlDecoder* dec, + const JxlBitDepth* bit_depth) { + if (!dec->image_out_buffer_set) { + return JXL_API_ERROR("No image out buffer was set."); + } + JXL_API_RETURN_IF_ERROR( + VerifyOutputBitDepth(*bit_depth, dec->metadata.m, dec->image_out_format)); + dec->image_out_bit_depth = *bit_depth; + return JXL_DEC_SUCCESS; +} diff --git a/third-party/libjxl/libjxl/lib/jxl/decode_test.cc b/third-party/libjxl/libjxl/lib/jxl/decode_test.cc new file mode 100644 index 0000000000..da647958a0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/decode_test.cc @@ -0,0 +1,5506 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "lib/extras/codec.h" +#include "lib/extras/dec/color_description.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_external_image.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_external_image.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_file.h" +#include "lib/jxl/enc_icc_codec.h" +#include "lib/jxl/enc_progressive_split.h" +#include "lib/jxl/encode_internal.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/icc_codec.h" +#include "lib/jxl/image_metadata.h" +#include "lib/jxl/jpeg/enc_jpeg_data.h" +#include "lib/jxl/test_image.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" +#include "lib/jxl/toc.h" + +//////////////////////////////////////////////////////////////////////////////// + +namespace { +void AppendU32BE(uint32_t u32, jxl::PaddedBytes* bytes) { + bytes->push_back(u32 >> 24); + bytes->push_back(u32 >> 16); + bytes->push_back(u32 >> 8); + bytes->push_back(u32 >> 0); +} + +// What type of codestream format in the boxes to use for testing +enum CodeStreamBoxFormat { + // Do not use box format at all, only pure codestream + kCSBF_None, + // Have a single codestream box, with its actual size given in the box + kCSBF_Single, + // Have a single codestream box, with box size 0 (final box running to end) + kCSBF_Single_Zero_Terminated, + // Single codestream box, with another unknown box behind it + kCSBF_Single_Other, + // Have multiple partial codestream boxes + kCSBF_Multi, + // Have multiple partial codestream boxes, with final box size 0 (running + // to end) + kCSBF_Multi_Zero_Terminated, + 
// Have multiple partial codestream boxes, terminated by non-codestream box + kCSBF_Multi_Other_Terminated, + // Have multiple partial codestream boxes, terminated by non-codestream box + // that has its size set to 0 (running to end) + kCSBF_Multi_Other_Zero_Terminated, + // Have multiple partial codestream boxes, and the first one has a content + // of zero length + kCSBF_Multi_First_Empty, + // Have multiple partial codestream boxes, and the last one has a content + // of zero length and there is an unknown empty box at the end + kCSBF_Multi_Last_Empty_Other, + // Have a compressed exif box before a regular codestream box + kCSBF_Brob_Exif, + // Not a value but used for counting amount of enum entries + kCSBF_NUM_ENTRIES, +}; + +// Unknown boxes for testing +static const char* unk1_box_type = "unk1"; +static const char* unk1_box_contents = "abcdefghijklmnopqrstuvwxyz"; +static const size_t unk1_box_size = strlen(unk1_box_contents); +static const char* unk2_box_type = "unk2"; +static const char* unk2_box_contents = "0123456789"; +static const size_t unk2_box_size = strlen(unk2_box_contents); +static const char* unk3_box_type = "unk3"; +static const char* unk3_box_contents = "ABCDEF123456"; +static const size_t unk3_box_size = strlen(unk3_box_contents); +// Box with brob-compressed exif, including header +static const uint8_t* box_brob_exif = reinterpret_cast( + "\0\0\0@brobExif\241\350\2\300\177\244v\2525\304\360\27=?\267{" + "\33\37\314\332\214QX17PT\"\256\0\0\202s\214\313t\333\310\320k\20\276\30" + "\204\277l$\326c#\1\b"); +size_t box_brob_exif_size = 64; +// The uncompressed Exif data from the brob box +static const uint8_t* exif_uncompressed = reinterpret_cast( + "\0\0\0\0MM\0*" + "\0\0\0\b\0\5\1\22\0\3\0\0\0\1\0\5\0\0\1\32\0\5\0\0\0\1\0\0\0J\1\33\0\5\0\0" + "\0\1\0\0\0R\1(" + "\0\3\0\0\0\1\0\1\0\0\2\23\0\3\0\0\0\1\0\1\0\0\0\0\0\0\0\0\0\1\0\0\0\1\0\0" + "\0\1\0\0\0\1"); +size_t exif_uncompressed_size = 94; + +// Returns an ICC profile output by the JPEG XL 
decoder for RGB_D65_SRG_Rel_Lin, +// but with, on purpose, rXYZ, bXYZ and gXYZ (the RGB primaries) switched to a +// different order to ensure the profile does not match any known profile, so +// the encoder cannot encode it in a compact struct instead. +jxl::PaddedBytes GetIccTestProfile() { + const uint8_t* profile = reinterpret_cast( + "\0\0\3\200lcms\0040\0\0mntrRGB XYZ " + "\a\344\0\a\0\27\0\21\0$" + "\0\37acspAPPL\0\0\0\1\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1\0\0\366" + "\326\0\1\0\0\0\0\323-lcms\372c\207\36\227\200{" + "\2\232s\255\327\340\0\n\26\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\rdesc\0\0\1 " + "\0\0\0Bcprt\0\0\1d\0\0\1\0wtpt\0\0\2d\0\0\0\24chad\0\0\2x\0\0\0," + "bXYZ\0\0\2\244\0\0\0\24gXYZ\0\0\2\270\0\0\0\24rXYZ\0\0\2\314\0\0\0\24rTR" + "C\0\0\2\340\0\0\0 gTRC\0\0\2\340\0\0\0 bTRC\0\0\2\340\0\0\0 " + "chrm\0\0\3\0\0\0\0$dmnd\0\0\3$\0\0\0(" + "dmdd\0\0\3L\0\0\0002mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0&" + "\0\0\0\34\0R\0G\0B\0_\0D\0006\0005\0_\0S\0R\0G\0_\0R\0e\0l\0_" + "\0L\0i\0n\0\0mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\344\0\0\0\34\0C\0o\0" + "p\0y\0r\0i\0g\0h\0t\0 \0002\0000\0001\08\0 \0G\0o\0o\0g\0l\0e\0 " + "\0L\0L\0C\0,\0 \0C\0C\0-\0B\0Y\0-\0S\0A\0 \0003\0.\0000\0 " + "\0U\0n\0p\0o\0r\0t\0e\0d\0 " + "\0l\0i\0c\0e\0n\0s\0e\0(\0h\0t\0t\0p\0s\0:\0/\0/" + "\0c\0r\0e\0a\0t\0i\0v\0e\0c\0o\0m\0m\0o\0n\0s\0.\0o\0r\0g\0/" + "\0l\0i\0c\0e\0n\0s\0e\0s\0/\0b\0y\0-\0s\0a\0/\0003\0.\0000\0/" + "\0l\0e\0g\0a\0l\0c\0o\0d\0e\0)XYZ " + "\0\0\0\0\0\0\366\326\0\1\0\0\0\0\323-" + "sf32\0\0\0\0\0\1\fB\0\0\5\336\377\377\363%" + "\0\0\a\223\0\0\375\220\377\377\373\241\377\377\375\242\0\0\3\334\0\0\300" + "nXYZ \0\0\0\0\0\0o\240\0\08\365\0\0\3\220XYZ " + "\0\0\0\0\0\0$\237\0\0\17\204\0\0\266\304XYZ " + "\0\0\0\0\0\0b\227\0\0\267\207\0\0\30\331para\0\0\0\0\0\3\0\0\0\1\0\0\0\1" + "\0\0\0\0\0\0\0\1\0\0\0\0\0\0chrm\0\0\0\0\0\3\0\0\0\0\243\327\0\0T|" + "\0\0L\315\0\0\231\232\0\0&" + 
"g\0\0\17\\mluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\f\0\0\0\34\0G\0o\0o\0g" + "\0l\0emluc\0\0\0\0\0\0\0\1\0\0\0\fenUS\0\0\0\26\0\0\0\34\0I\0m\0a\0g\0e" + "\0 \0c\0o\0d\0e\0c\0\0"); + size_t profile_size = 896; + jxl::PaddedBytes icc_profile; + icc_profile.assign(profile, profile + profile_size); + return icc_profile; +} + +} // namespace + +namespace jxl { +namespace { + +void AppendTestBox(const char* type, const char* contents, size_t contents_size, + bool unbounded, PaddedBytes* bytes) { + AppendU32BE(contents_size + 8, bytes); + bytes->push_back(type[0]); + bytes->push_back(type[1]); + bytes->push_back(type[2]); + bytes->push_back(type[3]); + const uint8_t* contents_u = reinterpret_cast(contents); + bytes->append(contents_u, contents_u + contents_size); +} + +enum PreviewMode { + kNoPreview, + kSmallPreview, + kBigPreview, + kNumPreviewModes, +}; + +void GeneratePreview(PreviewMode preview_mode, ImageBundle* ib) { + if (preview_mode == kSmallPreview) { + ib->ShrinkTo(ib->xsize() / 7, ib->ysize() / 7); + } else if (preview_mode == kBigPreview) { + auto upsample7 = [&](const ImageF& in, ImageF* out) { + for (size_t y = 0; y < out->ysize(); ++y) { + for (size_t x = 0; x < out->xsize(); ++x) { + out->Row(y)[x] = in.ConstRow(y / 7)[x / 7]; + } + } + }; + Image3F preview(ib->xsize() * 7, ib->ysize() * 7); + for (size_t c = 0; c < 3; ++c) { + upsample7(ib->color()->Plane(c), &preview.Plane(c)); + } + std::vector extra_channels; + for (size_t i = 0; i < ib->extra_channels().size(); ++i) { + ImageF ec(ib->xsize() * 7, ib->ysize() * 7); + upsample7(ib->extra_channels()[i], &ec); + extra_channels.emplace_back(std::move(ec)); + } + ib->RemoveColor(); + ib->ClearExtraChannels(); + ib->SetFromImage(std::move(preview), ib->c_current()); + ib->SetExtraChannels(std::move(extra_channels)); + } +} + +struct TestCodestreamParams { + CompressParams cparams; + CodeStreamBoxFormat box_format = kCSBF_None; + JxlOrientation orientation = JXL_ORIENT_IDENTITY; + PreviewMode preview_mode = 
kNoPreview; + bool add_intrinsic_size = false; + bool add_icc_profile = false; + float intensity_target = 0.0; + std::string color_space; + PaddedBytes* jpeg_codestream = nullptr; + const ProgressiveMode* progressive_mode = nullptr; +}; + +// Input pixels always given as 16-bit RGBA, 8 bytes per pixel. +// include_alpha determines if the encoded image should contain the alpha +// channel. +// add_icc_profile: if false, encodes the image as sRGB using the JXL fields, +// for grayscale or RGB images. If true, encodes the image using the ICC profile +// returned by GetIccTestProfile, without the JXL fields, this requires the +// image is RGB, not grayscale. +// Providing jpeg_codestream will populate the jpeg_codestream with compressed +// JPEG bytes, and make it possible to reconstruct those exact JPEG bytes using +// the return value _if_ add_container indicates a box format. +PaddedBytes CreateTestJXLCodestream(Span pixels, size_t xsize, + size_t ysize, size_t num_channels, + const TestCodestreamParams& params) { + // Compress the pixels with JPEG XL. + bool grayscale = (num_channels <= 2); + bool include_alpha = !(num_channels & 1) && params.jpeg_codestream == nullptr; + size_t bitdepth = params.jpeg_codestream == nullptr ? 16 : 8; + CodecInOut io; + io.SetSize(xsize, ysize); + ColorEncoding color_encoding; + if (params.add_icc_profile) { + // the hardcoded ICC profile we attach requires RGB. 
+ EXPECT_EQ(false, grayscale); + EXPECT_TRUE(params.color_space.empty()); + EXPECT_TRUE(color_encoding.SetICC(GetIccTestProfile(), &GetJxlCms())); + } else if (!params.color_space.empty()) { + JxlColorEncoding c; + EXPECT_TRUE(jxl::ParseDescription(params.color_space, &c)); + EXPECT_TRUE(ConvertExternalToInternalColorEncoding(c, &color_encoding)); + EXPECT_EQ(color_encoding.IsGray(), grayscale); + } else { + color_encoding = jxl::ColorEncoding::SRGB(/*is_gray=*/grayscale); + } + ThreadPool pool(nullptr, nullptr); + io.metadata.m.SetUintSamples(bitdepth); + if (include_alpha) { + io.metadata.m.SetAlphaBits(bitdepth); + } + if (params.intensity_target != 0) { + io.metadata.m.SetIntensityTarget(params.intensity_target); + } + JxlPixelFormat format = {static_cast(num_channels), JXL_TYPE_UINT16, + JXL_BIG_ENDIAN, 0}; + // Make the grayscale-ness of the io metadata color_encoding and the packed + // image match. + io.metadata.m.color_encoding = color_encoding; + EXPECT_TRUE(ConvertFromExternal(pixels, xsize, ysize, color_encoding, + /*bits_per_sample=*/16, format, &pool, + &io.Main())); + jxl::PaddedBytes jpeg_data; + if (params.jpeg_codestream != nullptr) { + if (jxl::extras::CanDecode(jxl::extras::Codec::kJPG)) { + std::vector jpeg_bytes; + io.jpeg_quality = 70; + EXPECT_TRUE(Encode(io, extras::Codec::kJPG, io.metadata.m.color_encoding, + /*bits_per_sample=*/8, &jpeg_bytes, &pool)); + params.jpeg_codestream->append(jpeg_bytes.data(), + jpeg_bytes.data() + jpeg_bytes.size()); + EXPECT_TRUE(jxl::jpeg::DecodeImageJPG( + jxl::Span(jpeg_bytes.data(), jpeg_bytes.size()), &io)); + EXPECT_TRUE( + EncodeJPEGData(*io.Main().jpeg_data, &jpeg_data, params.cparams)); + io.metadata.m.xyb_encoded = false; + } else { + JXL_ABORT( + "unable to create reconstructible JPEG without JPEG support enabled"); + } + } + if (params.preview_mode) { + io.preview_frame = io.Main().Copy(); + GeneratePreview(params.preview_mode, &io.preview_frame); + io.metadata.m.have_preview = true; + 
EXPECT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(), + io.preview_frame.ysize())); + } + if (params.add_intrinsic_size) { + EXPECT_TRUE(io.metadata.m.intrinsic_size.Set(xsize / 3, ysize / 3)); + } + io.metadata.m.orientation = params.orientation; + AuxOut aux_out; + PaddedBytes compressed; + PassesEncoderState enc_state; + if (params.progressive_mode) { + enc_state.progressive_splitter.SetProgressiveMode(*params.progressive_mode); + } + EXPECT_TRUE(EncodeFile(params.cparams, &io, &enc_state, &compressed, + GetJxlCms(), &aux_out, &pool)); + CodeStreamBoxFormat add_container = params.box_format; + if (add_container != kCSBF_None) { + // Header with signature box and ftyp box. + const uint8_t header[] = {0, 0, 0, 0xc, 0x4a, 0x58, 0x4c, 0x20, + 0xd, 0xa, 0x87, 0xa, 0, 0, 0, 0x14, + 0x66, 0x74, 0x79, 0x70, 0x6a, 0x78, 0x6c, 0x20, + 0, 0, 0, 0, 0x6a, 0x78, 0x6c, 0x20}; + + bool is_multi = add_container == kCSBF_Multi || + add_container == kCSBF_Multi_Zero_Terminated || + add_container == kCSBF_Multi_Other_Terminated || + add_container == kCSBF_Multi_Other_Zero_Terminated || + add_container == kCSBF_Multi_First_Empty || + add_container == kCSBF_Multi_Last_Empty_Other; + + if (is_multi) { + size_t third = compressed.size() / 3; + std::vector compressed0(compressed.data(), + compressed.data() + third); + std::vector compressed1(compressed.data() + third, + compressed.data() + 2 * third); + std::vector compressed2(compressed.data() + 2 * third, + compressed.data() + compressed.size()); + + PaddedBytes c; + c.append(header, header + sizeof(header)); + if (params.jpeg_codestream != nullptr) { + jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false, + &c); + c.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size()); + } + uint32_t jxlp_index = 0; + if (add_container == kCSBF_Multi_First_Empty) { + // Dummy (empty) codestream part + AppendU32BE(12, &c); + c.push_back('j'); + c.push_back('x'); + c.push_back('l'); + c.push_back('p'); + 
AppendU32BE(jxlp_index++, &c); + } + // First codestream part + AppendU32BE(compressed0.size() + 12, &c); + c.push_back('j'); + c.push_back('x'); + c.push_back('l'); + c.push_back('p'); + AppendU32BE(jxlp_index++, &c); + c.append(compressed0.data(), compressed0.data() + compressed0.size()); + // A few non-codestream boxes in between + AppendTestBox(unk1_box_type, unk1_box_contents, unk1_box_size, false, &c); + AppendTestBox(unk2_box_type, unk2_box_contents, unk2_box_size, false, &c); + // Dummy (empty) codestream part + AppendU32BE(12, &c); + c.push_back('j'); + c.push_back('x'); + c.push_back('l'); + c.push_back('p'); + AppendU32BE(jxlp_index++, &c); + // Second codestream part + AppendU32BE(compressed1.size() + 12, &c); + c.push_back('j'); + c.push_back('x'); + c.push_back('l'); + c.push_back('p'); + AppendU32BE(jxlp_index++, &c); + c.append(compressed1.data(), compressed1.data() + compressed1.size()); + // Third (last) codestream part + AppendU32BE(add_container == kCSBF_Multi_Zero_Terminated + ? 
0 + : (compressed2.size() + 12), + &c); + c.push_back('j'); + c.push_back('x'); + c.push_back('l'); + c.push_back('p'); + if (add_container != kCSBF_Multi_Last_Empty_Other) { + AppendU32BE(jxlp_index++ | 0x80000000, &c); + } else { + AppendU32BE(jxlp_index++, &c); + } + c.append(compressed2.data(), compressed2.data() + compressed2.size()); + if (add_container == kCSBF_Multi_Last_Empty_Other) { + // Dummy (empty) codestream part + AppendU32BE(12, &c); + c.push_back('j'); + c.push_back('x'); + c.push_back('l'); + c.push_back('p'); + AppendU32BE(jxlp_index++ | 0x80000000, &c); + AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, false, + &c); + } + if (add_container == kCSBF_Multi_Other_Terminated) { + AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, false, + &c); + } + if (add_container == kCSBF_Multi_Other_Zero_Terminated) { + AppendTestBox(unk3_box_type, unk3_box_contents, unk3_box_size, true, + &c); + } + compressed.swap(c); + } else { + PaddedBytes c; + c.append(header, header + sizeof(header)); + if (params.jpeg_codestream != nullptr) { + jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false, + &c); + c.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size()); + } + if (add_container == kCSBF_Brob_Exif) { + c.append(box_brob_exif, box_brob_exif + box_brob_exif_size); + } + AppendU32BE(add_container == kCSBF_Single_Zero_Terminated + ? 
0 + : (compressed.size() + 8), + &c); + c.push_back('j'); + c.push_back('x'); + c.push_back('l'); + c.push_back('c'); + c.append(compressed.data(), compressed.data() + compressed.size()); + if (add_container == kCSBF_Single_Other) { + AppendTestBox(unk1_box_type, unk1_box_contents, unk1_box_size, false, + &c); + } + compressed.swap(c); + } + } + + return compressed; +} + +JxlDecoderStatus ProcessInputIgnoreBoxes(JxlDecoder* dec) { + JxlDecoderStatus status = JXL_DEC_BOX; + while (status == JXL_DEC_BOX) { + status = JxlDecoderProcessInput(dec); + } + return status; +} + +// Decodes one-shot with the API for non-streaming decoding tests. +std::vector DecodeWithAPI(JxlDecoder* dec, + Span compressed, + const JxlPixelFormat& format, + bool use_callback, bool set_buffer_early, + bool use_resizable_runner, + bool require_boxes, bool expect_success, + PaddedBytes* icc = nullptr) { + JxlThreadParallelRunnerPtr runner_fixed; + JxlResizableParallelRunnerPtr runner_resizable; + JxlParallelRunner runner_fn; + void* runner; + + if (use_resizable_runner) { + runner_resizable = JxlResizableParallelRunnerMake(nullptr); + runner = runner_resizable.get(); + runner_fn = JxlResizableParallelRunner; + } else { + size_t hw_threads = JxlThreadParallelRunnerDefaultNumWorkerThreads(); + runner_fixed = + JxlThreadParallelRunnerMake(nullptr, std::min(hw_threads, 16)); + runner = runner_fixed.get(); + runner_fn = JxlThreadParallelRunner; + } + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetParallelRunner(dec, runner_fn, runner)); + + auto process_input = + require_boxes ? ProcessInputIgnoreBoxes : JxlDecoderProcessInput; + + EXPECT_EQ( + JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | (set_buffer_early ? JXL_DEC_FRAME : 0) | + JXL_DEC_PREVIEW_IMAGE | JXL_DEC_FULL_IMAGE | + (require_boxes ? JXL_DEC_BOX : 0) | + (icc != nullptr ? 
JXL_DEC_COLOR_ENCODING : 0))); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), compressed.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, process_input(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + if (use_resizable_runner) { + JxlResizableParallelRunnerSetThreads( + runner, + JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize)); + } + + std::vector pixels(buffer_size); + size_t bytes_per_pixel = format.num_channels * + test::GetDataBits(format.data_type) / + jxl::kBitsPerByte; + size_t stride = bytes_per_pixel * info.xsize; + if (format.align > 1) { + stride = jxl::DivCeil(stride, format.align) * format.align; + } + auto callback = [&](size_t x, size_t y, size_t num_pixels, + const void* pixels_row) { + memcpy(pixels.data() + stride * y + bytes_per_pixel * x, pixels_row, + num_pixels * bytes_per_pixel); + }; + + JxlDecoderStatus status = process_input(dec); + + if (status == JXL_DEC_COLOR_ENCODING) { + size_t icc_size = 0; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA, + &icc_size)); + icc->resize(icc_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetColorAsICCProfile(dec, JXL_COLOR_PROFILE_TARGET_DATA, + icc->data(), icc_size)); + + status = process_input(dec); + } + + std::vector preview; + if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) { + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)); + preview.resize(buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreviewOutBuffer(dec, &format, preview.data(), + preview.size())); + EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, process_input(dec)); + + status = process_input(dec); + } + + if (set_buffer_early) { + EXPECT_EQ(JXL_DEC_FRAME, status); + } else { + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, status); + } + + if 
(use_callback) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutCallback( + dec, &format, + [](void* opaque, size_t x, size_t y, size_t xsize, + const void* pixels_row) { + auto cb = static_cast(opaque); + (*cb)(x, y, xsize, pixels_row); + }, + /*opaque=*/&callback)); + } else { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + } + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, process_input(dec)); + + // After the full image was output, JxlDecoderProcessInput should return + // success to indicate all is done, unless we requested boxes and the last + // box was not a terminal unbounded box, in which case it should ask for + // more input. + JxlDecoderStatus expected_status = + expect_success ? JXL_DEC_SUCCESS : JXL_DEC_NEED_MORE_INPUT; + EXPECT_EQ(expected_status, process_input(dec)); + + return pixels; +} + +// Decodes one-shot with the API for non-streaming decoding tests. +std::vector DecodeWithAPI(Span compressed, + const JxlPixelFormat& format, + bool use_callback, bool set_buffer_early, + bool use_resizable_runner, + bool require_boxes, bool expect_success) { + JxlDecoder* dec = JxlDecoderCreate(NULL); + std::vector pixels = + DecodeWithAPI(dec, compressed, format, use_callback, set_buffer_early, + use_resizable_runner, require_boxes, expect_success); + JxlDecoderDestroy(dec); + return pixels; +} + +} // namespace +} // namespace jxl + +//////////////////////////////////////////////////////////////////////////////// + +TEST(DecodeTest, JxlSignatureCheckTest) { + std::vector>> tests = { + // No JPEGXL header starts with 'a'. + {JXL_SIG_INVALID, {'a'}}, + {JXL_SIG_INVALID, {'a', 'b', 'c', 'd', 'e', 'f'}}, + + // Empty file is not enough bytes. + {JXL_SIG_NOT_ENOUGH_BYTES, {}}, + + // JPEGXL headers. + {JXL_SIG_NOT_ENOUGH_BYTES, {0xff}}, // Part of a signature. + {JXL_SIG_INVALID, {0xff, 0xD8}}, // JPEG-1 + {JXL_SIG_CODESTREAM, {0xff, 0x0a}}, + + // JPEGXL container file. 
+ {JXL_SIG_CONTAINER, + {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0xA}}, + // Ending with invalid byte. + {JXL_SIG_INVALID, {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87, 0}}, + // Part of signature. + {JXL_SIG_NOT_ENOUGH_BYTES, + {0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xD, 0xA, 0x87}}, + {JXL_SIG_NOT_ENOUGH_BYTES, {0}}, + }; + for (const auto& test : tests) { + EXPECT_EQ(test.first, + JxlSignatureCheck(test.second.data(), test.second.size())) + << "Where test data is " << ::testing::PrintToString(test.second); + } +} + +TEST(DecodeTest, DefaultAllocTest) { + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_NE(nullptr, dec); + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, CustomAllocTest) { + struct CalledCounters { + int allocs = 0; + int frees = 0; + } counters; + + JxlMemoryManager mm; + mm.opaque = &counters; + mm.alloc = [](void* opaque, size_t size) { + reinterpret_cast(opaque)->allocs++; + return malloc(size); + }; + mm.free = [](void* opaque, void* address) { + reinterpret_cast(opaque)->frees++; + free(address); + }; + + JxlDecoder* dec = JxlDecoderCreate(&mm); + EXPECT_NE(nullptr, dec); + EXPECT_LE(1, counters.allocs); + EXPECT_EQ(0, counters.frees); + JxlDecoderDestroy(dec); + EXPECT_LE(1, counters.frees); +} + +// TODO(lode): add multi-threaded test when multithreaded pixel decoding from +// API is implemented. +TEST(DecodeTest, DefaultParallelRunnerTest) { + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_NE(nullptr, dec); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetParallelRunner(dec, nullptr, nullptr)); + JxlDecoderDestroy(dec); +} + +// Creates the header of a JPEG XL file with various custom parameters for +// testing. +// xsize, ysize: image dimensions to store in the SizeHeader, max 512. +// bits_per_sample, orientation: a selection of header parameters to test with. +// orientation: image orientation to set in the metadata +// alpha_bits: if non-0, alpha extra channel bits to set in the metadata. 
Also +// gives the alpha channel the name "alpha_test" +// have_container: add box container format around the codestream. +// metadata_default: if true, ImageMetadata is set to default and +// bits_per_sample, orientation and alpha_bits are ignored. +// insert_box: insert an extra box before the codestream box, making the header +// farther away from the front than is ideal. Only used if have_container. +std::vector GetTestHeader(size_t xsize, size_t ysize, + size_t bits_per_sample, size_t orientation, + size_t alpha_bits, bool xyb_encoded, + bool have_container, bool metadata_default, + bool insert_extra_box, + const jxl::PaddedBytes& icc_profile) { + jxl::BitWriter writer; + jxl::BitWriter::Allotment allotment(&writer, 65536); // Large enough + + if (have_container) { + const std::vector signature_box = {0, 0, 0, 0xc, 'J', 'X', + 'L', ' ', 0xd, 0xa, 0x87, 0xa}; + const std::vector filetype_box = { + 0, 0, 0, 0x14, 'f', 't', 'y', 'p', 'j', 'x', + 'l', ' ', 0, 0, 0, 0, 'j', 'x', 'l', ' '}; + const std::vector extra_box_header = {0, 0, 0, 0xff, + 't', 'e', 's', 't'}; + // Beginning of codestream box, with an arbitrary size certainly large + // enough to contain the header + const std::vector codestream_box_header = {0, 0, 0, 0xff, + 'j', 'x', 'l', 'c'}; + + for (size_t i = 0; i < signature_box.size(); i++) { + writer.Write(8, signature_box[i]); + } + for (size_t i = 0; i < filetype_box.size(); i++) { + writer.Write(8, filetype_box[i]); + } + if (insert_extra_box) { + for (size_t i = 0; i < extra_box_header.size(); i++) { + writer.Write(8, extra_box_header[i]); + } + for (size_t i = 0; i < 255 - 8; i++) { + writer.Write(8, 0); + } + } + for (size_t i = 0; i < codestream_box_header.size(); i++) { + writer.Write(8, codestream_box_header[i]); + } + } + + // JXL signature + writer.Write(8, 0xff); + writer.Write(8, 0x0a); + + // SizeHeader + jxl::CodecMetadata metadata; + EXPECT_TRUE(metadata.size.Set(xsize, ysize)); + EXPECT_TRUE(WriteSizeHeader(metadata.size, &writer, 
0, nullptr)); + + if (!metadata_default) { + metadata.m.SetUintSamples(bits_per_sample); + metadata.m.orientation = orientation; + metadata.m.SetAlphaBits(alpha_bits); + metadata.m.xyb_encoded = xyb_encoded; + if (alpha_bits != 0) { + metadata.m.extra_channel_info[0].name = "alpha_test"; + } + } + + if (!icc_profile.empty()) { + jxl::PaddedBytes copy = icc_profile; + EXPECT_TRUE( + metadata.m.color_encoding.SetICC(std::move(copy), &jxl::GetJxlCms())); + } + + EXPECT_TRUE(jxl::Bundle::Write(metadata.m, &writer, 0, nullptr)); + metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded; + EXPECT_TRUE(jxl::Bundle::Write(metadata.transform_data, &writer, 0, nullptr)); + + if (!icc_profile.empty()) { + EXPECT_TRUE(metadata.m.color_encoding.WantICC()); + EXPECT_TRUE(jxl::WriteICC(icc_profile, &writer, 0, nullptr)); + } + + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + return std::vector( + writer.GetSpan().data(), + writer.GetSpan().data() + writer.GetSpan().size()); +} + +TEST(DecodeTest, BasicInfoTest) { + size_t xsize[2] = {50, 33}; + size_t ysize[2] = {50, 77}; + size_t bits_per_sample[2] = {8, 23}; + size_t orientation[2] = {3, 5}; + size_t alpha_bits[2] = {0, 8}; + JXL_BOOL have_container[2] = {0, 1}; + bool xyb_encoded = false; + + std::vector> test_samples; + // Test with direct codestream + test_samples.push_back(GetTestHeader( + xsize[0], ysize[0], bits_per_sample[0], orientation[0], alpha_bits[0], + xyb_encoded, have_container[0], /*metadata_default=*/false, + /*insert_extra_box=*/false, {})); + // Test with container and different parameters + test_samples.push_back(GetTestHeader( + xsize[1], ysize[1], bits_per_sample[1], orientation[1], alpha_bits[1], + xyb_encoded, have_container[1], /*metadata_default=*/false, + /*insert_extra_box=*/false, {})); + + for (size_t i = 0; i < test_samples.size(); ++i) { + const std::vector& data = test_samples[i]; + // Test decoding too small header first, until we reach the final 
byte. + for (size_t size = 0; size <= data.size(); ++size) { + // Test with a new decoder for each tested byte size. + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO)); + const uint8_t* next_in = data.data(); + size_t avail_in = size; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + + JxlBasicInfo info; + bool have_basic_info = !JxlDecoderGetBasicInfo(dec, &info); + + if (size == data.size()) { + EXPECT_EQ(JXL_DEC_BASIC_INFO, status); + + // All header bytes given so the decoder must have the basic info. + EXPECT_EQ(true, have_basic_info); + EXPECT_EQ(have_container[i], info.have_container); + EXPECT_EQ(alpha_bits[i], info.alpha_bits); + // Orientations 5..8 swap the dimensions + if (orientation[i] >= 5) { + EXPECT_EQ(xsize[i], info.ysize); + EXPECT_EQ(ysize[i], info.xsize); + } else { + EXPECT_EQ(xsize[i], info.xsize); + EXPECT_EQ(ysize[i], info.ysize); + } + // The API should set the orientation to identity by default since it + // already applies the transformation internally by default. 
+ EXPECT_EQ(1u, info.orientation); + + EXPECT_EQ(3u, info.num_color_channels); + + if (alpha_bits[i] != 0) { + // Expect an extra channel + EXPECT_EQ(1u, info.num_extra_channels); + JxlExtraChannelInfo extra; + EXPECT_EQ(0, JxlDecoderGetExtraChannelInfo(dec, 0, &extra)); + EXPECT_EQ(alpha_bits[i], extra.bits_per_sample); + EXPECT_EQ(JXL_CHANNEL_ALPHA, extra.type); + EXPECT_EQ(0, extra.alpha_premultiplied); + // Verify the name "alpha_test" given to the alpha channel + EXPECT_EQ(10u, extra.name_length); + char name[11]; + EXPECT_EQ(0, + JxlDecoderGetExtraChannelName(dec, 0, name, sizeof(name))); + EXPECT_EQ(std::string("alpha_test"), std::string(name)); + } else { + EXPECT_EQ(0u, info.num_extra_channels); + } + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + } else { + // If we did not give the full header, the basic info should not be + // available. Allow a few bytes of slack due to some bits for default + // opsinmatrix/extension bits. + if (size + 2 < data.size()) { + EXPECT_EQ(false, have_basic_info); + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, status); + } + } + + // Test that decoder doesn't allow setting a setting required at beginning + // unless it's reset + EXPECT_EQ(JXL_DEC_ERROR, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO)); + JxlDecoderReset(dec); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO)); + + JxlDecoderDestroy(dec); + } + } +} + +TEST(DecodeTest, BufferSizeTest) { + size_t xsize = 33; + size_t ysize = 77; + size_t bits_per_sample = 8; + size_t orientation = 1; + size_t alpha_bits = 8; + bool have_container = false; + bool xyb_encoded = false; + + std::vector header = + GetTestHeader(xsize, ysize, bits_per_sample, orientation, alpha_bits, + xyb_encoded, have_container, /*metadata_default=*/false, + /*insert_extra_box=*/false, {}); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO)); + const uint8_t* next_in = 
header.data(); + size_t avail_in = header.size(); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + EXPECT_EQ(JXL_DEC_BASIC_INFO, status); + + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + + JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}; + size_t image_out_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &image_out_size)); + EXPECT_EQ(xsize * ysize * 4, image_out_size); + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, BasicInfoSizeHintTest) { + // Test on a file where the size hint is too small initially due to inserting + // a box before the codestream (something that is normally not recommended) + size_t xsize = 50; + size_t ysize = 50; + size_t bits_per_sample = 16; + size_t orientation = 1; + size_t alpha_bits = 0; + bool xyb_encoded = false; + std::vector data = GetTestHeader( + xsize, ysize, bits_per_sample, orientation, alpha_bits, xyb_encoded, + /*have_container=*/true, /*metadata_default=*/false, + /*insert_extra_box=*/true, {}); + + JxlDecoderStatus status; + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO)); + + size_t hint0 = JxlDecoderSizeHintBasicInfo(dec); + // Test that the test works as intended: we construct a file on purpose to + // be larger than the first hint by having that extra box. + EXPECT_LT(hint0, data.size()); + const uint8_t* next_in = data.data(); + // Do as if we have only as many bytes as indicated by the hint available + size_t avail_in = std::min(hint0, data.size()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + status = JxlDecoderProcessInput(dec); + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, status); + // Basic info cannot be available yet due to the extra inserted box. 
+ EXPECT_EQ(false, !JxlDecoderGetBasicInfo(dec, nullptr)); + + size_t num_read = avail_in - JxlDecoderReleaseInput(dec); + EXPECT_LT(num_read, data.size()); + + size_t hint1 = JxlDecoderSizeHintBasicInfo(dec); + // The hint must be larger than the previous hint (taking already processed + // bytes into account, the hint is a hint for the next avail_in) since the + // decoder now knows there is a box in between. + EXPECT_GT(hint1 + num_read, hint0); + avail_in = std::min(hint1, data.size() - num_read); + next_in += num_read; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + status = JxlDecoderProcessInput(dec); + EXPECT_EQ(JXL_DEC_BASIC_INFO, status); + JxlBasicInfo info; + // We should have the basic info now, since we only added one box in-between, + // and the decoder should have known its size, its implementation can return + // a correct hint. + EXPECT_EQ(true, !JxlDecoderGetBasicInfo(dec, &info)); + + // Also test if the basic info is correct. + EXPECT_EQ(1, info.have_container); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + EXPECT_EQ(orientation, info.orientation); + EXPECT_EQ(bits_per_sample, info.bits_per_sample); + + JxlDecoderDestroy(dec); +} + +std::vector GetIccTestHeader(const jxl::PaddedBytes& icc_profile, + bool xyb_encoded) { + size_t xsize = 50; + size_t ysize = 50; + size_t bits_per_sample = 16; + size_t orientation = 1; + size_t alpha_bits = 0; + return GetTestHeader(xsize, ysize, bits_per_sample, orientation, alpha_bits, + xyb_encoded, + /*have_container=*/false, /*metadata_default=*/false, + /*insert_extra_box=*/false, icc_profile); +} + +// Tests the case where pixels and metadata ICC profile are the same +TEST(DecodeTest, IccProfileTestOriginal) { + jxl::PaddedBytes icc_profile = GetIccTestProfile(); + bool xyb_encoded = false; + std::vector data = GetIccTestHeader(icc_profile, xyb_encoded); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + 
JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size())); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + + // Expect the opposite of xyb_encoded for uses_original_profile + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(JXL_TRUE, info.uses_original_profile); + + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + + // the encoded color profile expected to be not available, since the image + // has an ICC profile instead + EXPECT_EQ(JXL_DEC_ERROR, + JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr)); + + size_t dec_profile_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + &dec_profile_size)); + + // Check that can get return status with NULL size + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + nullptr)); + + // The profiles must be equal. This requires they have equal size, and if + // they do, we can get the profile and compare the contents. 
+ EXPECT_EQ(icc_profile.size(), dec_profile_size); + if (icc_profile.size() == dec_profile_size) { + jxl::PaddedBytes icc_profile2(icc_profile.size()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile( + dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + icc_profile2.data(), icc_profile2.size())); + EXPECT_EQ(icc_profile, icc_profile2); + } + + // the data is not xyb_encoded, so same result expected for the pixel data + // color profile + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, nullptr)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA, + &dec_profile_size)); + EXPECT_EQ(icc_profile.size(), dec_profile_size); + + JxlDecoderDestroy(dec); +} + +// Tests the case where pixels and metadata ICC profile are different +TEST(DecodeTest, IccProfileTestXybEncoded) { + jxl::PaddedBytes icc_profile = GetIccTestProfile(); + bool xyb_encoded = true; + std::vector data = GetIccTestHeader(icc_profile, xyb_encoded); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + + // Expect the opposite of xyb_encoded for uses_original_profile + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(JXL_FALSE, info.uses_original_profile); + + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + + // the encoded color profile expected to be not available, since the image + // has an ICC profile instead + EXPECT_EQ(JXL_DEC_ERROR, + JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, nullptr)); + + // Check that can get return status with NULL size + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + nullptr)); 
+ + size_t dec_profile_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + &dec_profile_size)); + + // The profiles must be equal. This requires they have equal size, and if + // they do, we can get the profile and compare the contents. + EXPECT_EQ(icc_profile.size(), dec_profile_size); + if (icc_profile.size() == dec_profile_size) { + jxl::PaddedBytes icc_profile2(icc_profile.size()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile( + dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + icc_profile2.data(), icc_profile2.size())); + EXPECT_EQ(icc_profile, icc_profile2); + } + + // Data is xyb_encoded, so the data profile is a different profile, encoded + // as structured profile. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, nullptr)); + JxlColorEncoding pixel_encoding; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding)); + EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries); + // The API returns LINEAR by default when the colorspace cannot be represented + // by enum values. + EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function); + + // Test the same but with integer format. + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding)); + EXPECT_EQ(JXL_PRIMARIES_SRGB, pixel_encoding.primaries); + EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function); + + // Test after setting the preferred color profile to non-linear sRGB: + // for XYB images with ICC profile, this setting is expected to take effect. 
+ jxl::ColorEncoding temp_jxl_srgb = jxl::ColorEncoding::SRGB(false); + JxlColorEncoding pixel_encoding_srgb; + ConvertInternalToExternalColorEncoding(temp_jxl_srgb, &pixel_encoding_srgb); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreferredColorProfile(dec, &pixel_encoding_srgb)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding)); + EXPECT_EQ(JXL_TRANSFER_FUNCTION_SRGB, pixel_encoding.transfer_function); + + // The decoder can also output this as a generated ICC profile anyway, and + // we're certain that it will differ from the above defined profile since + // the sRGB data should not have swapped R/G/B primaries. + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA, + &dec_profile_size)); + // We don't need to dictate exactly what size the generated ICC profile + // must be (since there are many ways to represent the same color space), + // but it should not be zero. + EXPECT_NE(0u, dec_profile_size); + jxl::PaddedBytes icc_profile2(dec_profile_size); + if (0 != dec_profile_size) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, + icc_profile2.data(), icc_profile2.size())); + // expected not equal + EXPECT_NE(icc_profile, icc_profile2); + } + + // Test setting another different preferred profile, to verify that the + // returned JXL_COLOR_PROFILE_TARGET_DATA ICC profile is correctly + // updated. 
+ + jxl::ColorEncoding temp_jxl_linear = jxl::ColorEncoding::LinearSRGB(false); + JxlColorEncoding pixel_encoding_linear; + ConvertInternalToExternalColorEncoding(temp_jxl_linear, + &pixel_encoding_linear); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreferredColorProfile(dec, &pixel_encoding_linear)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetColorAsEncodedProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, &pixel_encoding)); + EXPECT_EQ(JXL_TRANSFER_FUNCTION_LINEAR, pixel_encoding.transfer_function); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA, + &dec_profile_size)); + EXPECT_NE(0u, dec_profile_size); + jxl::PaddedBytes icc_profile3(dec_profile_size); + if (0 != dec_profile_size) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, + icc_profile3.data(), icc_profile3.size())); + // expected not equal to the previously set preferred profile. + EXPECT_NE(icc_profile2, icc_profile3); + } + + JxlDecoderDestroy(dec); +} + +// Test decoding ICC from partial files byte for byte. +// This test must pass also if JXL_CRASH_ON_ERROR is enabled, that is, the +// decoding of the ANS histogram and stream of the encoded ICC profile must also +// handle the case of not enough input bytes with StatusCode::kNotEnoughBytes +// rather than fatal error status codes. 
+TEST(DecodeTest, ICCPartialTest) { + jxl::PaddedBytes icc_profile = GetIccTestProfile(); + std::vector data = GetIccTestHeader(icc_profile, false); + + const uint8_t* next_in = data.data(); + size_t avail_in = 0; + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING)); + + bool seen_basic_info = false; + bool seen_color_encoding = false; + size_t total_size = 0; + + for (;;) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + size_t remaining = JxlDecoderReleaseInput(dec); + EXPECT_LE(remaining, avail_in); + next_in += avail_in - remaining; + avail_in = remaining; + if (status == JXL_DEC_NEED_MORE_INPUT) { + if (total_size >= data.size()) { + // End of partial codestream with codestream headers and ICC profile + // reached, it should not require more input since full image is not + // requested + FAIL(); + break; + } + size_t increment = 1; + if (total_size + increment > data.size()) { + increment = data.size() - total_size; + } + total_size += increment; + avail_in += increment; + } else if (status == JXL_DEC_BASIC_INFO) { + EXPECT_FALSE(seen_basic_info); + seen_basic_info = true; + } else if (status == JXL_DEC_COLOR_ENCODING) { + EXPECT_TRUE(seen_basic_info); + EXPECT_FALSE(seen_color_encoding); + seen_color_encoding = true; + + // Sanity check that the ICC profile was decoded correctly + size_t dec_profile_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize( + dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, &dec_profile_size)); + EXPECT_EQ(icc_profile.size(), dec_profile_size); + + } else if (status == JXL_DEC_SUCCESS) { + EXPECT_TRUE(seen_color_encoding); + break; + } else { + // We do not expect any other events or errors + FAIL(); + break; + } + } + + EXPECT_TRUE(seen_basic_info); + EXPECT_TRUE(seen_color_encoding); + + JxlDecoderDestroy(dec); +} + +struct 
PixelTestConfig { + // Input image definition. + bool grayscale; + bool include_alpha; + size_t xsize; + size_t ysize; + jxl::PreviewMode preview_mode; + bool add_intrinsic_size; + // Output format. + JxlEndianness endianness; + JxlDataType data_type; + uint32_t output_channels; + // Container options. + CodeStreamBoxFormat add_container; + // Decoding mode. + bool use_callback; + bool set_buffer_early; + bool use_resizable_runner; + // Exif orientation, 1-8 + JxlOrientation orientation; + bool keep_orientation; + size_t upsampling; +}; + +class DecodeTestParam : public ::testing::TestWithParam {}; + +TEST_P(DecodeTestParam, PixelTest) { + PixelTestConfig config = GetParam(); + JxlDecoder* dec = JxlDecoderCreate(NULL); + + if (config.keep_orientation) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetKeepOrientation(dec, JXL_TRUE)); + } + + size_t num_pixels = config.xsize * config.ysize; + uint32_t orig_channels = + (config.grayscale ? 1 : 3) + (config.include_alpha ? 1 : 0); + std::vector pixels = + jxl::test::GetSomeTestImage(config.xsize, config.ysize, orig_channels, 0); + JxlPixelFormat format_orig = {orig_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, + 0}; + jxl::TestCodestreamParams params; + // Lossless to verify pixels exactly after roundtrip. + params.cparams.SetLossless(); + params.cparams.speed_tier = jxl::SpeedTier::kThunder; + params.cparams.resampling = config.upsampling; + params.cparams.ec_resampling = config.upsampling; + params.box_format = config.add_container; + params.orientation = config.orientation; + params.preview_mode = config.preview_mode; + params.add_intrinsic_size = config.add_intrinsic_size; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), config.xsize, + config.ysize, orig_channels, params); + + JxlPixelFormat format = {config.output_channels, config.data_type, + config.endianness, 0}; + + bool swap_xy = !config.keep_orientation && (config.orientation > 4); + size_t xsize = swap_xy ? 
config.ysize : config.xsize; + size_t ysize = swap_xy ? config.xsize : config.ysize; + + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, config.use_callback, config.set_buffer_early, + config.use_resizable_runner, /*require_boxes=*/false, + /*expect_success=*/true); + JxlDecoderReset(dec); + EXPECT_EQ(num_pixels * config.output_channels * + jxl::test::GetDataBits(config.data_type) / jxl::kBitsPerByte, + pixels2.size()); + + // If an orientation transformation is expected, to compare the pixels, also + // apply this transformation to the original pixels. ConvertToExternal is + // used to achieve this, with a temporary conversion to CodecInOut and back. + if (config.orientation > 1 && !config.keep_orientation) { + jxl::Span bytes(pixels.data(), pixels.size()); + jxl::ColorEncoding color_encoding = + jxl::ColorEncoding::SRGB(config.grayscale); + + jxl::CodecInOut io; + if (config.include_alpha) io.metadata.m.SetAlphaBits(16); + io.metadata.m.color_encoding = color_encoding; + io.SetSize(config.xsize, config.ysize); + + EXPECT_TRUE(ConvertFromExternal(bytes, config.xsize, config.ysize, + color_encoding, 16, format_orig, nullptr, + &io.Main())); + + for (size_t i = 0; i < pixels.size(); i++) pixels[i] = 0; + EXPECT_TRUE(ConvertToExternal( + io.Main(), 16, + /*float_out=*/false, orig_channels, JXL_BIG_ENDIAN, + xsize * 2 * orig_channels, nullptr, pixels.data(), pixels.size(), + /*out_callback=*/{}, + static_cast(config.orientation))); + } + if (config.upsampling == 1) { + EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize, + ysize, format_orig, format)); + } else { + // resampling is of course not lossless, so as a rough check: + // count pixels that are more than off-by-25 in the 8-bit value of one of + // the channels + EXPECT_LE( + jxl::test::ComparePixels( + pixels.data(), pixels2.data(), xsize, ysize, format_orig, format, + 50.0 * (config.data_type == JXL_TYPE_UINT8 ? 
1.0 : 256.0)), + 300u); + } + + JxlDecoderDestroy(dec); +} + +std::vector GeneratePixelTests() { + std::vector all_tests; + struct ChannelInfo { + bool grayscale; + bool include_alpha; + size_t output_channels; + }; + ChannelInfo ch_info[] = { + {false, true, 4}, // RGBA -> RGBA + {true, false, 1}, // G -> G + {true, true, 1}, // GA -> G + {true, true, 2}, // GA -> GA + {false, false, 3}, // RGB -> RGB + {false, true, 3}, // RGBA -> RGB + {false, false, 4}, // RGB -> RGBA + }; + + struct OutputFormat { + JxlEndianness endianness; + JxlDataType data_type; + }; + OutputFormat out_formats[] = { + {JXL_NATIVE_ENDIAN, JXL_TYPE_UINT8}, + {JXL_LITTLE_ENDIAN, JXL_TYPE_UINT16}, + {JXL_BIG_ENDIAN, JXL_TYPE_UINT16}, + {JXL_NATIVE_ENDIAN, JXL_TYPE_FLOAT16}, + {JXL_LITTLE_ENDIAN, JXL_TYPE_FLOAT}, + {JXL_BIG_ENDIAN, JXL_TYPE_FLOAT}, + }; + + auto make_test = [&](ChannelInfo ch, size_t xsize, size_t ysize, + jxl::PreviewMode preview_mode, bool intrinsic_size, + CodeStreamBoxFormat box, JxlOrientation orientation, + bool keep_orientation, OutputFormat format, + bool use_callback, bool set_buffer_early, + bool resizable_runner, size_t upsampling) { + PixelTestConfig c; + c.grayscale = ch.grayscale; + c.include_alpha = ch.include_alpha; + c.preview_mode = preview_mode; + c.add_intrinsic_size = intrinsic_size; + c.xsize = xsize; + c.ysize = ysize; + c.add_container = (CodeStreamBoxFormat)box; + c.output_channels = ch.output_channels; + c.data_type = format.data_type; + c.endianness = format.endianness; + c.use_callback = use_callback; + c.set_buffer_early = set_buffer_early; + c.use_resizable_runner = resizable_runner; + c.orientation = orientation; + c.keep_orientation = keep_orientation; + c.upsampling = upsampling; + all_tests.push_back(c); + }; + + // Test output formats and methods. 
+ for (ChannelInfo ch : ch_info) { + for (int use_callback = 0; use_callback <= 1; use_callback++) { + for (size_t upsampling : {1, 2, 4, 8}) { + for (OutputFormat fmt : out_formats) { + make_test(ch, 301, 33, jxl::kNoPreview, + /*add_intrinsic_size=*/false, + CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY, + /*keep_orientation=*/false, fmt, use_callback, + /*set_buffer_early=*/false, /*resizable_runner=*/false, + upsampling); + } + } + } + } + // Test codestream formats. + for (size_t box = 1; box < kCSBF_NUM_ENTRIES; ++box) { + make_test(ch_info[0], 77, 33, jxl::kNoPreview, + /*add_intrinsic_size=*/false, (CodeStreamBoxFormat)box, + JXL_ORIENT_IDENTITY, + /*keep_orientation=*/false, out_formats[0], + /*use_callback=*/false, + /*set_buffer_early=*/false, /*resizable_runner=*/false, 1); + } + // Test previews. + for (int preview_mode = 0; preview_mode < jxl::kNumPreviewModes; + preview_mode++) { + make_test(ch_info[0], 77, 33, (jxl::PreviewMode)preview_mode, + /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None, + JXL_ORIENT_IDENTITY, + /*keep_orientation=*/false, out_formats[0], + /*use_callback=*/false, /*set_buffer_early=*/false, + /*resizable_runner=*/false, 1); + } + // Test intrinsic sizes. + for (int add_intrinsic_size = 0; add_intrinsic_size <= 1; + add_intrinsic_size++) { + make_test(ch_info[0], 55, 34, jxl::kNoPreview, add_intrinsic_size, + CodeStreamBoxFormat::kCSBF_None, JXL_ORIENT_IDENTITY, + /*keep_orientation=*/false, out_formats[0], + /*use_callback=*/false, /*set_buffer_early=*/false, + /*resizable_runner=*/false, 1); + } + // Test setting buffers early. 
+ make_test(ch_info[0], 300, 33, jxl::kNoPreview, + /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None, + JXL_ORIENT_IDENTITY, + /*keep_orientation=*/false, out_formats[0], + /*use_callback=*/false, /*set_buffer_early=*/true, + /*resizable_runner=*/false, 1); + + // Test using the resizable runner + for (size_t i = 0; i < 4; i++) { + make_test(ch_info[0], 300 << i, 33 << i, jxl::kNoPreview, + /*add_intrinsic_size=*/false, CodeStreamBoxFormat::kCSBF_None, + JXL_ORIENT_IDENTITY, + /*keep_orientation=*/false, out_formats[0], + /*use_callback=*/false, /*set_buffer_early=*/false, + /*resizable_runner=*/true, 1); + } + + // Test orientations. + for (int orientation = 2; orientation <= 8; ++orientation) { + for (int keep_orientation = 0; keep_orientation <= 1; keep_orientation++) { + for (int use_callback = 0; use_callback <= 1; use_callback++) { + for (ChannelInfo ch : ch_info) { + for (OutputFormat fmt : out_formats) { + make_test(ch, 280, 12, jxl::kNoPreview, + /*add_intrinsic_size=*/false, + CodeStreamBoxFormat::kCSBF_None, + static_cast(orientation), + /*keep_orientation=*/keep_orientation, fmt, + /*use_callback=*/use_callback, /*set_buffer_early=*/true, + /*resizable_runner=*/false, 1); + } + } + } + } + } + + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, const PixelTestConfig& c) { + os << c.xsize << "x" << c.ysize; + const char* colors[] = {"", "G", "GA", "RGB", "RGBA"}; + os << colors[(c.grayscale ? 1 : 3) + (c.include_alpha ? 
1 : 0)]; + os << "to"; + os << colors[c.output_channels]; + switch (c.data_type) { + case JXL_TYPE_UINT8: + os << "u8"; + break; + case JXL_TYPE_UINT16: + os << "u16"; + break; + case JXL_TYPE_FLOAT: + os << "f32"; + break; + case JXL_TYPE_FLOAT16: + os << "f16"; + break; + default: + JXL_ASSERT(false); + }; + if (jxl::test::GetDataBits(c.data_type) > jxl::kBitsPerByte) { + if (c.endianness == JXL_NATIVE_ENDIAN) { + // add nothing + } else if (c.endianness == JXL_BIG_ENDIAN) { + os << "BE"; + } else if (c.endianness == JXL_LITTLE_ENDIAN) { + os << "LE"; + } + } + if (c.add_container != CodeStreamBoxFormat::kCSBF_None) { + os << "Box"; + os << (size_t)c.add_container; + } + if (c.preview_mode == jxl::kSmallPreview) os << "Preview"; + if (c.preview_mode == jxl::kBigPreview) os << "BigPreview"; + if (c.add_intrinsic_size) os << "IntrinicSize"; + if (c.use_callback) os << "Callback"; + if (c.set_buffer_early) os << "EarlyBuffer"; + if (c.use_resizable_runner) os << "ResizableRunner"; + if (c.orientation != 1) os << "O" << c.orientation; + if (c.keep_orientation) os << "Keep"; + if (c.upsampling > 1) os << "x" << c.upsampling; + return os; +} + +std::string PixelTestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DecodeTest, DecodeTestParam, + testing::ValuesIn(GeneratePixelTests()), + PixelTestDescription); + +TEST(DecodeTest, PixelTestWithICCProfileLossless) { + JxlDecoder* dec = JxlDecoderCreate(NULL); + + size_t xsize = 123, ysize = 77; + size_t num_pixels = xsize * ysize; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + jxl::TestCodestreamParams params; + // Lossless to verify pixels exactly after roundtrip. 
+ params.cparams.SetLossless(); + params.cparams.speed_tier = jxl::SpeedTier::kThunder; + params.add_icc_profile = true; + // For variation: some have container and no preview, others have preview + // and no container. + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 4, + params); + + for (uint32_t channels = 3; channels <= 4; ++channels) { + { + JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}; + + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/false, /*set_buffer_early=*/false, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success=*/true); + JxlDecoderReset(dec); + EXPECT_EQ(num_pixels * channels, pixels2.size()); + EXPECT_EQ(0u, + jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize, + ysize, format_orig, format)); + } + { + JxlPixelFormat format = {channels, JXL_TYPE_UINT16, JXL_LITTLE_ENDIAN, 0}; + + // Test with the container for one of the pixel formats. 
+ std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/true, /*set_buffer_early=*/true, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success=*/true); + JxlDecoderReset(dec); + EXPECT_EQ(num_pixels * channels * 2, pixels2.size()); + EXPECT_EQ(0u, + jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize, + ysize, format_orig, format)); + } + + { + JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0}; + + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/false, /*set_buffer_early=*/false, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success=*/true); + JxlDecoderReset(dec); + EXPECT_EQ(num_pixels * channels * 4, pixels2.size()); + EXPECT_EQ(0u, + jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize, + ysize, format_orig, format)); + } + } + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, PixelTestWithICCProfileLossy) { + JxlDecoder* dec = JxlDecoderCreate(NULL); + + size_t xsize = 123, ysize = 77; + size_t num_pixels = xsize * ysize; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + jxl::TestCodestreamParams params; + params.add_icc_profile = true; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params); + uint32_t channels = 3; + + JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0}; + + jxl::PaddedBytes icc; + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/false, /*set_buffer_early=*/true, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success=*/true, /*icc=*/&icc); + JxlDecoderReset(dec); + EXPECT_EQ(num_pixels * channels * 4, pixels2.size()); + + 
// The input pixels use the profile matching GetIccTestProfile, since we set + // add_icc_profile for CreateTestJXLCodestream to true. + jxl::ColorEncoding color_encoding0; + EXPECT_TRUE(color_encoding0.SetICC(GetIccTestProfile(), &jxl::GetJxlCms())); + jxl::Span span0(pixels.data(), pixels.size()); + jxl::CodecInOut io0; + io0.SetSize(xsize, ysize); + EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0, + /*bits_per_sample=*/16, format_orig, + /*pool=*/nullptr, &io0.Main())); + + jxl::ColorEncoding color_encoding1; + EXPECT_TRUE(color_encoding1.SetICC(std::move(icc), &jxl::GetJxlCms())); + jxl::Span span1(pixels2.data(), pixels2.size()); + jxl::CodecInOut io1; + io1.SetSize(xsize, ysize); + EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1, + /*bits_per_sample=*/32, format, + /*pool=*/nullptr, &io1.Main())); + + jxl::ButteraugliParams ba; + EXPECT_THAT(ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(), + /*distmap=*/nullptr, nullptr), +#if JXL_HIGH_PRECISION + IsSlightlyBelow(0.9f)); +#else + IsSlightlyBelow(0.98f)); +#endif + + JxlDecoderDestroy(dec); +} + +std::string ColorDescription(JxlColorEncoding c) { + jxl::ColorEncoding color_encoding; + EXPECT_TRUE(ConvertExternalToInternalColorEncoding(c, &color_encoding)); + return Description(color_encoding); +} + +std::string GetOrigProfile(JxlDecoder* dec) { + JxlColorEncoding c; + JxlColorProfileTarget target = JXL_COLOR_PROFILE_TARGET_ORIGINAL; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetColorAsEncodedProfile(dec, target, &c)); + return ColorDescription(c); +} + +std::string GetDataProfile(JxlDecoder* dec) { + JxlColorEncoding c; + JxlColorProfileTarget target = JXL_COLOR_PROFILE_TARGET_DATA; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetColorAsEncodedProfile(dec, target, &c)); + return ColorDescription(c); +} + +double ButteraugliDistance(size_t xsize, size_t ysize, + const std::vector& pixels_in, + const jxl::ColorEncoding& color_in, + float intensity_in, + const 
std::vector& pixels_out, + const jxl::ColorEncoding& color_out, + float intensity_out) { + jxl::CodecInOut in; + in.metadata.m.color_encoding = color_in; + in.metadata.m.SetIntensityTarget(intensity_in); + JxlPixelFormat format_in = {static_cast(color_in.Channels()), + JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + EXPECT_TRUE(jxl::ConvertFromExternal( + jxl::Span(pixels_in.data(), pixels_in.size()), xsize, + ysize, color_in, + /*bits_per_sample=*/16, format_in, + /*pool=*/nullptr, &in.Main())); + jxl::CodecInOut out; + out.metadata.m.color_encoding = color_out; + out.metadata.m.SetIntensityTarget(intensity_out); + JxlPixelFormat format_out = {static_cast(color_out.Channels()), + JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + EXPECT_TRUE(jxl::ConvertFromExternal( + jxl::Span(pixels_out.data(), pixels_out.size()), xsize, + ysize, color_out, + /*bits_per_sample=*/16, format_out, + /*pool=*/nullptr, &out.Main())); + return ButteraugliDistance(in.frames, out.frames, jxl::ButteraugliParams(), + jxl::GetJxlCms(), nullptr, nullptr); +} + +class DecodeAllEncodingsTest + : public ::testing::TestWithParam {}; +JXL_GTEST_INSTANTIATE_TEST_SUITE_P( + DecodeAllEncodingsTestInstantiation, DecodeAllEncodingsTest, + ::testing::ValuesIn(jxl::test::AllEncodings())); +TEST_P(DecodeAllEncodingsTest, PreserveOriginalProfileTest) { + size_t xsize = 123, ysize = 77; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + int events = JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE; + const auto& cdesc = GetParam(); + jxl::ColorEncoding c_in = jxl::test::ColorEncodingFromDescriptor(cdesc); + if (c_in.rendering_intent != jxl::RenderingIntent::kRelative) return; + std::string color_space_in = Description(c_in); + float intensity_in = c_in.tf.IsPQ() ? 
10000 : 255; + printf("Testing input color space %s\n", color_space_in.c_str()); + jxl::TestCodestreamParams params; + params.color_space = color_space_in; + params.intensity_target = intensity_in; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params); + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), data.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + EXPECT_FALSE(info.uses_original_profile); + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + EXPECT_EQ(GetOrigProfile(dec), color_space_in); + EXPECT_EQ(GetDataProfile(dec), color_space_in); + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + std::vector out(pixels.size()); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, out.data(), out.size())); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + double dist = ButteraugliDistance(xsize, ysize, pixels, c_in, intensity_in, + out, c_in, intensity_in); + EXPECT_LT(dist, 1.29); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + JxlDecoderDestroy(dec); +} + +namespace { +void SetPreferredColorProfileTest( + const jxl::test::ColorEncodingDescriptor& from) { + size_t xsize = 123, ysize = 77; + int events = JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_FULL_IMAGE; + jxl::ColorEncoding c_in = jxl::test::ColorEncodingFromDescriptor(from); + if (c_in.rendering_intent != jxl::RenderingIntent::kRelative) return; + if (c_in.white_point != jxl::WhitePoint::kD65) return; + uint32_t num_channels = c_in.Channels(); + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + JxlPixelFormat 
format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::string color_space_in = Description(c_in); + float intensity_in = c_in.tf.IsPQ() ? 10000 : 255; + jxl::TestCodestreamParams params; + params.color_space = color_space_in; + params.intensity_target = intensity_in; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + auto all_encodings = jxl::test::AllEncodings(); + all_encodings.push_back( + {jxl::ColorSpace::kXYB, jxl::WhitePoint::kD65, jxl::Primaries::kCustom, + jxl::TransferFunction::kUnknown, jxl::RenderingIntent::kPerceptual}); + for (const auto& c1 : all_encodings) { + jxl::ColorEncoding c_out = jxl::test::ColorEncodingFromDescriptor(c1); + float intensity_out = intensity_in; + if (c_out.GetColorSpace() != jxl::ColorSpace::kXYB) { + if (c_out.rendering_intent != jxl::RenderingIntent::kRelative) { + continue; + } + if ((c_in.primaries == jxl::Primaries::k2100 && + c_out.primaries != jxl::Primaries::k2100) || + (c_in.primaries == jxl::Primaries::kP3 && + c_out.primaries == jxl::Primaries::kSRGB)) { + // Converting to a narrower gamut does not work without gammut mapping. + continue; + } + } + if (c_out.tf.IsHLG() && intensity_out > 300) { + // The Linear->HLG OOTF function at this intensity level can push + // saturated colors out of gamut, so we would need gamut mapping in + // this case too. 
+ continue; + } + std::string color_space_out = Description(c_out); + if (color_space_in == color_space_out) continue; + printf("Testing input color space %s with output color space %s\n", + color_space_in.c_str(), color_space_out.c_str()); + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, data.data(), data.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + EXPECT_FALSE(info.uses_original_profile); + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + EXPECT_EQ(GetOrigProfile(dec), color_space_in); + EXPECT_EQ(GetDataProfile(dec), color_space_in); + JxlColorEncoding encoding_out; + EXPECT_TRUE(jxl::ParseDescription(color_space_out, &encoding_out)); + if (c_out.GetColorSpace() == jxl::ColorSpace::kXYB && + (c_in.primaries != jxl::Primaries::kSRGB || c_in.tf.IsPQ())) { + EXPECT_EQ(JXL_DEC_ERROR, + JxlDecoderSetPreferredColorProfile(dec, &encoding_out)); + JxlDecoderDestroy(dec); + continue; + } + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreferredColorProfile(dec, &encoding_out)); + EXPECT_EQ(GetOrigProfile(dec), color_space_in); + EXPECT_EQ(GetDataProfile(dec), color_space_out); + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + size_t buffer_size; + JxlPixelFormat out_format = format; + out_format.num_channels = c_out.Channels(); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &out_format, &buffer_size)); + std::vector out(buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &out_format, out.data(), out.size())); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + double dist = ButteraugliDistance(xsize, ysize, pixels, c_in, intensity_in, + out, c_out, intensity_out); + if 
(c_in.white_point == c_out.white_point) { + EXPECT_LT(dist, 1.29); + } else { + EXPECT_LT(dist, 4.0); + } + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + JxlDecoderDestroy(dec); + } +} +} // namespace + +TEST(DecodeTest, SetPreferredColorProfileTestFromGray) { + jxl::test::ColorEncodingDescriptor gray = { + jxl::ColorSpace::kGray, jxl::WhitePoint::kD65, jxl::Primaries::kSRGB, + jxl::TransferFunction::kSRGB, jxl::RenderingIntent::kRelative}; + SetPreferredColorProfileTest(gray); +} + +TEST_P(DecodeAllEncodingsTest, SetPreferredColorProfileTest) { + const auto& from = GetParam(); + SetPreferredColorProfileTest(from); +} + +// Tests the case of lossy sRGB image without alpha channel, decoded to RGB8 +// and to RGBA8 +TEST(DecodeTest, PixelTestOpaqueSrgbLossy) { + for (unsigned channels = 3; channels <= 4; channels++) { + JxlDecoder* dec = JxlDecoderCreate(NULL); + + size_t xsize = 123, ysize = 77; + size_t num_pixels = xsize * ysize; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + jxl::TestCodestreamParams()); + + JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}; + + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/true, /*set_buffer_early=*/false, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success*/ true); + JxlDecoderReset(dec); + EXPECT_EQ(num_pixels * channels, pixels2.size()); + + jxl::ColorEncoding color_encoding0 = jxl::ColorEncoding::SRGB(false); + jxl::Span span0(pixels.data(), pixels.size()); + jxl::CodecInOut io0; + io0.SetSize(xsize, ysize); + EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0, + /*bits_per_sample=*/16, format_orig, + /*pool=*/nullptr, &io0.Main())); + + 
jxl::ColorEncoding color_encoding1 = jxl::ColorEncoding::SRGB(false); + jxl::Span span1(pixels2.data(), pixels2.size()); + jxl::CodecInOut io1; + EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1, + /*bits_per_sample=*/8, format, + /*pool=*/nullptr, &io1.Main())); + + jxl::ButteraugliParams ba; + EXPECT_THAT( + ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(), + /*distmap=*/nullptr, nullptr), +#if JXL_HIGH_PRECISION + IsSlightlyBelow(0.93f)); +#else + IsSlightlyBelow(0.94f)); +#endif + + JxlDecoderDestroy(dec); + } +} + +// Opaque image with noise enabled, decoded to RGB8 and RGBA8. +TEST(DecodeTest, PixelTestOpaqueSrgbLossyNoise) { + for (unsigned channels = 3; channels <= 4; channels++) { + JxlDecoder* dec = JxlDecoderCreate(NULL); + + size_t xsize = 512, ysize = 300; + size_t num_pixels = xsize * ysize; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + jxl::TestCodestreamParams params; + params.cparams.noise = jxl::Override::kOn; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params); + + JxlPixelFormat format = {channels, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}; + + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/false, /*set_buffer_early=*/true, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success=*/true); + JxlDecoderReset(dec); + EXPECT_EQ(num_pixels * channels, pixels2.size()); + + jxl::ColorEncoding color_encoding0 = jxl::ColorEncoding::SRGB(false); + jxl::Span span0(pixels.data(), pixels.size()); + jxl::CodecInOut io0; + io0.SetSize(xsize, ysize); + EXPECT_TRUE(ConvertFromExternal(span0, xsize, ysize, color_encoding0, + /*bits_per_sample=*/16, format_orig, + /*pool=*/nullptr, &io0.Main())); + + jxl::ColorEncoding color_encoding1 = 
jxl::ColorEncoding::SRGB(false); + jxl::Span span1(pixels2.data(), pixels2.size()); + jxl::CodecInOut io1; + EXPECT_TRUE(ConvertFromExternal(span1, xsize, ysize, color_encoding1, + /*bits_per_sample=*/8, format, + /*pool=*/nullptr, &io1.Main())); + + jxl::ButteraugliParams ba; + EXPECT_THAT( + ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(), + /*distmap=*/nullptr, nullptr), + IsSlightlyBelow(2.04444f)); + + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, ProcessEmptyInputWithBoxes) { + size_t xsize = 123, ysize = 77; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + jxl::CompressParams cparams; + uint32_t channels = 3; + JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0}; + for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + JxlDecoder* dec = JxlDecoderCreate(NULL); + jxl::TestCodestreamParams params; + params.box_format = (CodeStreamBoxFormat)i; + printf("Testing empty input with box format %d\n", (int)params.box_format); + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params); + const int events = + JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_COLOR_ENCODING; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events)); + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), compressed.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + const size_t remaining = JxlDecoderReleaseInput(dec); + EXPECT_LE(remaining, compressed.size()); + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, 
ExtraBytesAfterCompressedStream) { + size_t xsize = 123, ysize = 77; + size_t num_pixels = xsize * ysize; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + jxl::CompressParams cparams; + for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + CodeStreamBoxFormat box_format = (CodeStreamBoxFormat)i; + if (box_format == kCSBF_Multi_Other_Zero_Terminated) continue; + printf("Testing with box format %d\n", (int)box_format); + size_t last_unknown_box_size = 0; + if (box_format == kCSBF_Single_Other) { + last_unknown_box_size = unk1_box_size + 8; + } else if (box_format == kCSBF_Multi_Other_Terminated) { + last_unknown_box_size = unk3_box_size + 8; + } else if (box_format == kCSBF_Multi_Last_Empty_Other) { + // If boxes are not required, the decoder won't consume the last empty + // jxlp box. + last_unknown_box_size = 12 + unk3_box_size + 8; + } + jxl::TestCodestreamParams params; + params.box_format = box_format; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params); + // Add some more bytes after compressed data. 
+ compressed.push_back(0); + compressed.push_back(1); + compressed.push_back(2); + JxlDecoder* dec = JxlDecoderCreate(NULL); + uint32_t channels = 3; + JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0}; + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/false, /*set_buffer_early=*/true, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success=*/true); + size_t unconsumed_bytes = JxlDecoderReleaseInput(dec); + EXPECT_EQ(last_unknown_box_size + 3, unconsumed_bytes); + EXPECT_EQ(num_pixels * channels * 4, pixels2.size()); + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, ExtraBytesAfterCompressedStreamRequireBoxes) { + size_t xsize = 123, ysize = 77; + size_t num_pixels = xsize * ysize; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + jxl::CompressParams cparams; + for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + CodeStreamBoxFormat box_format = (CodeStreamBoxFormat)i; + if (box_format == kCSBF_Multi_Other_Zero_Terminated) continue; + printf("Testing with box format %d\n", (int)box_format); + bool expect_success = (box_format == kCSBF_None || + box_format == kCSBF_Single_Zero_Terminated || + box_format == kCSBF_Multi_Zero_Terminated); + jxl::TestCodestreamParams params; + params.box_format = box_format; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params); + // Add some more bytes after compressed data. 
+ compressed.push_back(0); + compressed.push_back(1); + compressed.push_back(2); + JxlDecoder* dec = JxlDecoderCreate(NULL); + uint32_t channels = 3; + JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0}; + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(compressed.data(), compressed.size()), + format, /*use_callback=*/false, /*set_buffer_early=*/true, + /*use_resizable_runner=*/false, /*require_boxes=*/true, expect_success); + size_t unconsumed_bytes = JxlDecoderReleaseInput(dec); + EXPECT_EQ(3, unconsumed_bytes); + EXPECT_EQ(num_pixels * channels * 4, pixels2.size()); + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, ConcatenatedCompressedStreams) { + size_t xsize = 123, ysize = 77; + size_t num_pixels = xsize * ysize; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + jxl::CompressParams cparams; + for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + CodeStreamBoxFormat first_box_format = (CodeStreamBoxFormat)i; + if (first_box_format == kCSBF_Multi_Other_Zero_Terminated) continue; + jxl::TestCodestreamParams params1; + params1.box_format = first_box_format; + jxl::PaddedBytes compressed1 = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params1); + for (int j = 0; j < kCSBF_NUM_ENTRIES; ++j) { + CodeStreamBoxFormat second_box_format = (CodeStreamBoxFormat)j; + if (second_box_format == kCSBF_Multi_Other_Zero_Terminated) continue; + printf("Testing with box format pair %d, %d\n", (int)first_box_format, + (int)second_box_format); + jxl::TestCodestreamParams params2; + params2.box_format = second_box_format; + jxl::PaddedBytes compressed2 = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + 3, params2); + jxl::PaddedBytes concat; + concat.append(compressed1); + concat.append(compressed2); + uint32_t channels = 3; + JxlPixelFormat format = {channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, 0}; + size_t remaining = concat.size(); + for 
(int part = 0; part < 2; ++part) { + printf(" Decoding part %d\n", part + 1); + JxlDecoder* dec = JxlDecoderCreate(NULL); + size_t pos = concat.size() - remaining; + bool expect_success = + (part == 0 || second_box_format == kCSBF_None || + second_box_format == kCSBF_Single_Zero_Terminated || + second_box_format == kCSBF_Multi_Zero_Terminated); + std::vector pixels2 = jxl::DecodeWithAPI( + dec, jxl::Span(concat.data() + pos, remaining), + format, /*use_callback=*/false, /*set_buffer_early=*/true, + /*use_resizable_runner=*/false, /*require_boxes=*/true, + expect_success); + EXPECT_EQ(num_pixels * channels * 4, pixels2.size()); + remaining = JxlDecoderReleaseInput(dec); + JxlDecoderDestroy(dec); + } + EXPECT_EQ(0, remaining); + } + } +} + +void TestPartialStream(bool reconstructible_jpeg) { + size_t xsize = 123, ysize = 77; + uint32_t channels = 4; + if (reconstructible_jpeg) { + channels = 3; + } + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, channels, 0); + JxlPixelFormat format_orig = {channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + jxl::TestCodestreamParams params; + if (reconstructible_jpeg) { + params.cparams.color_transform = jxl::ColorTransform::kNone; + } else { + // Lossless to verify pixels exactly after roundtrip. + params.cparams.SetLossless(); + } + + std::vector pixels2; + pixels2.resize(pixels.size()); + + jxl::PaddedBytes jpeg_output(64); + size_t used_jpeg_output = 0; + + std::vector codestreams(kCSBF_NUM_ENTRIES); + std::vector jpeg_codestreams(kCSBF_NUM_ENTRIES); + for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + params.box_format = (CodeStreamBoxFormat)i; + if (reconstructible_jpeg) { + params.jpeg_codestream = &jpeg_codestreams[i]; + } + codestreams[i] = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + channels, params); + } + + // Test multiple step sizes, to test different combinations of the streaming + // box parsing. 
+ std::vector increments = {1, 3, 17, 23, 120, 700, 1050}; + + for (size_t index = 0; index < increments.size(); index++) { + for (size_t i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + if (reconstructible_jpeg && + (CodeStreamBoxFormat)i == CodeStreamBoxFormat::kCSBF_None) { + continue; + } + const jxl::PaddedBytes& data = codestreams[i]; + const uint8_t* next_in = data.data(); + size_t avail_in = 0; + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | + JXL_DEC_JPEG_RECONSTRUCTION)); + + bool seen_basic_info = false; + bool seen_full_image = false; + bool seen_jpeg_recon = false; + + size_t total_size = 0; + + for (;;) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + size_t remaining = JxlDecoderReleaseInput(dec); + EXPECT_LE(remaining, avail_in); + next_in += avail_in - remaining; + avail_in = remaining; + if (status == JXL_DEC_NEED_MORE_INPUT) { + if (total_size >= data.size()) { + // End of test data reached, it should have successfully decoded the + // image now. + FAIL(); + break; + } + + size_t increment = increments[index]; + // End of the file reached, should be the final test. 
+ if (total_size + increment > data.size()) { + increment = data.size() - total_size; + } + total_size += increment; + avail_in += increment; + } else if (status == JXL_DEC_BASIC_INFO) { + // This event should happen exactly once + EXPECT_FALSE(seen_basic_info); + if (seen_basic_info) break; + seen_basic_info = true; + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + } else if (status == JXL_DEC_JPEG_RECONSTRUCTION) { + EXPECT_FALSE(seen_basic_info); + EXPECT_FALSE(seen_full_image); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetJPEGBuffer(dec, jpeg_output.data(), + jpeg_output.size())); + seen_jpeg_recon = true; + } else if (status == JXL_DEC_JPEG_NEED_MORE_OUTPUT) { + EXPECT_TRUE(seen_jpeg_recon); + used_jpeg_output = + jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec); + jpeg_output.resize(jpeg_output.size() * 2); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetJPEGBuffer( + dec, jpeg_output.data() + used_jpeg_output, + jpeg_output.size() - used_jpeg_output)); + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer( + dec, &format_orig, pixels2.data(), pixels2.size())); + } else if (status == JXL_DEC_FULL_IMAGE) { + // This event should happen exactly once + EXPECT_FALSE(seen_full_image); + if (seen_full_image) break; + // This event should happen after basic info + EXPECT_TRUE(seen_basic_info); + seen_full_image = true; + if (reconstructible_jpeg) { + used_jpeg_output = + jpeg_output.size() - JxlDecoderReleaseJPEGBuffer(dec); + EXPECT_EQ(used_jpeg_output, jpeg_codestreams[i].size()); + EXPECT_EQ(0, memcmp(jpeg_output.data(), jpeg_codestreams[i].data(), + used_jpeg_output)); + } else { + EXPECT_EQ(pixels, pixels2); + } + } else if (status == JXL_DEC_SUCCESS) { + EXPECT_TRUE(seen_full_image); + break; + } else { + // We do not expect any other events or errors + FAIL(); + break; + } + } + + // Ensure the 
decoder emitted the basic info and full image events + EXPECT_TRUE(seen_basic_info); + EXPECT_TRUE(seen_full_image); + + JxlDecoderDestroy(dec); + } + } +} + +// Tests the return status when trying to decode pixels on incomplete file: it +// should return JXL_DEC_NEED_MORE_INPUT, not error. +TEST(DecodeTest, PixelPartialTest) { TestPartialStream(false); } + +// Tests the return status when trying to decode JPEG bytes on incomplete file. +TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGPartialTest)) { + TEST_LIBJPEG_SUPPORT(); + TestPartialStream(true); +} + +// The DC event still exists, but is no longer implemented, it is deprecated. +TEST(DecodeTest, DCNotGettableTest) { + // 1x1 pixel JXL image + std::string compressed( + "\377\n\0\20\260\23\0H\200(" + "\0\334\0U\17\0\0\250P\31e\334\340\345\\\317\227\37:," + "\246m\\gh\253m\vK\22E\306\261I\252C&pH\22\353 " + "\363\6\22\bp\0\200\237\34\231W2d\255$\1", + 68); + + JxlDecoder* dec = JxlDecoderCreate(NULL); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput( + dec, reinterpret_cast(compressed.data()), + compressed.size())); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + + // Since the image is only 1x1 pixel, there is only 1 group, the decoder is + // unable to get DC size from this, and will not return the DC at all. Since + // no full image is requested either, it is expected to return success. 
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, PreviewTest) { + size_t xsize = 77, ysize = 120; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + JxlPixelFormat format_orig = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + for (jxl::PreviewMode mode : {jxl::kSmallPreview, jxl::kBigPreview}) { + jxl::TestCodestreamParams params; + params.preview_mode = mode; + + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 3, + params); + + JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}; + + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_PREVIEW_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)); + + jxl::ColorEncoding c_srgb = jxl::ColorEncoding::SRGB(false); + jxl::CodecInOut io0; + EXPECT_TRUE(jxl::ConvertFromExternal( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + c_srgb, /*bits_per_sample=*/16, format_orig, /*pool=*/nullptr, + &io0.Main())); + GeneratePreview(params.preview_mode, &io0.Main()); + + size_t xsize_preview = io0.Main().xsize(); + size_t ysize_preview = io0.Main().ysize(); + EXPECT_EQ(xsize_preview, info.preview.xsize); + EXPECT_EQ(ysize_preview, info.preview.ysize); + EXPECT_EQ(xsize_preview * ysize_preview * 3, buffer_size); + + EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + std::vector preview(buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreviewOutBuffer(dec, &format, 
preview.data(), + preview.size())); + + EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec)); + + jxl::CodecInOut io1; + EXPECT_TRUE(jxl::ConvertFromExternal( + jxl::Span(preview.data(), preview.size()), xsize_preview, + ysize_preview, c_srgb, + /*bits_per_sample=*/8, format, + /*pool=*/nullptr, &io1.Main())); + + jxl::ButteraugliParams ba; + // TODO(lode): this ButteraugliDistance silently returns 0 (dangerous for + // tests) if xsize or ysize is < 8, no matter how different the images, a + // tiny size that could happen for a preview. ButteraugliDiffmap does + // support smaller than 8x8, but jxl's ButteraugliDistance does not. Perhaps + // move butteraugli's <8x8 handling from ButteraugliDiffmap to + // ButteraugliComparator::Diffmap in butteraugli.cc. + EXPECT_LE(ButteraugliDistance(io0.frames, io1.frames, ba, jxl::GetJxlCms(), + /*distmap=*/nullptr, nullptr), + mode == jxl::kSmallPreview ? 0.7f : 1.2f); + + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, AlignTest) { + size_t xsize = 123, ysize = 77; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::TestCodestreamParams params; + // Lossless to verify pixels exactly after roundtrip. + params.cparams.SetLossless(); + params.cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 4, + params); + + size_t align = 17; + JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, align}; + // On purpose not using jxl::RoundUpTo to test it independently. 
+ size_t expected_line_bytes = (1 * 3 * xsize + align - 1) / align * align; + + for (int use_callback = 0; use_callback <= 1; ++use_callback) { + std::vector pixels2 = jxl::DecodeWithAPI( + jxl::Span(compressed.data(), compressed.size()), format, + use_callback, /*set_buffer_early=*/false, + /*use_resizable_runner=*/false, /*require_boxes=*/false, + /*expect_success=*/true); + EXPECT_EQ(expected_line_bytes * ysize, pixels2.size()); + EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize, + ysize, format_orig, format)); + } +} + +TEST(DecodeTest, AnimationTest) { + size_t xsize = 123, ysize = 77; + static const size_t num_frames = 2; + std::vector frames[2]; + frames[0] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + frames[1] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 1); + JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetUintSamples(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.have_animation = true; + io.frames.clear(); + io.frames.reserve(num_frames); + io.SetSize(xsize, ysize); + + std::vector frame_durations(num_frames); + for (size_t i = 0; i < num_frames; ++i) { + frame_durations[i] = 5 + i; + } + + for (size_t i = 0; i < num_frames; ++i) { + jxl::ImageBundle bundle(&io.metadata.m); + + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frames[i].data(), frames[i].size()), xsize, + ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle)); + bundle.duration = frame_durations[i]; + io.frames.push_back(std::move(bundle)); + } + + jxl::CompressParams cparams; + cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip. 
+ cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::AuxOut aux_out; + jxl::PaddedBytes compressed; + jxl::PassesEncoderState enc_state; + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, + jxl::GetJxlCms(), &aux_out, nullptr)); + + // Decode and test the animation frames + + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + + for (size_t i = 0; i < num_frames; ++i) { + std::vector pixels(buffer_size); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ(frame_durations[i], frame_header.duration); + EXPECT_EQ(0u, frame_header.name_length); + // For now, test with empty name, there's currently no easy way to encode + // a jxl file with a frame name because ImageBundle doesn't have a + // jxl::FrameHeader to set the name in. We can test the null termination + // character though. 
+ char name; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameName(dec, &name, 1)); + EXPECT_EQ(0, name); + + EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + xsize, ysize, format, format)); + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, AnimationTestStreaming) { + size_t xsize = 123, ysize = 77; + static const size_t num_frames = 2; + std::vector frames[2]; + frames[0] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 0); + frames[1] = jxl::test::GetSomeTestImage(xsize, ysize, 3, 1); + JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetUintSamples(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.have_animation = true; + io.frames.clear(); + io.frames.reserve(num_frames); + io.SetSize(xsize, ysize); + + std::vector frame_durations(num_frames); + for (size_t i = 0; i < num_frames; ++i) { + frame_durations[i] = 5 + i; + } + + for (size_t i = 0; i < num_frames; ++i) { + jxl::ImageBundle bundle(&io.metadata.m); + + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frames[i].data(), frames[i].size()), xsize, + ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle)); + bundle.duration = frame_durations[i]; + io.frames.push_back(std::move(bundle)); + } + + jxl::CompressParams cparams; + cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip. 
+ cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::AuxOut aux_out; + jxl::PaddedBytes compressed; + jxl::PassesEncoderState enc_state; + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, + jxl::GetJxlCms(), &aux_out, nullptr)); + + // Decode and test the animation frames + + const size_t step_size = 16; + + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = 0; + size_t frame_headers_seen = 0; + size_t frames_seen = 0; + bool seen_basic_info = false; + + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + + std::vector frames2[2]; + for (size_t i = 0; i < num_frames; ++i) { + frames2[i].resize(frames[i].size()); + } + + size_t total_in = 0; + size_t loop_count = 0; + + for (;;) { + if (loop_count++ > compressed.size()) { + fprintf(stderr, "Too many loops\n"); + FAIL(); + break; + } + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + auto status = JxlDecoderProcessInput(dec); + size_t remaining = JxlDecoderReleaseInput(dec); + EXPECT_LE(remaining, avail_in); + next_in += avail_in - remaining; + avail_in = remaining; + + if (status == JXL_DEC_SUCCESS) { + break; + } else if (status == JXL_DEC_ERROR) { + FAIL(); + } else if (status == JXL_DEC_NEED_MORE_INPUT) { + if (total_in >= compressed.size()) { + fprintf(stderr, "Already gave all input data\n"); + FAIL(); + break; + } + size_t amount = step_size; + if (total_in + amount > compressed.size()) { + amount = compressed.size() - total_in; + } + avail_in += amount; + total_in += amount; + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, frames2[frames_seen].data(), 
+ frames2[frames_seen].size())); + } else if (status == JXL_DEC_BASIC_INFO) { + EXPECT_EQ(false, seen_basic_info); + seen_basic_info = true; + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + } else if (status == JXL_DEC_FRAME) { + EXPECT_EQ(true, seen_basic_info); + frame_headers_seen++; + } else if (status == JXL_DEC_FULL_IMAGE) { + frames_seen++; + EXPECT_EQ(frame_headers_seen, frames_seen); + } else { + fprintf(stderr, "Unexpected status: %d\n", (int)status); + FAIL(); + } + } + + EXPECT_EQ(true, seen_basic_info); + EXPECT_EQ(num_frames, frames_seen); + EXPECT_EQ(num_frames, frame_headers_seen); + for (size_t i = 0; i < num_frames; ++i) { + EXPECT_EQ(frames[i], frames2[i]); + } + + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, ExtraChannelTest) { + size_t xsize = 55, ysize = 257; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::TestCodestreamParams params; + // Lossless to verify pixels exactly after roundtrip. 
+ params.cparams.SetLossless(); + params.cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 4, + params); + + size_t align = 17; + JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, align}; + + JxlDecoder* dec = JxlDecoderCreate(NULL); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), compressed.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(1u, info.num_extra_channels); + EXPECT_EQ(JXL_FALSE, info.alpha_premultiplied); + + JxlExtraChannelInfo extra_info; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetExtraChannelInfo(dec, 0, &extra_info)); + EXPECT_EQ(0, extra_info.type); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + size_t extra_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderExtraChannelBufferSize(dec, &format, &extra_size, 0)); + + std::vector image(buffer_size); + std::vector extra(extra_size); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, image.data(), image.size())); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetExtraChannelBuffer( + dec, &format, extra.data(), extra.size(), 0)); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + + // After the full image was output, JxlDecoderProcessInput should return + // success to indicate all is done. 
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + JxlDecoderDestroy(dec); + + EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), image.data(), xsize, + ysize, format_orig, format)); + + // Compare the extracted extra channel with the original alpha channel + + std::vector alpha(pixels.size() / 4); + for (size_t i = 0; i < pixels.size(); i += 8) { + size_t index_alpha = i / 4; + alpha[index_alpha + 0] = pixels[i + 6]; + alpha[index_alpha + 1] = pixels[i + 7]; + } + JxlPixelFormat format_alpha = format; + format_alpha.num_channels = 1; + JxlPixelFormat format_orig_alpha = format_orig; + format_orig_alpha.num_channels = 1; + + EXPECT_EQ(0u, + jxl::test::ComparePixels(alpha.data(), extra.data(), xsize, ysize, + format_orig_alpha, format_alpha)); +} + +TEST(DecodeTest, SkipCurrentFrameTest) { + size_t xsize = 90, ysize = 120; + constexpr size_t num_frames = 7; + std::vector frames[num_frames]; + for (size_t i = 0; i < num_frames; i++) { + frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i); + } + JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetUintSamples(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.have_animation = true; + io.frames.clear(); + io.frames.reserve(num_frames); + io.SetSize(xsize, ysize); + + std::vector frame_durations(num_frames); + for (size_t i = 0; i < num_frames; ++i) { + frame_durations[i] = 5 + i; + } + + for (size_t i = 0; i < num_frames; ++i) { + jxl::ImageBundle bundle(&io.metadata.m); + if (i & 1) { + // Mark some frames as referenceable, others not. 
+ bundle.use_for_next_frame = true; + } + + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frames[i].data(), frames[i].size()), xsize, + ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle)); + bundle.duration = frame_durations[i]; + io.frames.push_back(std::move(bundle)); + } + + jxl::CompressParams cparams; + cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::AuxOut aux_out; + jxl::PaddedBytes compressed; + jxl::PassesEncoderState enc_state; + jxl::PassDefinition passes[] = {{2, 0, 4}, {4, 0, 4}, {8, 2, 2}, {8, 0, 1}}; + jxl::ProgressiveMode progressive_mode{passes}; + enc_state.progressive_splitter.SetProgressiveMode(progressive_mode); + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, + jxl::GetJxlCms(), &aux_out, nullptr)); + + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | + JXL_DEC_FRAME_PROGRESSION | + JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kLastPasses)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + + for (size_t i = 0; i < num_frames; ++i) { + printf("Decoding frame %d\n", (int)i); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec)); + std::vector pixels(buffer_size); + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec)); + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ(frame_durations[i], frame_header.duration); + 
EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + if (i == 2) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec)); + continue; + } + EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec)); + EXPECT_EQ(8, JxlDecoderGetIntendedDownsamplingRatio(dec)); + if (i == 3) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec)); + continue; + } + EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec)); + EXPECT_EQ(4, JxlDecoderGetIntendedDownsamplingRatio(dec)); + if (i == 4) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec)); + continue; + } + EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec)); + EXPECT_EQ(2, JxlDecoderGetIntendedDownsamplingRatio(dec)); + if (i == 5) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSkipCurrentFrame(dec)); + continue; + } + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSkipCurrentFrame(dec)); + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. 
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, SkipFrameTest) { + size_t xsize = 90, ysize = 120; + constexpr size_t num_frames = 16; + std::vector frames[num_frames]; + for (size_t i = 0; i < num_frames; i++) { + frames[i] = jxl::test::GetSomeTestImage(xsize, ysize, 3, i); + } + JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetUintSamples(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.have_animation = true; + io.frames.clear(); + io.frames.reserve(num_frames); + io.SetSize(xsize, ysize); + + std::vector frame_durations(num_frames); + for (size_t i = 0; i < num_frames; ++i) { + frame_durations[i] = 5 + i; + } + + for (size_t i = 0; i < num_frames; ++i) { + jxl::ImageBundle bundle(&io.metadata.m); + if (i & 1) { + // Mark some frames as referenceable, others not. + bundle.use_for_next_frame = true; + } + + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frames[i].data(), frames[i].size()), xsize, + ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle)); + bundle.duration = frame_durations[i]; + io.frames.push_back(std::move(bundle)); + } + + jxl::CompressParams cparams; + cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip. 
+ cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::AuxOut aux_out; + jxl::PaddedBytes compressed; + jxl::PassesEncoderState enc_state; + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, + jxl::GetJxlCms(), &aux_out, nullptr)); + + // Decode and test the animation frames + + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + + for (size_t i = 0; i < num_frames; ++i) { + if (i == 3) { + JxlDecoderSkipFrames(dec, 5); + i += 5; + } + std::vector pixels(buffer_size); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ(frame_durations[i], frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + xsize, ysize, format, format)); + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. 
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + // Test rewinding the decoder and skipping different frames + + JxlDecoderRewind(dec); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + for (size_t i = 0; i < num_frames; ++i) { + int test_skipping = (i == 9) ? 3 : 0; + std::vector pixels(buffer_size); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this + // should only skip the next frame, not the currently processed one. + if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ(frame_durations[i], frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + xsize, ysize, format, format)); + + if (test_skipping) i += test_skipping; + } + + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, SkipFrameWithBlendingTest) { + size_t xsize = 90, ysize = 120; + constexpr size_t num_frames = 16; + std::vector frames[num_frames]; + JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetUintSamples(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.have_animation = true; + io.frames.clear(); + io.frames.reserve(num_frames); + io.SetSize(xsize, ysize); + + std::vector frame_durations(num_frames); + + for (size_t i = 0; i < num_frames; ++i) { 
+ if (i < 5) { + std::vector frame_internal = + jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2 + 1); + // An internal frame with 0 duration, and use_for_next_frame, this is a + // frame that is not rendered and not output by the API, but on which the + // rendered frames depend + jxl::ImageBundle bundle_internal(&io.metadata.m); + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frame_internal.data(), + frame_internal.size()), + xsize, ysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle_internal)); + bundle_internal.duration = 0; + bundle_internal.use_for_next_frame = true; + io.frames.push_back(std::move(bundle_internal)); + } + + std::vector frame = + jxl::test::GetSomeTestImage(xsize, ysize, 3, i * 2); + // Actual rendered frame + frame_durations[i] = 5 + i; + jxl::ImageBundle bundle(&io.metadata.m); + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frame.data(), frame.size()), xsize, ysize, + jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle)); + bundle.duration = frame_durations[i]; + // Create some variation in which frames depend on which. + if (i != 3 && i != 9 && i != 10) { + bundle.use_for_next_frame = true; + } + if (i != 12) { + bundle.blend = true; + // Choose a blend mode that depends on the pixels of the saved frame and + // doesn't use alpha + bundle.blendmode = jxl::BlendMode::kMul; + } + io.frames.push_back(std::move(bundle)); + } + + jxl::CompressParams cparams; + cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip. + cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::AuxOut aux_out; + jxl::PaddedBytes compressed; + jxl::PassesEncoderState enc_state; + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, + jxl::GetJxlCms(), &aux_out, nullptr)); + + // Independently decode all frames without any skipping, to create the + // expected blended frames, for the actual tests below to compare with. 
+ { + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner( + dec, JxlThreadParallelRunner, runner)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + for (size_t i = 0; i < num_frames; ++i) { + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + frames[i].resize(xsize * ysize * 6); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(), + frames[i].size())); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); + } + + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetParallelRunner(dec, JxlThreadParallelRunner, runner)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + + for (size_t i = 0; i < num_frames; ++i) { + std::vector pixels(buffer_size); + + EXPECT_EQ(JXL_DEC_FRAME, 
JxlDecoderProcessInput(dec)); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ(frame_durations[i], frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + xsize, ysize, format, format)); + + // Test rewinding mid-way, not decoding all frames. + if (i == 8) { + break; + } + } + + JxlDecoderRewind(dec); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + for (size_t i = 0; i < num_frames; ++i) { + if (i == 3) { + JxlDecoderSkipFrames(dec, 5); + i += 5; + } + std::vector pixels(buffer_size); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ(frame_durations[i], frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + xsize, ysize, format, format)); + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. 
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + // Test rewinding the decoder and skipping different frames + + JxlDecoderRewind(dec); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + for (size_t i = 0; i < num_frames; ++i) { + int test_skipping = (i == 9) ? 3 : 0; + std::vector pixels(buffer_size); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this + // should only skip the next frame, not the currently processed one. + if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ(frame_durations[i], frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels.data(), pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + xsize, ysize, format, format)); + + if (test_skipping) i += test_skipping; + } + + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, SkipFrameWithAlphaBlendingTest) { + size_t xsize = 90, ysize = 120; + constexpr size_t num_frames = 16; + std::vector frames[num_frames + 5]; + JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetUintSamples(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.have_animation = true; + io.frames.clear(); + io.frames.reserve(num_frames + 5); + io.SetSize(xsize, ysize); + + std::vector frame_durations_c; + std::vector frame_durations_nc; + 
std::vector frame_xsize, frame_ysize, frame_x0, frame_y0; + + for (size_t i = 0; i < num_frames; ++i) { + size_t cropxsize = 1 + xsize * 2 / (i + 1); + size_t cropysize = 1 + ysize * 3 / (i + 2); + int cropx0 = i * 3 - 8; + int cropy0 = i * 4 - 7; + if (i < 5) { + std::vector frame_internal = + jxl::test::GetSomeTestImage(xsize / 2, ysize / 2, 4, i * 2 + 1); + // An internal frame with 0 duration, and use_for_next_frame, this is a + // frame that is not rendered and not output by default by the API, but on + // which the rendered frames depend + jxl::ImageBundle bundle_internal(&io.metadata.m); + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frame_internal.data(), + frame_internal.size()), + xsize / 2, ysize / 2, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle_internal)); + bundle_internal.duration = 0; + bundle_internal.use_for_next_frame = true; + bundle_internal.origin = {13, 17}; + io.frames.push_back(std::move(bundle_internal)); + frame_durations_nc.push_back(0); + frame_xsize.push_back(xsize / 2); + frame_ysize.push_back(ysize / 2); + frame_x0.push_back(13); + frame_y0.push_back(17); + } + + std::vector frame = + jxl::test::GetSomeTestImage(cropxsize, cropysize, 4, i * 2); + // Actual rendered frame + jxl::ImageBundle bundle(&io.metadata.m); + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frame.data(), frame.size()), cropxsize, + cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle)); + bundle.duration = 5 + i; + frame_durations_nc.push_back(5 + i); + frame_durations_c.push_back(5 + i); + frame_xsize.push_back(cropxsize); + frame_ysize.push_back(cropysize); + frame_x0.push_back(cropx0); + frame_y0.push_back(cropy0); + bundle.origin = {cropx0, cropy0}; + // Create some variation in which frames depend on which. 
+ if (i != 3 && i != 9 && i != 10) { + bundle.use_for_next_frame = true; + } + if (i != 12) { + bundle.blend = true; + bundle.blendmode = jxl::BlendMode::kBlend; + } + io.frames.push_back(std::move(bundle)); + } + + jxl::CompressParams cparams; + cparams.SetLossless(); // Lossless to verify pixels exactly after roundtrip. + cparams.speed_tier = jxl::SpeedTier::kThunder; + jxl::AuxOut aux_out; + jxl::PaddedBytes compressed; + jxl::PassesEncoderState enc_state; + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, + jxl::GetJxlCms(), &aux_out, nullptr)); + // try both with and without coalescing + for (auto coalescing : {JXL_TRUE, JXL_FALSE}) { + // Independently decode all frames without any skipping, to create the + // expected blended frames, for the actual tests below to compare with. + { + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing)); + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner( + dec, JxlThreadParallelRunner, runner)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + for (size_t i = 0; i < num_frames + (coalescing ? 
0 : 5); ++i) { + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + if (coalescing) { + EXPECT_EQ(xsize * ysize * 8, buffer_size); + } else { + EXPECT_EQ(frame_xsize[i] * frame_ysize[i] * 8, buffer_size); + } + frames[i].resize(buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(), + frames[i].size())); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); + } + + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing)); + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner( + dec, JxlThreadParallelRunner, runner)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | + JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + + for (size_t i = 0; i < num_frames; ++i) { + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + std::vector pixels(buffer_size); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ((coalescing ? 
frame_durations_c[i] : frame_durations_nc[i]), + frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames, frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(), + pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + if (coalescing) { + EXPECT_EQ(frame_header.layer_info.xsize, xsize); + } else { + EXPECT_EQ(frame_header.layer_info.xsize, frame_xsize[i]); + } + if (coalescing) { + EXPECT_EQ(frame_header.layer_info.ysize, ysize); + } else { + EXPECT_EQ(frame_header.layer_info.ysize, frame_ysize[i]); + } + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + frame_header.layer_info.xsize, + frame_header.layer_info.ysize, + format, format)); + + // Test rewinding mid-way, not decoding all frames. + if (i == 8) { + break; + } + } + + JxlDecoderRewind(dec); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents( + dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) { + if (i == 3) { + JxlDecoderSkipFrames(dec, 5); + i += 5; + } + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + std::vector pixels(buffer_size); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]), + frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames + (coalescing ? 
0 : 5), + frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(), + pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + if (coalescing) { + EXPECT_EQ(frame_header.layer_info.xsize, xsize); + EXPECT_EQ(frame_header.layer_info.ysize, ysize); + EXPECT_EQ(frame_header.layer_info.crop_x0, 0); + EXPECT_EQ(frame_header.layer_info.crop_y0, 0); + } else { + EXPECT_EQ(frame_header.layer_info.xsize, frame_xsize[i]); + EXPECT_EQ(frame_header.layer_info.ysize, frame_ysize[i]); + EXPECT_EQ(frame_header.layer_info.crop_x0, frame_x0[i]); + EXPECT_EQ(frame_header.layer_info.crop_y0, frame_y0[i]); + EXPECT_EQ(frame_header.layer_info.blend_info.blendmode, + i != 12 + 5 && frame_header.duration != 0 + ? 2 + : 0); // kBlend or the default kReplace + } + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + frame_header.layer_info.xsize, + frame_header.layer_info.ysize, + format, format)); + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + // Test rewinding the decoder and skipping different frames + + JxlDecoderRewind(dec); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents( + dec, JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + + for (size_t i = 0; i < num_frames + (coalescing ? 0 : 5); ++i) { + int test_skipping = (i == 9) ? 3 : 0; + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + std::vector pixels(buffer_size); + + // Since this is after JXL_DEC_FRAME but before JXL_DEC_FULL_IMAGE, this + // should only skip the next frame, not the currently processed one. 
+ if (test_skipping) JxlDecoderSkipFrames(dec, test_skipping); + + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec, &frame_header)); + EXPECT_EQ((coalescing ? frame_durations_c[i] : frame_durations_nc[i]), + frame_header.duration); + + EXPECT_EQ(i + 1 == num_frames + (coalescing ? 0 : 5), + frame_header.is_last); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, pixels.data(), + pixels.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[i].data(), pixels.data(), + frame_header.layer_info.xsize, + frame_header.layer_info.ysize, + format, format)); + + if (test_skipping) i += test_skipping; + } + + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, OrientedCroppedFrameTest) { + const auto test = [](bool keep_orientation, uint32_t orientation, + uint32_t resampling) { + size_t xsize = 90, ysize = 120; + JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + size_t oxsize = (!keep_orientation && orientation > 4 ? ysize : xsize); + size_t oysize = (!keep_orientation && orientation > 4 ? 
xsize : ysize); + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetUintSamples(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.orientation = orientation; + io.frames.clear(); + io.SetSize(xsize, ysize); + + for (size_t i = 0; i < 3; ++i) { + size_t cropxsize = 1 + xsize * 2 / (i + 1); + size_t cropysize = 1 + ysize * 3 / (i + 2); + int cropx0 = i * 3 - 8; + int cropy0 = i * 4 - 7; + + std::vector frame = + jxl::test::GetSomeTestImage(cropxsize, cropysize, 4, i * 2); + jxl::ImageBundle bundle(&io.metadata.m); + EXPECT_TRUE(ConvertFromExternal( + jxl::Span(frame.data(), frame.size()), cropxsize, + cropysize, jxl::ColorEncoding::SRGB(/*is_gray=*/false), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &bundle)); + bundle.origin = {cropx0, cropy0}; + bundle.use_for_next_frame = true; + io.frames.push_back(std::move(bundle)); + } + + jxl::CompressParams cparams; + cparams + .SetLossless(); // Lossless to verify pixels exactly after roundtrip. + cparams.speed_tier = jxl::SpeedTier::kThunder; + cparams.resampling = resampling; + jxl::AuxOut aux_out; + jxl::PaddedBytes compressed; + jxl::PassesEncoderState enc_state; + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, &enc_state, &compressed, + jxl::GetJxlCms(), &aux_out, nullptr)); + + // 0 is merged frame as decoded with coalescing enabled (default) + // 1-3 are non-coalesced frames as decoded with coalescing disabled + // 4 is the manually merged frame + std::vector frames[5]; + frames[4].resize(xsize * ysize * 8, 0); + + // try both with and without coalescing + for (auto coalescing : {JXL_TRUE, JXL_FALSE}) { + // Independently decode all frames without any skipping, to create the + // expected blended frames, for the actual tests below to compare with. 
+ { + JxlDecoder* dec = JxlDecoderCreate(NULL); + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec, coalescing)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetKeepOrientation(dec, keep_orientation)); + void* runner = JxlThreadParallelRunnerCreate( + NULL, JxlThreadParallelRunnerDefaultNumWorkerThreads()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetParallelRunner( + dec, JxlThreadParallelRunner, runner)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + for (size_t i = (coalescing ? 0 : 1); i < (coalescing ? 1 : 4); ++i) { + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + JxlFrameHeader frame_header; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetFrameHeader(dec, &frame_header)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + if (coalescing) { + EXPECT_EQ(xsize * ysize * 8, buffer_size); + } else { + EXPECT_EQ(frame_header.layer_info.xsize * + frame_header.layer_info.ysize * 8, + buffer_size); + } + frames[i].resize(buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, frames[i].data(), + frames[i].size())); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_EQ(frame_header.layer_info.blend_info.blendmode, + JXL_BLEND_REPLACE); + if (coalescing) { + EXPECT_EQ(frame_header.layer_info.xsize, oxsize); + EXPECT_EQ(frame_header.layer_info.ysize, oysize); + EXPECT_EQ(frame_header.layer_info.crop_x0, 0); + EXPECT_EQ(frame_header.layer_info.crop_y0, 0); + } else { + // manually merge this layer + int x0 = frame_header.layer_info.crop_x0; + int y0 = frame_header.layer_info.crop_y0; + int w = frame_header.layer_info.xsize; + int h = frame_header.layer_info.ysize; + for (int y = 0; y < static_cast(oysize); y++) { + if (y < y0 || y >= y0 + h) 
continue; + // pointers do whole 16-bit RGBA pixels at a time + uint64_t* row_merged = static_cast( + (void*)(frames[4].data() + y * oxsize * 8)); + uint64_t* row_layer = static_cast( + (void*)(frames[i].data() + (y - y0) * w * 8)); + for (int x = 0; x < static_cast(oxsize); x++) { + if (x < x0 || x >= x0 + w) continue; + row_merged[x] = row_layer[x - x0]; + } + } + } + } + + // After all frames were decoded, JxlDecoderProcessInput should return + // success to indicate all is done. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + JxlThreadParallelRunnerDestroy(runner); + JxlDecoderDestroy(dec); + } + } + + EXPECT_EQ(0u, jxl::test::ComparePixels(frames[0].data(), frames[4].data(), + oxsize, oysize, format, format)); + }; + + for (bool keep_orientation : {true, false}) { + for (uint32_t orientation = 1; orientation <= 8; orientation++) { + for (uint32_t resampling : {1, 2, 4, 8}) { + SCOPED_TRACE(testing::Message() + << "keep_orientation: " << keep_orientation << ", " + << "orientation: " << orientation << ", " + << "resampling: " << resampling); + test(keep_orientation, orientation, resampling); + } + } + } +} + +struct FramePositions { + size_t frame_start; + size_t header_end; + size_t toc_end; + std::vector section_end; +}; + +struct StreamPositions { + size_t codestream_start; + size_t codestream_end; + size_t basic_info; + size_t jbrd_end = 0; + std::vector box_start; + std::vector frames; +}; + +void AnalyzeCodestream(const jxl::PaddedBytes& data, + StreamPositions* streampos) { + // Unbox data to codestream and mark where it is broken up by boxes. 
+ std::vector codestream; + std::vector> breakpoints; + bool codestream_end = false; + ASSERT_LE(2, data.size()); + if (data[0] == 0xff && data[1] == 0x0a) { + codestream = std::vector(data.begin(), data.end()); + streampos->codestream_start = 0; + } else { + const uint8_t* in = data.data(); + size_t pos = 0; + while (pos < data.size()) { + ASSERT_LE(pos + 8, data.size()); + streampos->box_start.push_back(pos); + size_t box_size = LoadBE32(in + pos); + if (box_size == 0) box_size = data.size() - pos; + ASSERT_LE(pos + box_size, data.size()); + if (memcmp(in + pos + 4, "jxlc", 4) == 0) { + EXPECT_TRUE(codestream.empty()); + streampos->codestream_start = pos + 8; + codestream.insert(codestream.end(), in + pos + 8, in + pos + box_size); + codestream_end = true; + } else if (memcmp(in + pos + 4, "jxlp", 4) == 0) { + codestream_end = (LoadBE32(in + pos + 8) & 0x80000000); + if (codestream.empty()) { + streampos->codestream_start = pos + 12; + } else if (box_size > 12 || !codestream_end) { + breakpoints.push_back({codestream.size(), 12}); + } + codestream.insert(codestream.end(), in + pos + 12, in + pos + box_size); + } else if (memcmp(in + pos + 4, "jbrd", 4) == 0) { + EXPECT_TRUE(codestream.empty()); + streampos->jbrd_end = pos + box_size; + } else if (!codestream.empty() && !codestream_end) { + breakpoints.push_back({codestream.size(), box_size}); + } + pos += box_size; + } + ASSERT_EQ(pos, data.size()); + } + // Translate codestream positions to boxed stream positions. + size_t offset = streampos->codestream_start; + size_t bp = 0; + auto add_offset = [&](size_t pos) { + while (bp < breakpoints.size() && pos >= breakpoints[bp].first) { + offset += breakpoints[bp++].second; + } + return pos + offset; + }; + // Analyze the unboxed codestream. 
+ jxl::BitReader br( + jxl::Span(codestream.data(), codestream.size())); + ASSERT_EQ(br.ReadFixedBits<16>(), 0x0AFF); + jxl::CodecMetadata metadata; + ASSERT_TRUE(ReadSizeHeader(&br, &metadata.size)); + ASSERT_TRUE(ReadImageMetadata(&br, &metadata.m)); + streampos->basic_info = + add_offset(br.TotalBitsConsumed() / jxl::kBitsPerByte); + metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded; + ASSERT_TRUE(jxl::Bundle::Read(&br, &metadata.transform_data)); + if (metadata.m.color_encoding.WantICC()) { + jxl::PaddedBytes icc; + ASSERT_TRUE(jxl::ReadICC(&br, &icc)); + ASSERT_TRUE(metadata.m.color_encoding.SetICCRaw(std::move(icc))); + } + ASSERT_TRUE(br.JumpToByteBoundary()); + bool has_preview = metadata.m.have_preview; + while (br.TotalBitsConsumed() < br.TotalBytes() * jxl::kBitsPerByte) { + FramePositions p; + p.frame_start = add_offset(br.TotalBitsConsumed() / jxl::kBitsPerByte); + jxl::FrameHeader frame_header(&metadata); + if (has_preview) { + frame_header.nonserialized_is_preview = true; + has_preview = false; + } + ASSERT_TRUE(ReadFrameHeader(&br, &frame_header)); + p.header_end = + add_offset(jxl::DivCeil(br.TotalBitsConsumed(), jxl::kBitsPerByte)); + jxl::FrameDimensions frame_dim = frame_header.ToFrameDimensions(); + uint64_t groups_total_size; + const size_t toc_entries = jxl::NumTocEntries( + frame_dim.num_groups, frame_dim.num_dc_groups, + frame_header.passes.num_passes, /*has_ac_global=*/true); + std::vector section_offsets; + std::vector section_sizes; + ASSERT_TRUE(ReadGroupOffsets(toc_entries, &br, §ion_offsets, + §ion_sizes, &groups_total_size)); + EXPECT_EQ(br.TotalBitsConsumed() % jxl::kBitsPerByte, 0); + size_t sections_start = br.TotalBitsConsumed() / jxl::kBitsPerByte; + p.toc_end = add_offset(sections_start); + for (size_t i = 0; i < toc_entries; ++i) { + size_t end = sections_start + section_offsets[i] + section_sizes[i]; + p.section_end.push_back(add_offset(end)); + } + br.SkipBits(groups_total_size * jxl::kBitsPerByte); 
+ streampos->frames.push_back(p); + } + streampos->codestream_end = add_offset(codestream.size()); + EXPECT_EQ(br.TotalBitsConsumed(), br.TotalBytes() * jxl::kBitsPerByte); + EXPECT_TRUE(br.Close()); +} + +enum ExpectedFlushState { NO_FLUSH, SAME_FLUSH, NEW_FLUSH }; +struct Breakpoint { + size_t file_pos; + ExpectedFlushState expect_flush; +}; + +void VerifyProgression(size_t xsize, size_t ysize, uint32_t num_channels, + const std::vector& pixels, + const jxl::PaddedBytes& data, + std::vector breakpoints) { + // Size large enough for multiple groups, required to have progressive stages. + ASSERT_LT(256, xsize); + ASSERT_LT(256, ysize); + std::vector pixels2; + pixels2.resize(pixels.size()); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + int bp = 0; + const uint8_t* next_in = data.data(); + size_t avail_in = breakpoints[bp].file_pos; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + double prev_dist = 1.0; + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + printf("bp: %d status: 0x%x\n", bp, (int)status); + if (status == JXL_DEC_BASIC_INFO) { + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + // Output buffer/callback not yet set + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(), + pixels2.size())); + } else if (status == JXL_DEC_FRAME) { + // Nothing to do. 
+ } else if (status == JXL_DEC_SUCCESS) { + EXPECT_EQ(bp + 1, breakpoints.size()); + break; + } else if (status == JXL_DEC_NEED_MORE_INPUT || + status == JXL_DEC_FULL_IMAGE) { + if (breakpoints[bp].expect_flush == NO_FLUSH) { + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec)); + } else { + if (status != JXL_DEC_FULL_IMAGE) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec)); + } + double dist = jxl::test::DistanceRMS(pixels2.data(), pixels.data(), + xsize, ysize, format); + if (breakpoints[bp].expect_flush == NEW_FLUSH) { + EXPECT_LT(dist, prev_dist); + prev_dist = dist; + } else { + EXPECT_EQ(dist, prev_dist); + } + } + if (status == JXL_DEC_FULL_IMAGE) { + EXPECT_EQ(bp + 1, breakpoints.size()); + continue; + } + ASSERT_LT(++bp, breakpoints.size()); + next_in += avail_in - JxlDecoderReleaseInput(dec); + avail_in = breakpoints[bp].file_pos - (next_in - data.data()); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + } else { + printf("Unexpected status: 0x%x\n", (int)status); + FAIL(); // unexpected returned status + } + } + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, ProgressionTest) { + size_t xsize = 508, ysize = 470; + uint32_t num_channels = 3; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + jxl::TestCodestreamParams params; + params.cparams.progressive_dc = 1; + params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + StreamPositions streampos; + AnalyzeCodestream(data, &streampos); + const std::vector& fp = streampos.frames; + // We have preview, dc frame and regular frame. 
+ EXPECT_EQ(3, fp.size()); + EXPECT_EQ(7, fp[2].section_end.size()); + EXPECT_EQ(data.size(), fp[2].section_end[6]); + std::vector breakpoints{ + {fp[0].frame_start, NO_FLUSH}, // headers + {fp[1].frame_start, NO_FLUSH}, // preview + {fp[2].frame_start, NO_FLUSH}, // dc frame + {fp[2].section_end[0], NO_FLUSH}, // DC global + {fp[2].section_end[1] - 1, NO_FLUSH}, // partial DC group + {fp[2].section_end[1], NEW_FLUSH}, // DC group + {fp[2].section_end[2], SAME_FLUSH}, // AC global + {fp[2].section_end[3], NEW_FLUSH}, // AC group 0 + {fp[2].section_end[4] - 1, SAME_FLUSH}, // partial AC group 1 + {fp[2].section_end[4], NEW_FLUSH}, // AC group 1 + {fp[2].section_end[5], NEW_FLUSH}, // AC group 2 + {data.size() - 1, SAME_FLUSH}, // partial AC group 3 + {data.size(), NEW_FLUSH}}; // full image + VerifyProgression(xsize, ysize, num_channels, pixels, data, breakpoints); +} + +TEST(DecodeTest, ProgressionTestLosslessAlpha) { + size_t xsize = 508, ysize = 470; + uint32_t num_channels = 4; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + jxl::TestCodestreamParams params; + params.cparams.SetLossless(); + params.cparams.speed_tier = jxl::SpeedTier::kThunder; + params.cparams.responsive = 1; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + StreamPositions streampos; + AnalyzeCodestream(data, &streampos); + const std::vector& fp = streampos.frames; + // We have preview, dc frame and regular frame. 
+ EXPECT_EQ(1, fp.size()); + EXPECT_EQ(7, fp[0].section_end.size()); + EXPECT_EQ(data.size(), fp[0].section_end[6]); + std::vector breakpoints{ + {fp[0].frame_start, NO_FLUSH}, // headers + {fp[0].section_end[0] - 1, NO_FLUSH}, // partial DC global + {fp[0].section_end[0], NEW_FLUSH}, // DC global + {fp[0].section_end[1], SAME_FLUSH}, // DC group + {fp[0].section_end[2], SAME_FLUSH}, // AC global + {fp[0].section_end[3], NEW_FLUSH}, // AC group 0 + {fp[0].section_end[4] - 1, SAME_FLUSH}, // partial AC group 1 + {fp[0].section_end[4], NEW_FLUSH}, // AC group 1 + {fp[0].section_end[5], NEW_FLUSH}, // AC group 2 + {data.size() - 1, SAME_FLUSH}, // partial AC group 3 + {data.size(), NEW_FLUSH}}; // full image + VerifyProgression(xsize, ysize, num_channels, pixels, data, breakpoints); +} + +void VerifyFilePosition(size_t expected_pos, const jxl::PaddedBytes& data, + JxlDecoder* dec) { + size_t remaining = JxlDecoderReleaseInput(dec); + size_t pos = data.size() - remaining; + EXPECT_EQ(expected_pos, pos); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, data.data() + pos, remaining)); +} + +TEST(DecodeTest, InputHandlingTestOneShot) { + size_t xsize = 508, ysize = 470; + uint32_t num_channels = 3; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + printf("Testing with box format %d\n", i); + jxl::TestCodestreamParams params; + params.cparams.progressive_dc = 1; + params.preview_mode = jxl::kSmallPreview; + params.box_format = (CodeStreamBoxFormat)i; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + StreamPositions streampos; + AnalyzeCodestream(data, &streampos); + const std::vector& fp = streampos.frames; + // We have preview, dc frame and regular frame. 
+ EXPECT_EQ(3, fp.size()); + + std::vector pixels2; + pixels2.resize(pixels.size()); + + int kNumEvents = 6; + int events[] = { + JXL_DEC_BASIC_INFO, JXL_DEC_COLOR_ENCODING, JXL_DEC_PREVIEW_IMAGE, + JXL_DEC_FRAME, JXL_DEC_FULL_IMAGE, JXL_DEC_FRAME_PROGRESSION, + }; + size_t end_positions[] = { + streampos.basic_info, fp[0].frame_start, + fp[1].frame_start, fp[2].toc_end, + streampos.codestream_end, streampos.codestream_end}; + int events_wanted = 0; + for (int j = 0; j < kNumEvents; ++j) { + events_wanted |= events[j]; + size_t end_pos = end_positions[j]; + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, data.data(), data.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + VerifyFilePosition(streampos.basic_info, data, dec); + if (j >= 1) { + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[0].frame_start, data, dec); + } + if (j >= 2) { + EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[0].toc_end, data, dec); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)); + EXPECT_GE(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(), + buffer_size)); + EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[1].frame_start, data, dec); + } + if (j >= 3) { + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[2].toc_end, data, dec); + if (j >= 5) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kDC)); + } + } + if (j >= 4) { + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[2].toc_end, data, dec); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, 
&buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(), + pixels2.size())); + if (j >= 5) { + EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[2].section_end[1], data, dec); + } + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + VerifyFilePosition(streampos.codestream_end, data, dec); + } + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + VerifyFilePosition(end_pos, data, dec); + JxlDecoderDestroy(dec); + } + } +} + +TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(InputHandlingTestJPEGOneshot)) { + TEST_LIBJPEG_SUPPORT(); + size_t xsize = 123; + size_t ysize = 77; + size_t channels = 3; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, channels, /*seed=*/0); + for (int i = 1; i < kCSBF_NUM_ENTRIES; ++i) { + printf("Testing with box format %d\n", i); + jxl::PaddedBytes jpeg_codestream; + jxl::TestCodestreamParams params; + params.cparams.color_transform = jxl::ColorTransform::kNone; + params.jpeg_codestream = &jpeg_codestream; + params.preview_mode = jxl::kSmallPreview; + params.box_format = (CodeStreamBoxFormat)i; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + channels, params); + JxlPixelFormat format = {3, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + StreamPositions streampos; + AnalyzeCodestream(data, &streampos); + const std::vector& fp = streampos.frames; + // We have preview and regular frame. 
+ EXPECT_EQ(2, fp.size()); + EXPECT_LT(0, streampos.jbrd_end); + + std::vector pixels2; + pixels2.resize(pixels.size()); + + int kNumEvents = 6; + int events[] = {JXL_DEC_BASIC_INFO, JXL_DEC_JPEG_RECONSTRUCTION, + JXL_DEC_COLOR_ENCODING, JXL_DEC_PREVIEW_IMAGE, + JXL_DEC_FRAME, JXL_DEC_FULL_IMAGE}; + size_t end_positions[] = {streampos.basic_info, streampos.basic_info, + fp[0].frame_start, fp[1].frame_start, + fp[1].toc_end, streampos.codestream_end}; + int events_wanted = 0; + for (int j = 0; j < kNumEvents; ++j) { + printf("j = %d\n", j); + events_wanted |= events[j]; + size_t end_pos = end_positions[j]; + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, data.data(), data.size())); + if (j >= 1) { + EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec)); + VerifyFilePosition(streampos.jbrd_end, data, dec); + } + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + VerifyFilePosition(streampos.basic_info, data, dec); + if (j >= 2) { + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[0].frame_start, data, dec); + } + if (j >= 3) { + EXPECT_EQ(JXL_DEC_NEED_PREVIEW_OUT_BUFFER, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[0].toc_end, data, dec); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)); + EXPECT_GE(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(), + buffer_size)); + EXPECT_EQ(JXL_DEC_PREVIEW_IMAGE, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[1].frame_start, data, dec); + } + if (j >= 4) { + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[1].toc_end, data, dec); + } + if (j >= 5) { + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + VerifyFilePosition(fp[1].toc_end, data, 
dec); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(), + pixels2.size())); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + VerifyFilePosition(streampos.codestream_end, data, dec); + } + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + VerifyFilePosition(end_pos, data, dec); + JxlDecoderDestroy(dec); + } + } +} + +TEST(DecodeTest, InputHandlingTestStreaming) { + size_t xsize = 508, ysize = 470; + uint32_t num_channels = 3; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + for (int i = 0; i < kCSBF_NUM_ENTRIES; ++i) { + printf("Testing with box format %d\n", i); + fflush(stdout); + jxl::TestCodestreamParams params; + params.cparams.progressive_dc = 1; + params.box_format = (CodeStreamBoxFormat)i; + params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + StreamPositions streampos; + AnalyzeCodestream(data, &streampos); + const std::vector& fp = streampos.frames; + // We have preview, dc frame and regular frame. 
+ EXPECT_EQ(3, fp.size()); + std::vector pixels2; + pixels2.resize(pixels.size()); + int events_wanted = + (JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | JXL_DEC_PREVIEW_IMAGE | + JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION | + JXL_DEC_BOX); + for (size_t increment : {1, 7, 27, 1024}) { + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, events_wanted)); + size_t file_pos = 0; + size_t box_index = 0; + size_t avail_in = 0; + for (;;) { + const uint8_t* next_in = data.data() + file_pos; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + size_t remaining = JxlDecoderReleaseInput(dec); + size_t consumed = avail_in - remaining; + file_pos += consumed; + avail_in += increment; + avail_in = std::min(avail_in, data.size() - file_pos); + if (status == JXL_DEC_BASIC_INFO) { + EXPECT_EQ(file_pos, streampos.basic_info); + } else if (status == JXL_DEC_COLOR_ENCODING) { + EXPECT_EQ(file_pos, streampos.frames[0].frame_start); + } else if (status == JXL_DEC_NEED_PREVIEW_OUT_BUFFER) { + EXPECT_EQ(file_pos, streampos.frames[0].toc_end); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderPreviewOutBufferSize(dec, &format, &buffer_size)); + EXPECT_GE(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetPreviewOutBuffer(dec, &format, pixels2.data(), + buffer_size)); + } else if (status == JXL_DEC_PREVIEW_IMAGE) { + EXPECT_EQ(file_pos, streampos.frames[1].frame_start); + } else if (status == JXL_DEC_FRAME) { + EXPECT_EQ(file_pos, streampos.frames[2].toc_end); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetProgressiveDetail(dec, kDC)); + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + EXPECT_EQ(file_pos, streampos.frames[2].toc_end); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + 
EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, pixels2.data(), + pixels2.size())); + } else if (status == JXL_DEC_FRAME_PROGRESSION) { + EXPECT_EQ(file_pos, streampos.frames[2].section_end[1]); + } else if (status == JXL_DEC_FULL_IMAGE) { + EXPECT_EQ(file_pos, streampos.codestream_end); + } else if (status == JXL_DEC_SUCCESS) { + EXPECT_EQ(file_pos, streampos.codestream_end); + break; + } else if (status == JXL_DEC_NEED_MORE_INPUT) { + EXPECT_LT(remaining, 12); + if ((i == kCSBF_None && file_pos >= 2) || + (box_index > 0 && box_index < streampos.box_start.size() && + file_pos >= streampos.box_start[box_index - 1] + 12 && + file_pos < streampos.box_start[box_index])) { + EXPECT_EQ(remaining, 0); + } + if (file_pos == data.size()) break; + } else if (status == JXL_DEC_BOX) { + ASSERT_LT(box_index, streampos.box_start.size()); + EXPECT_EQ(file_pos, streampos.box_start[box_index++]); + } else { + printf("Unexpected status: 0x%x\n", (int)status); + FAIL(); + } + } + JxlDecoderDestroy(dec); + } + } +} + +TEST(DecodeTest, FlushTest) { + // Size large enough for multiple groups, required to have progressive + // stages + size_t xsize = 333, ysize = 300; + uint32_t num_channels = 3; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + jxl::TestCodestreamParams params; + params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + std::vector pixels2; + pixels2.resize(pixels.size()); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + + // Ensure that the first part contains at least the full DC of the image, + // otherwise flush does not work. 
+ size_t first_part = data.size() - 1; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + // Output buffer not yet set + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec)); + + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels2.data(), pixels2.size())); + + // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if + // data was already input before, since the processing of the frame only + // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME. + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec)); + + // Crude test of actual pixel data: pixel threshold of about 4% (2560/65535). 
+ // 29000 pixels can be above the threshold + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 29000u); + + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + size_t consumed = first_part - JxlDecoderReleaseInput(dec); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed, + data.size() - consumed)); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + // Lower threshold for the final (still lossy) image + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 11000u); + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, FlushTestImageOutCallback) { + // Size large enough for multiple groups, required to have progressive + // stages + size_t xsize = 333, ysize = 300; + uint32_t num_channels = 3; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + jxl::TestCodestreamParams params; + params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + std::vector pixels2; + pixels2.resize(pixels.size()); + + size_t bytes_per_pixel = format.num_channels * 2; + size_t stride = bytes_per_pixel * xsize; + auto callback = [&](size_t x, size_t y, size_t num_pixels, + const void* pixels_row) { + memcpy(pixels2.data() + stride * y + bytes_per_pixel * x, pixels_row, + num_pixels * bytes_per_pixel); + }; + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + + // Ensure that the first part contains at least the full DC of the image, + // otherwise flush does not work. 
+ size_t first_part = data.size() - 1; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + // Output callback not yet set + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutCallback( + dec, &format, + [](void* opaque, size_t x, size_t y, + size_t xsize, const void* pixels_row) { + auto cb = + static_cast(opaque); + (*cb)(x, y, xsize, pixels_row); + }, + /*opaque=*/&callback)); + + // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if + // data was already input before, since the processing of the frame only + // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME. + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec)); + + // Crude test of actual pixel data: pixel threshold of about 4% (2560/65535). 
+ // 29000 pixels can be above the threshold + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 29000u); + + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + size_t consumed = first_part - JxlDecoderReleaseInput(dec); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed, + data.size() - consumed)); + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + // Lower threshold for the final (still lossy) image + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 11000u); + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, FlushTestLossyProgressiveAlpha) { + // Size large enough for multiple groups, required to have progressive + // stages + size_t xsize = 333, ysize = 300; + uint32_t num_channels = 4; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + jxl::TestCodestreamParams params; + params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + std::vector pixels2; + pixels2.resize(pixels.size()); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + + // Ensure that the first part contains at least the full DC of the image, + // otherwise flush does not work. 
+ size_t first_part = data.size() - 1; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + // Output buffer not yet set + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec)); + + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels2.data(), pixels2.size())); + + // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if + // data was already input before, since the processing of the frame only + // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME. + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec)); + + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 30000u); + + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + size_t consumed = first_part - JxlDecoderReleaseInput(dec); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed, + data.size() - consumed)); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 11000u); + + JxlDecoderDestroy(dec); +} +TEST(DecodeTest, FlushTestLossyProgressiveAlphaUpsampling) { + size_t xsize = 533, ysize = 401; + uint32_t num_channels = 4; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + jxl::TestCodestreamParams params; + params.cparams.resampling = 2; + params.cparams.ec_resampling = 4; + 
params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + std::vector pixels2; + pixels2.resize(pixels.size()); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + + // Ensure that the first part contains at least the full DC of the image, + // otherwise flush does not work. + size_t first_part = data.size() * 2 / 3; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + // Output buffer not yet set + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec)); + + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels2.data(), pixels2.size())); + + // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if + // data was already input before, since the processing of the frame only + // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME. 
+ EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec)); + + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 125000u); + + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + size_t consumed = first_part - JxlDecoderReleaseInput(dec); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed, + data.size() - consumed)); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 70000u); + + JxlDecoderDestroy(dec); +} +TEST(DecodeTest, FlushTestLosslessProgressiveAlpha) { + // Size large enough for multiple groups, required to have progressive + // stages + size_t xsize = 333, ysize = 300; + uint32_t num_channels = 4; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + jxl::TestCodestreamParams params; + params.cparams.SetLossless(); + params.cparams.speed_tier = jxl::SpeedTier::kThunder; + params.cparams.responsive = 1; + params.cparams.modular_group_size_shift = 1; + params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + + std::vector pixels2; + pixels2.resize(pixels.size()); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + + // Ensure that the first part contains at least the full DC of the image, + // otherwise flush does not work. 
+ size_t first_part = data.size() / 2; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data(), first_part)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + + // Output buffer not yet set + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderFlushImage(dec)); + + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels2.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, pixels2.data(), pixels2.size())); + + // Must process input further until we get JXL_DEC_NEED_MORE_INPUT, even if + // data was already input before, since the processing of the frame only + // happens at the JxlDecoderProcessInput call after JXL_DEC_FRAME. + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec)); + + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format, 2560.0), + 2700u); + + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + + size_t consumed = first_part - JxlDecoderReleaseInput(dec); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, data.data() + consumed, + data.size() - consumed)); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + EXPECT_LE(jxl::test::ComparePixels(pixels2.data(), pixels.data(), xsize, + ysize, format, format), + 0u); + + JxlDecoderDestroy(dec); +} + +class DecodeProgressiveTest : public ::testing::TestWithParam {}; +JXL_GTEST_INSTANTIATE_TEST_SUITE_P(DecodeProgressiveTestInstantiation, + DecodeProgressiveTest, + ::testing::Range(0, 8)); +TEST_P(DecodeProgressiveTest, ProgressiveEventTest) { + const int params = GetParam(); + int single_group = params & 1; + int lossless = (params 
>> 1) & 1; + uint32_t num_channels = 3 + ((params >> 2) & 1); + std::set progressive_details = {kDC, kLastPasses, + kPasses}; + for (auto prog_detail : progressive_details) { + // Only few combinations are expected to support outputting + // intermediate flushes for complete DC and complete passes. + // The test can be updated if more cases are expected to support it. + bool expect_flush = (num_channels & 1) && !lossless; + size_t xsize, ysize; + if (single_group) { + // An image smaller than 256x256 ensures it contains only 1 group. + xsize = 99; + ysize = 100; + } else { + xsize = 277; + ysize = 280; + } + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, num_channels, 0); + JxlPixelFormat format = {num_channels, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + jxl::ColorEncoding color_encoding = jxl::ColorEncoding::SRGB(false); + jxl::CodecInOut io; + EXPECT_TRUE(jxl::ConvertFromExternal( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + color_encoding, + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &io.Main())); + jxl::TestCodestreamParams params; + if (lossless) { + params.cparams.SetLossless(); + } else { + params.cparams.butteraugli_distance = 0.5f; + } + jxl::PassDefinition passes[] = { + {2, 0, 4}, {4, 0, 4}, {8, 2, 2}, {8, 1, 2}, {8, 0, 1}}; + const int kNumPasses = 5; + jxl::ProgressiveMode progressive_mode{passes}; + params.progressive_mode = &progressive_mode; + jxl::PaddedBytes data = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + num_channels, params); + + for (size_t increment : {(size_t)1, data.size()}) { + printf( + "Testing with single_group=%d, lossless=%d, " + "num_channels=%d, prog_detail=%d, increment=%d\n", + single_group, lossless, (int)num_channels, (int)prog_detail, + (int)increment); + std::vector> passes(kNumPasses + 1); + for (int i = 0; i <= kNumPasses; ++i) { + passes[i].resize(pixels.size()); + } + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + 
EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME | + JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION)); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kFrames)); + EXPECT_EQ(JXL_DEC_ERROR, + JxlDecoderSetProgressiveDetail(dec, kDCProgressive)); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kDCGroups)); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderSetProgressiveDetail(dec, kGroups)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetProgressiveDetail(dec, prog_detail)); + + uint8_t* next_in = data.data(); + size_t avail_in = 0; + size_t pos = 0; + + auto process_input = [&]() { + for (;;) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, next_in, avail_in)); + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + size_t remaining = JxlDecoderReleaseInput(dec); + EXPECT_LE(remaining, avail_in); + next_in += avail_in - remaining; + avail_in = remaining; + if (status == JXL_DEC_NEED_MORE_INPUT && pos < data.size()) { + size_t chunk = std::min(increment, data.size() - pos); + pos += chunk; + avail_in += chunk; + continue; + } + return status; + } + }; + + EXPECT_EQ(JXL_DEC_BASIC_INFO, process_input()); + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + + EXPECT_EQ(JXL_DEC_FRAME, process_input()); + + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(pixels.size(), buffer_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, passes[kNumPasses].data(), + passes[kNumPasses].size())); + + auto next_pass = [&](int pass) { + if (prog_detail <= kDC) return kNumPasses; + if (prog_detail <= kLastPasses) { + return std::min(pass + 2, kNumPasses); + } + return pass + 1; + }; + + if (expect_flush) { + // Return a particular downsampling ratio only after the last + // pass for that downsampling was processed. 
+ int expected_downsampling_ratios[] = {8, 8, 4, 4, 2}; + for (int p = 0; p < kNumPasses; p = next_pass(p)) { + EXPECT_EQ(JXL_DEC_FRAME_PROGRESSION, process_input()); + EXPECT_EQ(expected_downsampling_ratios[p], + JxlDecoderGetIntendedDownsamplingRatio(dec)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderFlushImage(dec)); + passes[p] = passes[kNumPasses]; + } + } + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, process_input()); + EXPECT_EQ(JXL_DEC_SUCCESS, process_input()); + + JxlDecoderDestroy(dec); + + if (!expect_flush) { + continue; + } + jxl::ButteraugliParams ba; + std::vector distances(kNumPasses + 1); + for (int p = 0;; p = next_pass(p)) { + jxl::CodecInOut io1; + EXPECT_TRUE(jxl::ConvertFromExternal( + jxl::Span(passes[p].data(), passes[p].size()), xsize, + ysize, color_encoding, + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, &io1.Main())); + distances[p] = ButteraugliDistance(io.frames, io1.frames, ba, + jxl::GetJxlCms(), nullptr, nullptr); + if (p == kNumPasses) break; + } + const float kMaxDistance[kNumPasses + 1] = {30.0f, 20.0f, 10.0f, + 5.0f, 3.0f, 2.0f}; + EXPECT_LT(distances[kNumPasses], kMaxDistance[kNumPasses]); + for (int p = 0; p < kNumPasses;) { + int next_p = next_pass(p); + EXPECT_LT(distances[p], kMaxDistance[p]); + // Verify that the returned pass image is actually not the + // same as the next pass image, by checking that it has a bit + // worse butteraugli score. 
+ EXPECT_LT(distances[next_p] * 1.1f, distances[p]); + p = next_p; + } + } + } +} + +void VerifyJPEGReconstruction(const jxl::PaddedBytes& container, + const jxl::PaddedBytes& jpeg_bytes) { + JxlDecoderPtr dec = JxlDecoderMake(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec.get(), JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE)); + JxlDecoderSetInput(dec.get(), container.data(), container.size()); + EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec.get())); + std::vector reconstructed_buffer(128); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data(), + reconstructed_buffer.size())); + size_t used = 0; + JxlDecoderStatus process_result = JXL_DEC_JPEG_NEED_MORE_OUTPUT; + while (process_result == JXL_DEC_JPEG_NEED_MORE_OUTPUT) { + used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get()); + reconstructed_buffer.resize(reconstructed_buffer.size() * 2); + EXPECT_EQ( + JXL_DEC_SUCCESS, + JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data() + used, + reconstructed_buffer.size() - used)); + process_result = JxlDecoderProcessInput(dec.get()); + } + ASSERT_EQ(JXL_DEC_FULL_IMAGE, process_result); + used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get()); + ASSERT_EQ(used, jpeg_bytes.size()); + EXPECT_EQ(0, memcmp(reconstructed_buffer.data(), jpeg_bytes.data(), used)); +} + +TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructTestCodestream)) { + TEST_LIBJPEG_SUPPORT(); + size_t xsize = 123; + size_t ysize = 77; + size_t channels = 3; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, channels, /*seed=*/0); + jxl::PaddedBytes jpeg_codestream; + jxl::TestCodestreamParams params; + params.cparams.color_transform = jxl::ColorTransform::kNone; + params.box_format = kCSBF_Single; + params.jpeg_codestream = &jpeg_codestream; + params.preview_mode = jxl::kSmallPreview; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + 
jxl::Span(pixels.data(), pixels.size()), xsize, ysize, + channels, params); + VerifyJPEGReconstruction(compressed, jpeg_codestream); +} + +TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) { + const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg"; + const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path); + jxl::CodecInOut orig_io; + ASSERT_TRUE( + jxl::jpeg::DecodeImageJPG(jxl::Span(orig), &orig_io)); + orig_io.metadata.m.xyb_encoded = false; + jxl::BitWriter writer; + ASSERT_TRUE(WriteCodestreamHeaders(&orig_io.metadata, &writer, nullptr)); + writer.ZeroPadToByte(); + jxl::PassesEncoderState enc_state; + jxl::CompressParams cparams; + cparams.color_transform = jxl::ColorTransform::kNone; + ASSERT_TRUE(jxl::EncodeFrame(cparams, jxl::FrameInfo{}, &orig_io.metadata, + orig_io.Main(), &enc_state, jxl::GetJxlCms(), + /*pool=*/nullptr, &writer, + /*aux_out=*/nullptr)); + + jxl::PaddedBytes jpeg_data; + ASSERT_TRUE( + EncodeJPEGData(*orig_io.Main().jpeg_data.get(), &jpeg_data, cparams)); + jxl::PaddedBytes container; + container.append(jxl::kContainerHeader, + jxl::kContainerHeader + sizeof(jxl::kContainerHeader)); + jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_data.size(), false, + &container); + container.append(jpeg_data.data(), jpeg_data.data() + jpeg_data.size()); + jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), 0, true, &container); + jxl::PaddedBytes codestream = std::move(writer).TakeBytes(); + container.append(codestream.data(), codestream.data() + codestream.size()); + VerifyJPEGReconstruction(container, orig); +} + +TEST(DecodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionMetadataTest)) { + const std::string jpeg_path = "jxl/jpeg_reconstruction/1x1_exif_xmp.jpg"; + const std::string jxl_path = "jxl/jpeg_reconstruction/1x1_exif_xmp.jxl"; + const jxl::PaddedBytes jpeg = jxl::test::ReadTestData(jpeg_path); + const jxl::PaddedBytes jxl = jxl::test::ReadTestData(jxl_path); + VerifyJPEGReconstruction(jxl, jpeg); +} + 
+TEST(DecodeTest, ContinueFinalNonEssentialBoxTest) { + size_t xsize = 80, ysize = 90; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + jxl::TestCodestreamParams params; + params.box_format = kCSBF_Multi_Other_Terminated; + params.add_icc_profile = true; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 4, + params); + StreamPositions streampos; + AnalyzeCodestream(compressed, &streampos); + + // The non-essential final box size including 8-byte header + size_t final_box_size = unk3_box_size + 8; + size_t last_box_begin = compressed.size() - final_box_size; + // Verify that the test is indeed set up correctly to be at the beginning of + // the 'unk3' box header. + ASSERT_EQ(compressed[last_box_begin + 3], final_box_size); + ASSERT_EQ(compressed[last_box_begin + 4], 'u'); + ASSERT_EQ(compressed[last_box_begin + 5], 'n'); + ASSERT_EQ(compressed[last_box_begin + 6], 'k'); + ASSERT_EQ(compressed[last_box_begin + 7], '3'); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | JXL_DEC_FRAME)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), last_box_begin)); + + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_FRAME, JxlDecoderProcessInput(dec)); + // The decoder returns success despite not having seen the final unknown box + // yet. This is because calling JxlDecoderCloseInput is not mandatory for + // backwards compatibility, so it doesn't know more bytes follow; the current + // bytes ended at a perfectly valid place. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + size_t remaining = JxlDecoderReleaseInput(dec); + // Since the test was set up to end exactly at the boundary of the final + // codestream box, and the decoder returned success, all bytes are expected to + // be consumed until the end of the frame header. 
+ EXPECT_EQ(remaining, last_box_begin - streampos.frames[0].toc_end); + + // Now set the remaining non-codestream box as input. + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data() + last_box_begin, + compressed.size() - last_box_begin)); + // Even though JxlDecoderProcessInput already returned JXL_DEC_SUCCESS before, + // when calling it again now after setting more input, success is expected, no + // event occurs but the box has been successfully skipped. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + JxlDecoderDestroy(dec); +} + +namespace { +bool BoxTypeEquals(const std::string& type_string, JxlBoxType type) { + return type_string.size() == 4 && type_string[0] == type[0] && + type_string[1] == type[1] && type_string[2] == type[2] && + type_string[3] == type[3]; +} +} // namespace + +TEST(DecodeTest, ExtentedBoxSizeTest) { + const std::string jxl_path = "jxl/boxes/square-extended-size-container.jxl"; + const jxl::PaddedBytes orig = jxl::test::ReadTestData(jxl_path); + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX)); + + JxlBoxType type; + uint64_t box_size; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, orig.data(), orig.size())); + EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE)); + EXPECT_TRUE(BoxTypeEquals("JXL ", type)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size)); + EXPECT_EQ(12, box_size); + EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE)); + EXPECT_TRUE(BoxTypeEquals("ftyp", type)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size)); + EXPECT_EQ(20, box_size); + EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE)); + EXPECT_TRUE(BoxTypeEquals("jxlc", type)); + 
EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size)); + EXPECT_EQ(72, box_size); + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, JXL_BOXES_TEST(BoxTest)) { + size_t xsize = 1, ysize = 1; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + jxl::TestCodestreamParams params; + params.box_format = kCSBF_Multi_Other_Terminated; + params.add_icc_profile = true; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 4, + params); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX)); + + std::vector expected_box_types = { + "JXL ", "ftyp", "jxlp", "unk1", "unk2", "jxlp", "jxlp", "jxlp", "unk3"}; + + // Value 0 means to not test the size: codestream is not required to be a + // particular exact size. + std::vector expected_box_sizes = {12, 20, 0, 34, 18, 0, 0, 0, 20}; + + JxlBoxType type; + uint64_t box_size; + std::vector contents(50); + size_t expected_release_size = 0; + + // Cannot get these when decoding didn't start yet + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetBoxType(dec, type, JXL_FALSE)); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderGetBoxSizeRaw(dec, &box_size)); + + uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + for (size_t i = 0; i < expected_box_types.size(); i++) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + EXPECT_EQ(JXL_DEC_BOX, JxlDecoderProcessInput(dec)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxSizeRaw(dec, &box_size)); + EXPECT_TRUE(BoxTypeEquals(expected_box_types[i], type)); + if (expected_box_sizes[i]) { + EXPECT_EQ(expected_box_sizes[i], box_size); + } + + if (expected_release_size > 0) { + EXPECT_EQ(expected_release_size, JxlDecoderReleaseBoxBuffer(dec)); + expected_release_size = 0; + } + + if (type[0] == 'u' && type[1] == 'n' && type[2] == 
'k') { + JxlDecoderSetBoxBuffer(dec, contents.data(), contents.size()); + size_t expected_box_contents_size = + type[3] == '1' ? unk1_box_size + : (type[3] == '2' ? unk2_box_size : unk3_box_size); + expected_release_size = contents.size() - expected_box_contents_size; + } + size_t consumed = avail_in - JxlDecoderReleaseInput(dec); + next_in += consumed; + avail_in -= consumed; + } + + // After the last DEC_BOX event, check that the input position is exactly at + // the start of the box header. + EXPECT_EQ(avail_in, expected_box_sizes.back()); + + // Even though all input is given, the decoder cannot assume there aren't + // more boxes if the input was not closed. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec, next_in, avail_in)); + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec)); + JxlDecoderCloseInput(dec); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + JxlDecoderDestroy(dec); +} + +TEST(DecodeTest, JXL_BOXES_TEST(ExifBrobBoxTest)) { + size_t xsize = 1, ysize = 1; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + jxl::TestCodestreamParams params; + // Lossless to verify pixels exactly after roundtrip. 
+ params.cparams.SetLossless(); + params.box_format = kCSBF_Brob_Exif; + params.add_icc_profile = true; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 4, + params); + + // Test raw brob box, not brotli-decompressing + for (int streaming = 0; streaming < 2; ++streaming) { + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX)); + if (!streaming) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), compressed.size())); + JxlDecoderCloseInput(dec); + } + // for streaming input case + const uint8_t* next_in = compressed.data(); + size_t avail_in = 0; + size_t total_in = 0; + size_t step_size = 64; + + std::vector box_buffer; + size_t box_num_output; + bool seen_brob_begin = false; + bool seen_brob_end = false; + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + if (status == JXL_DEC_NEED_MORE_INPUT) { + if (streaming) { + size_t remaining = JxlDecoderReleaseInput(dec); + EXPECT_LE(remaining, avail_in); + next_in += avail_in - remaining; + avail_in = remaining; + size_t amount = step_size; + if (total_in + amount > compressed.size()) { + amount = compressed.size() - total_in; + } + avail_in += amount; + total_in += amount; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, next_in, avail_in)); + if (total_in == compressed.size()) JxlDecoderCloseInput(dec); + } else { + FAIL(); + break; + } + } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) { + if (!box_buffer.empty()) { + EXPECT_EQ(false, seen_brob_end); + seen_brob_end = true; + size_t remaining = JxlDecoderReleaseBoxBuffer(dec); + box_num_output = box_buffer.size() - remaining; + EXPECT_EQ(box_num_output, box_brob_exif_size - 8); + EXPECT_EQ( + 0, memcmp(box_buffer.data(), box_brob_exif + 8, box_num_output)); + box_buffer.clear(); + } + if (status == JXL_DEC_SUCCESS) break; + JxlBoxType type; + 
EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE)); + if (BoxTypeEquals("brob", type)) { + EXPECT_EQ(false, seen_brob_begin); + seen_brob_begin = true; + box_buffer.resize(8); + JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size()); + } + } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec); + box_num_output = box_buffer.size() - remaining; + box_buffer.resize(box_buffer.size() * 2); + JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output, + box_buffer.size() - box_num_output); + } else { + // We do not expect any other events or errors + FAIL(); + break; + } + } + + EXPECT_EQ(true, seen_brob_begin); + EXPECT_EQ(true, seen_brob_end); + + JxlDecoderDestroy(dec); + } + + // Test decompressed brob box + for (int streaming = 0; streaming < 2; ++streaming) { + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents(dec, JXL_DEC_BOX)); + if (!streaming) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), compressed.size())); + JxlDecoderCloseInput(dec); + } + // for streaming input case + const uint8_t* next_in = compressed.data(); + size_t avail_in = 0; + size_t total_in = 0; + size_t step_size = 64; + + std::vector box_buffer; + size_t box_num_output; + bool seen_exif_begin = false; + bool seen_exif_end = false; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetDecompressBoxes(dec, JXL_TRUE)); + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + if (status == JXL_DEC_NEED_MORE_INPUT) { + if (streaming) { + size_t remaining = JxlDecoderReleaseInput(dec); + EXPECT_LE(remaining, avail_in); + next_in += avail_in - remaining; + avail_in = remaining; + size_t amount = step_size; + if (total_in + amount > compressed.size()) { + amount = compressed.size() - total_in; + } + avail_in += amount; + total_in += amount; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, next_in, avail_in)); + if 
(total_in == compressed.size()) JxlDecoderCloseInput(dec); + } else { + FAIL(); + break; + } + } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) { + if (!box_buffer.empty()) { + EXPECT_EQ(false, seen_exif_end); + seen_exif_end = true; + size_t remaining = JxlDecoderReleaseBoxBuffer(dec); + box_num_output = box_buffer.size() - remaining; + // Expect that the output has the same size and contents as the + // uncompressed exif data. Only check contents if the sizes match to + // avoid comparing uninitialized memory in the test. + EXPECT_EQ(box_num_output, exif_uncompressed_size); + if (box_num_output == exif_uncompressed_size) { + EXPECT_EQ(0, memcmp(box_buffer.data(), exif_uncompressed, + exif_uncompressed_size)); + } + box_buffer.clear(); + } + if (status == JXL_DEC_SUCCESS) break; + JxlBoxType type; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_TRUE)); + if (BoxTypeEquals("Exif", type)) { + EXPECT_EQ(false, seen_exif_begin); + seen_exif_begin = true; + box_buffer.resize(8); + JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size()); + } + } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec); + box_num_output = box_buffer.size() - remaining; + box_buffer.resize(box_buffer.size() * 2); + JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output, + box_buffer.size() - box_num_output); + } else { + // We do not expect any other events or errors + FAIL(); + break; + } + } + + EXPECT_EQ(true, seen_exif_begin); + EXPECT_EQ(true, seen_exif_end); + + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, JXL_BOXES_TEST(PartialCodestreamBoxTest)) { + size_t xsize = 23, ysize = 81; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + JxlPixelFormat format_orig = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + // Lossless to verify pixels exactly after roundtrip. 
+ jxl::TestCodestreamParams params; + params.cparams.SetLossless(); + params.cparams.speed_tier = jxl::SpeedTier::kThunder; + params.box_format = kCSBF_Multi; + params.add_icc_profile = true; + jxl::PaddedBytes compressed = jxl::CreateTestJXLCodestream( + jxl::Span(pixels.data(), pixels.size()), xsize, ysize, 4, + params); + + std::vector extracted_codestream; + + { + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_BOX)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), compressed.size())); + JxlDecoderCloseInput(dec); + + size_t num_jxlp = 0; + + std::vector pixels2; + pixels2.resize(pixels.size()); + + std::vector box_buffer; + size_t box_num_output; + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + if (status == JXL_DEC_NEED_MORE_INPUT) { + FAIL(); + break; + } else if (status == JXL_DEC_BASIC_INFO) { + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format_orig, pixels2.data(), + pixels2.size())); + } else if (status == JXL_DEC_FULL_IMAGE) { + continue; + } else if (status == JXL_DEC_BOX || status == JXL_DEC_SUCCESS) { + if (!box_buffer.empty()) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec); + box_num_output = box_buffer.size() - remaining; + EXPECT_GE(box_num_output, 4); + // Do not insert the first 4 bytes, which are not part of the + // codestream, but the partial codestream box index + extracted_codestream.insert(extracted_codestream.end(), + box_buffer.begin() + 4, + box_buffer.begin() + box_num_output); + box_buffer.clear(); + } + if (status == JXL_DEC_SUCCESS) break; + JxlBoxType type; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec, type, JXL_FALSE)); + if 
(BoxTypeEquals("jxlp", type)) { + num_jxlp++; + box_buffer.resize(8); + JxlDecoderSetBoxBuffer(dec, box_buffer.data(), box_buffer.size()); + } + } else if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec); + box_num_output = box_buffer.size() - remaining; + box_buffer.resize(box_buffer.size() * 2); + JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output, + box_buffer.size() - box_num_output); + } else { + // We do not expect any other events or errors + FAIL(); + break; + } + } + + // The test file created with kCSBF_Multi is expected to have 4 jxlp boxes. + EXPECT_EQ(4, num_jxlp); + + EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize, + ysize, format_orig, format_orig)); + + JxlDecoderDestroy(dec); + } + + // Now test whether the codestream extracted from the jxlp boxes can itself + // also be decoded and gives the same pixels + { + JxlDecoder* dec = JxlDecoderCreate(nullptr); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE | JXL_DEC_BOX)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, extracted_codestream.data(), + extracted_codestream.size())); + JxlDecoderCloseInput(dec); + + size_t num_boxes = 0; + + std::vector pixels2; + pixels2.resize(pixels.size()); + + std::vector box_buffer; + size_t box_num_output; + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec); + if (status == JXL_DEC_NEED_MORE_INPUT) { + FAIL(); + break; + } else if (status == JXL_DEC_BASIC_INFO) { + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(info.xsize, xsize); + EXPECT_EQ(info.ysize, ysize); + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format_orig, pixels2.data(), + pixels2.size())); + } else if (status == JXL_DEC_FULL_IMAGE) { + continue; + } else if (status == JXL_DEC_BOX) { + num_boxes++; + } else 
if (status == JXL_DEC_BOX_NEED_MORE_OUTPUT) { + size_t remaining = JxlDecoderReleaseBoxBuffer(dec); + box_num_output = box_buffer.size() - remaining; + box_buffer.resize(box_buffer.size() * 2); + JxlDecoderSetBoxBuffer(dec, box_buffer.data() + box_num_output, + box_buffer.size() - box_num_output); + } else if (status == JXL_DEC_SUCCESS) { + break; + } else { + // We do not expect any other events or errors + FAIL(); + break; + } + } + + EXPECT_EQ(0, num_boxes); // The data does not use the container format. + EXPECT_EQ(0u, jxl::test::ComparePixels(pixels.data(), pixels2.data(), xsize, + ysize, format_orig, format_orig)); + + JxlDecoderDestroy(dec); + } +} + +TEST(DecodeTest, SpotColorTest) { + jxl::ThreadPool* pool = nullptr; + jxl::CodecInOut io; + size_t xsize = 55, ysize = 257; + io.metadata.m.color_encoding = jxl::ColorEncoding::LinearSRGB(); + jxl::Image3F main(xsize, ysize); + jxl::ImageF spot(xsize, ysize); + jxl::ZeroFillImage(&main); + jxl::ZeroFillImage(&spot); + + for (size_t y = 0; y < ysize; y++) { + float* JXL_RESTRICT rowm = main.PlaneRow(1, y); + float* JXL_RESTRICT rows = spot.Row(y); + for (size_t x = 0; x < xsize; x++) { + rowm[x] = (x + y) * (1.f / 255.f); + rows[x] = ((x ^ y) & 255) * (1.f / 255.f); + } + } + io.SetFromImage(std::move(main), jxl::ColorEncoding::LinearSRGB()); + jxl::ExtraChannelInfo info; + info.bit_depth.bits_per_sample = 8; + info.dim_shift = 0; + info.type = jxl::ExtraChannel::kSpotColor; + info.spot_color[0] = 0.5f; + info.spot_color[1] = 0.2f; + info.spot_color[2] = 1.f; + info.spot_color[3] = 0.5f; + + io.metadata.m.extra_channel_info.push_back(info); + std::vector ec; + ec.push_back(std::move(spot)); + io.frames[0].SetExtraChannels(std::move(ec)); + + jxl::CompressParams cparams; + cparams.speed_tier = jxl::SpeedTier::kLightning; + cparams.modular_mode = true; + cparams.color_transform = jxl::ColorTransform::kNone; + cparams.butteraugli_distance = 0.f; + + jxl::PaddedBytes compressed; + std::unique_ptr enc_state = + 
jxl::make_unique(); + EXPECT_TRUE(jxl::EncodeFile(cparams, &io, enc_state.get(), &compressed, + jxl::GetJxlCms(), nullptr, pool)); + + for (size_t render_spot = 0; render_spot < 2; render_spot++) { + JxlPixelFormat format = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}; + + JxlDecoder* dec = JxlDecoderCreate(NULL); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec, JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE)); + if (!render_spot) { + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetRenderSpotcolors(dec, JXL_FALSE)); + } + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetInput(dec, compressed.data(), compressed.size())); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + JxlBasicInfo binfo; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &binfo)); + EXPECT_EQ(1u, binfo.num_extra_channels); + EXPECT_EQ(xsize, binfo.xsize); + EXPECT_EQ(ysize, binfo.ysize); + + JxlExtraChannelInfo extra_info; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetExtraChannelInfo(dec, 0, &extra_info)); + EXPECT_EQ((unsigned int)jxl::ExtraChannel::kSpotColor, extra_info.type); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + size_t extra_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderExtraChannelBufferSize(dec, &format, &extra_size, 0)); + + std::vector image(buffer_size); + std::vector extra(extra_size); + size_t bytes_per_pixel = format.num_channels * + jxl::test::GetDataBits(format.data_type) / + jxl::kBitsPerByte; + size_t stride = bytes_per_pixel * binfo.xsize; + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer( + dec, &format, image.data(), image.size())); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetExtraChannelBuffer(dec, &format, extra.data(), + extra.size(), 0)); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + + // After the full image was output, JxlDecoderProcessInput should return + // success to indicate 
all is done. + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + JxlDecoderDestroy(dec); + + for (size_t y = 0; y < ysize; y++) { + uint8_t* JXL_RESTRICT rowm = image.data() + stride * y; + uint8_t* JXL_RESTRICT rows = extra.data() + xsize * y; + for (size_t x = 0; x < xsize; x++) { + if (!render_spot) { + // if spot color isn't rendered, main image should be as we made it + // (red and blue are all zeroes) + + EXPECT_EQ(rowm[x * 3 + 0], 0); + EXPECT_EQ(rowm[x * 3 + 1], (x + y > 255 ? 255 : x + y)); + EXPECT_EQ(rowm[x * 3 + 2], 0); + } + if (render_spot) { + // if spot color is rendered, expect red and blue to look like the + // spot color channel + EXPECT_LT(abs(rowm[x * 3 + 0] - (rows[x] * 0.25f)), 1); + EXPECT_LT(abs(rowm[x * 3 + 2] - (rows[x] * 0.5f)), 1); + } + EXPECT_EQ(rows[x], ((x ^ y) & 255)); + } + } + } +} + +TEST(DecodeTest, CloseInput) { + std::vector partial_file = {0xff}; + + JxlDecoderPtr dec = JxlDecoderMake(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec.get(), + JXL_DEC_BASIC_INFO | JXL_DEC_FULL_IMAGE)); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetInput(dec.get(), partial_file.data(), + partial_file.size())); + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec.get())); + EXPECT_EQ(JXL_DEC_NEED_MORE_INPUT, JxlDecoderProcessInput(dec.get())); + JxlDecoderCloseInput(dec.get()); + EXPECT_EQ(JXL_DEC_ERROR, JxlDecoderProcessInput(dec.get())); +} diff --git a/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.cc b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.cc new file mode 100644 index 0000000000..40d8b1354d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.cc @@ -0,0 +1,169 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/decode_to_jpeg.h" + +namespace jxl { + +#if JPEGXL_ENABLE_TRANSCODE_JPEG + +JxlDecoderStatus JxlToJpegDecoder::Process(const uint8_t** next_in, + size_t* avail_in) { + if (!inside_box_) { + JXL_UNREACHABLE( + "processing of JPEG reconstruction data outside JPEG reconstruction " + "box"); + } + Span to_decode; + if (box_until_eof_) { + // Until EOF means consume all data. + to_decode = Span(*next_in, *avail_in); + *next_in += *avail_in; + *avail_in = 0; + } else { + // Defined size means consume min(available, needed). + size_t avail_recon_in = + std::min(*avail_in, box_size_ - buffer_.size()); + to_decode = Span(*next_in, avail_recon_in); + *next_in += avail_recon_in; + *avail_in -= avail_recon_in; + } + bool old_data_exists = !buffer_.empty(); + if (old_data_exists) { + // Append incoming data to buffer if we already had data in the buffer. + buffer_.insert(buffer_.end(), to_decode.data(), + to_decode.data() + to_decode.size()); + to_decode = Span(buffer_.data(), buffer_.size()); + } + if (!box_until_eof_ && to_decode.size() > box_size_) { + JXL_UNREACHABLE("JPEG reconstruction data to decode larger than expected"); + } + if (box_until_eof_ || to_decode.size() == box_size_) { + // If undefined size, or the right size, try to decode. + jpeg_data_ = make_unique(); + const auto status = jpeg::DecodeJPEGData(to_decode, jpeg_data_.get()); + if (status.IsFatalError()) return JXL_DEC_ERROR; + if (status) { + // Successful decoding, emit event after updating state to track that we + // are no longer parsing JPEG reconstruction data. + inside_box_ = false; + return JXL_DEC_JPEG_RECONSTRUCTION; + } + if (box_until_eof_) { + // Unsuccessful decoding and undefined size, assume incomplete data. Copy + // the data if we haven't already. + if (!old_data_exists) { + buffer_.insert(buffer_.end(), to_decode.data(), + to_decode.data() + to_decode.size()); + } + } else { + // Unsuccessful decoding of correct amount of data, assume error. 
+ return JXL_DEC_ERROR; + } + } else { + // Not enough data, copy the data if we haven't already. + if (!old_data_exists) { + buffer_.insert(buffer_.end(), to_decode.data(), + to_decode.data() + to_decode.size()); + } + } + return JXL_DEC_NEED_MORE_INPUT; +} + +size_t JxlToJpegDecoder::NumExifMarkers(const jpeg::JPEGData& jpeg_data) { + size_t num = 0; + for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) { + if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kExif) { + num++; + } + } + return num; +} + +size_t JxlToJpegDecoder::NumXmpMarkers(const jpeg::JPEGData& jpeg_data) { + size_t num = 0; + for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) { + if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kXMP) { + num++; + } + } + return num; +} + +JxlDecoderStatus JxlToJpegDecoder::ExifBoxContentSize( + const jpeg::JPEGData& jpeg_data, size_t* size) { + for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) { + if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kExif) { + if (jpeg_data.app_data[i].size() < 3 + sizeof(jpeg::kExifTag)) { + // too small for app marker header + return JXL_DEC_ERROR; + } + // The first 4 bytes are the TIFF header from the box contents, and are + // not included in the JPEG + *size = jpeg_data.app_data[i].size() + 4 - 3 - sizeof(jpeg::kExifTag); + return JXL_DEC_SUCCESS; + } + } + return JXL_DEC_ERROR; +} + +JxlDecoderStatus JxlToJpegDecoder::XmlBoxContentSize( + const jpeg::JPEGData& jpeg_data, size_t* size) { + for (size_t i = 0; i < jpeg_data.app_data.size(); ++i) { + if (jpeg_data.app_marker_type[i] == jxl::jpeg::AppMarkerType::kXMP) { + if (jpeg_data.app_data[i].size() < 3 + sizeof(jpeg::kXMPTag)) { + // too small for app marker header + return JXL_DEC_ERROR; + } + *size = jpeg_data.app_data[i].size() - 3 - sizeof(jpeg::kXMPTag); + return JXL_DEC_SUCCESS; + } + } + return JXL_DEC_ERROR; +} + +JxlDecoderStatus JxlToJpegDecoder::SetExif(const uint8_t* data, size_t size, + jpeg::JPEGData* jpeg_data) 
{ + for (size_t i = 0; i < jpeg_data->app_data.size(); ++i) { + if (jpeg_data->app_marker_type[i] == jxl::jpeg::AppMarkerType::kExif) { + if (jpeg_data->app_data[i].size() != + size + 3 + sizeof(jpeg::kExifTag) - 4) + return JXL_DEC_ERROR; + // The first 9 bytes are used for JPEG marker header. + jpeg_data->app_data[i][0] = 0xE1; + // The second and third byte are already filled in correctly + memcpy(jpeg_data->app_data[i].data() + 3, jpeg::kExifTag, + sizeof(jpeg::kExifTag)); + // The first 4 bytes are the TIFF header from the box contents, and are + // not included in the JPEG + memcpy(jpeg_data->app_data[i].data() + 3 + sizeof(jpeg::kExifTag), + data + 4, size - 4); + return JXL_DEC_SUCCESS; + } + } + return JXL_DEC_ERROR; +} +JxlDecoderStatus JxlToJpegDecoder::SetXmp(const uint8_t* data, size_t size, + jpeg::JPEGData* jpeg_data) { + for (size_t i = 0; i < jpeg_data->app_data.size(); ++i) { + if (jpeg_data->app_marker_type[i] == jxl::jpeg::AppMarkerType::kXMP) { + if (jpeg_data->app_data[i].size() != size + 3 + sizeof(jpeg::kXMPTag)) + return JXL_DEC_ERROR; + // The first 9 bytes are used for JPEG marker header. + jpeg_data->app_data[i][0] = 0xE1; + // The second and third byte are already filled in correctly + memcpy(jpeg_data->app_data[i].data() + 3, jpeg::kXMPTag, + sizeof(jpeg::kXMPTag)); + memcpy(jpeg_data->app_data[i].data() + 3 + sizeof(jpeg::kXMPTag), data, + size); + return JXL_DEC_SUCCESS; + } + } + return JXL_DEC_ERROR; +} + +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.h b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.h new file mode 100644 index 0000000000..a64ace27a2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/decode_to_jpeg.h @@ -0,0 +1,217 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_DECODE_TO_JPEG_H_ +#define LIB_JXL_DECODE_TO_JPEG_H_ + +// JPEG XL to JPEG bytes decoder logic. The JxlToJpegDecoder class keeps track +// of the decoder state needed to parse the JPEG reconstruction box and provide +// the reconstructed JPEG to the output buffer. + +#include +#include +#include + +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" // JPEGXL_ENABLE_TRANSCODE_JPEG +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/jpeg/dec_jpeg_data.h" +#if JPEGXL_ENABLE_TRANSCODE_JPEG +#include "lib/jxl/jpeg/dec_jpeg_data_writer.h" +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + +namespace jxl { + +#if JPEGXL_ENABLE_TRANSCODE_JPEG + +class JxlToJpegDecoder { + public: + // Returns whether an output buffer is set. + bool IsOutputSet() const { return next_out_ != nullptr; } + + // Returns whether the decoder is parsing a boxa JPEG box was parsed. + bool IsParsingBox() const { return inside_box_; } + + // Sets the output buffer used when producing JPEG output. + JxlDecoderStatus SetOutputBuffer(uint8_t* data, size_t size) { + if (next_out_) return JXL_DEC_ERROR; + next_out_ = data; + avail_size_ = size; + return JXL_DEC_SUCCESS; + } + + // Releases the buffer set with SetOutputBuffer(). + size_t ReleaseOutputBuffer() { + size_t result = avail_size_; + next_out_ = nullptr; + avail_size_ = 0; + return result; + } + + void StartBox(bool box_until_eof, size_t contents_size) { + // A new box implies that we clear the buffer. + buffer_.clear(); + inside_box_ = true; + if (box_until_eof) { + box_until_eof_ = true; + } else { + box_size_ = contents_size; + } + } + + // Consumes data from next_in/avail_in to reconstruct JPEG data. + // Uses box_size_, inside_box_ and box_until_eof_ to calculate how much to + // consume. Potentially stores unparsed data in buffer_. + // Potentially populates jpeg_data_. Potentially updates inside_box_. 
+ // Returns JXL_DEC_JPEG_RECONSTRUCTION when finished, JXL_DEC_NEED_MORE_INPUT + // if more input is needed, JXL_DEC_ERROR on parsing error. + JxlDecoderStatus Process(const uint8_t** next_in, size_t* avail_in); + + // Returns non-owned copy of the JPEGData, only after Process finished and + // the JPEGData was not yet moved to an image bundle with + // SetImageBundleJpegData. + jpeg::JPEGData* GetJpegData() { return jpeg_data_.get(); } + + // Returns how many exif or xmp app markers are present in the JPEG data. A + // return value higher than 1 would require multiple exif boxes or multiple + // xmp boxes in the container format, and this is not supported by the API and + // considered an error. May only be called after Process returned success. + static size_t NumExifMarkers(const jpeg::JPEGData& jpeg_data); + static size_t NumXmpMarkers(const jpeg::JPEGData& jpeg_data); + + // Returns box content size for metadata, using the known data from the app + // markers. + static JxlDecoderStatus ExifBoxContentSize(const jpeg::JPEGData& jpeg_data, + size_t* size); + static JxlDecoderStatus XmlBoxContentSize(const jpeg::JPEGData& jpeg_data, + size_t* size); + + // Returns JXL_DEC_ERROR if there is no exif/XMP marker or the data size + // does not match, or this function is called before Process returned + // success, JXL_DEC_SUCCESS otherwise. As input, provide the full box contents + // but not the box header. In case of exif, this includes the 4-byte TIFF + // header, even though it won't be copied into the JPEG. + static JxlDecoderStatus SetExif(const uint8_t* data, size_t size, + jpeg::JPEGData* jpeg_data); + static JxlDecoderStatus SetXmp(const uint8_t* data, size_t size, + jpeg::JPEGData* jpeg_data); + + // Sets the JpegData of the ImageBundle passed if there is anything to set. + // Releases the JpegData from this decoder if set. 
+ Status SetImageBundleJpegData(ImageBundle* ib) { + if (IsOutputSet() && jpeg_data_ != nullptr) { + if (!jpeg::SetJPEGDataFromICC(ib->metadata()->color_encoding.ICC(), + jpeg_data_.get())) { + return false; + } + ib->jpeg_data.reset(jpeg_data_.release()); + } + return true; + } + + JxlDecoderStatus WriteOutput(const jpeg::JPEGData& jpeg_data) { + // Copy JPEG bytestream if desired. + uint8_t* tmp_next_out = next_out_; + size_t tmp_avail_size = avail_size_; + auto write = [&tmp_next_out, &tmp_avail_size](const uint8_t* buf, + size_t len) { + size_t to_write = std::min(tmp_avail_size, len); + if (to_write != 0) memcpy(tmp_next_out, buf, to_write); + tmp_next_out += to_write; + tmp_avail_size -= to_write; + return to_write; + }; + Status write_result = jpeg::WriteJpeg(jpeg_data, write); + if (!write_result) { + if (tmp_avail_size == 0) { + return JXL_DEC_JPEG_NEED_MORE_OUTPUT; + } + return JXL_DEC_ERROR; + } + next_out_ = tmp_next_out; + avail_size_ = tmp_avail_size; + return JXL_DEC_SUCCESS; + } + + private: + // Content of the most recently parsed JPEG reconstruction box if any. + std::vector buffer_; + + // Decoded content of the most recently parsed JPEG reconstruction box is + // stored here. + std::unique_ptr jpeg_data_; + + // True if the decoder is currently reading bytes inside a JPEG reconstruction + // box. + bool inside_box_ = false; + + // True if the JPEG reconstruction box had undefined size (all remaining + // bytes). + bool box_until_eof_ = false; + // Size of most recently parsed JPEG reconstruction box contents. + size_t box_size_ = 0; + + // Next bytes to write JPEG reconstruction to. + uint8_t* next_out_ = nullptr; + // Available bytes to write JPEG reconstruction to. + size_t avail_size_ = 0; +}; + +#else + +// Fake class that disables support for decoding JPEG XL to JPEG. 
+class JxlToJpegDecoder { + public: + bool IsOutputSet() const { return false; } + bool IsParsingBox() const { return false; } + + JxlDecoderStatus SetOutputBuffer(uint8_t* /* data */, size_t /* size */) { + return JXL_DEC_ERROR; + } + size_t ReleaseOutputBuffer() { return 0; } + + void StartBox(bool /* box_until_eof */, size_t /* contents_size */) {} + + JxlDecoderStatus Process(const uint8_t** next_in, size_t* avail_in) { + return JXL_DEC_ERROR; + } + jpeg::JPEGData* GetJpegData() { return nullptr; } + + Status SetImageBundleJpegData(ImageBundle* /* ib */) { return true; } + + static size_t NumExifMarkers(const jpeg::JPEGData& /*jpeg_data*/) { + return 0; + } + static size_t NumXmpMarkers(const jpeg::JPEGData& /*jpeg_data*/) { return 0; } + static size_t ExifBoxContentSize(const jpeg::JPEGData& /*jpeg_data*/, + size_t* /*size*/) { + return JXL_DEC_ERROR; + } + static size_t XmlBoxContentSize(const jpeg::JPEGData& /*jpeg_data*/, + size_t* /*size*/) { + return JXL_DEC_ERROR; + } + static JxlDecoderStatus SetExif(const uint8_t* /*data*/, size_t /*size*/, + jpeg::JPEGData* /*jpeg_data*/) { + return JXL_DEC_ERROR; + } + static JxlDecoderStatus SetXmp(const uint8_t* /*data*/, size_t /*size*/, + jpeg::JPEGData* /*jpeg_data*/) { + return JXL_DEC_ERROR; + } + + JxlDecoderStatus WriteOutput(const jpeg::JPEGData& /* jpeg_data */) { + return JXL_DEC_SUCCESS; + } +}; + +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + +} // namespace jxl + +#endif // LIB_JXL_DECODE_TO_JPEG_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.cc b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.cc new file mode 100644 index 0000000000..44e6f049eb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.cc @@ -0,0 +1,1200 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_ac_strategy.h" + +#include +#include + +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_ac_strategy.cc" +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_debug_image.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_transforms-inl.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/fast_math-inl.h" + +// Some of the floating point constants in this file and in other +// files in the libjxl project have been obtained using the +// tools/optimizer/simplex_fork.py tool. It is a variation of +// Nelder-Mead optimization, and we generally try to minimize +// BPP * pnorm aggregate as reported by the benchmark_xl tool, +// but occasionally the values are optimized by using additional +// constraints such as maintaining a certain density, or ratio of +// popularity of integral transforms. Jyrki visually reviews all +// such changes and often makes manual changes to maintain good +// visual quality to changes where butteraugli was not sufficiently +// sensitive to some kind of degradation. Unfortunately image quality +// is still more of an art than science. + +// Set JXL_DEBUG_AC_STRATEGY to 1 to enable debugging. +#ifndef JXL_DEBUG_AC_STRATEGY +#define JXL_DEBUG_AC_STRATEGY 0 +#endif + +// This must come before the begin/end_target, but HWY_ONCE is only true +// after that, so use an "include guard". +#ifndef LIB_JXL_ENC_AC_STRATEGY_ +#define LIB_JXL_ENC_AC_STRATEGY_ +// Parameters of the heuristic are marked with a OPTIMIZE comment. +namespace jxl { +namespace { + +// Debugging utilities. + +// Returns a linear sRGB color (as bytes) for each AC strategy. 
+// Returns a pointer to the 3-byte colour entry that the debug dump uses for
+// the given raw AC strategy. The table has one entry per valid strategy (27).
+const uint8_t* TypeColor(const uint8_t& raw_strategy) {
+  JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+  static_assert(AcStrategy::kNumValidStrategies == 27, "Change colors");
+  static constexpr uint8_t kColors[][3] = {
+      {0xFF, 0xFF, 0x00},  // DCT8
+      {0xFF, 0x80, 0x80},  // HORNUSS
+      {0xFF, 0x80, 0x80},  // DCT2x2
+      {0xFF, 0x80, 0x80},  // DCT4x4
+      {0x80, 0xFF, 0x00},  // DCT16x16
+      {0x00, 0xC0, 0x00},  // DCT32x32
+      {0xC0, 0xFF, 0x00},  // DCT16x8
+      {0xC0, 0xFF, 0x00},  // DCT8x16
+      {0x00, 0xFF, 0x00},  // DCT32x8
+      {0x00, 0xFF, 0x00},  // DCT8x32
+      {0x00, 0xFF, 0x00},  // DCT32x16
+      {0x00, 0xFF, 0x00},  // DCT16x32
+      {0xFF, 0x80, 0x00},  // DCT4x8
+      {0xFF, 0x80, 0x00},  // DCT8x4
+      {0xFF, 0xFF, 0x80},  // AFV0
+      {0xFF, 0xFF, 0x80},  // AFV1
+      {0xFF, 0xFF, 0x80},  // AFV2
+      {0xFF, 0xFF, 0x80},  // AFV3
+      {0x00, 0xC0, 0xFF},  // DCT64x64
+      {0x00, 0xFF, 0xFF},  // DCT64x32
+      {0x00, 0xFF, 0xFF},  // DCT32x64
+      {0x00, 0x40, 0xFF},  // DCT128x128
+      {0x00, 0x80, 0xFF},  // DCT128x64
+      {0x00, 0x80, 0xFF},  // DCT64x128
+      {0x00, 0x00, 0xC0},  // DCT256x256
+      {0x00, 0x00, 0xFF},  // DCT256x128
+      {0x00, 0x00, 0xFF},  // DCT128x256
+  };
+  return kColors[raw_strategy];
+}
+
+// Returns a pointer to the 64-entry (8x8, row-major) overlay mask for the
+// given raw AC strategy; non-zero entries are the pixels DumpAcStrategy
+// darkens inside a single 8x8 block.
+// NOTE(review): kMask only has 18 rows (strategies 0..17, up to AFV3), while
+// the assert admits all 27 strategies. For larger strategies the returned
+// pointer is past the table; DumpAcStrategy only dereferences it for
+// strategies covering exactly one block.
+const uint8_t* TypeMask(const uint8_t& raw_strategy) {
+  JXL_ASSERT(AcStrategy::IsRawStrategyValid(raw_strategy));
+  static_assert(AcStrategy::kNumValidStrategies == 27, "Add masks");
+  // implicitly, first row and column is made dark
+  static constexpr uint8_t kMask[][64] = {
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // DCT8
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 1, 1, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 1, 0, 0, 1, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // HORNUSS
+      {
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          1, 0, 1, 0, 1, 0, 1, 0,  //
+      },                           // 2x2
+      {
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+      },                           // 4x4
+      {},                          // DCT16x16 (unused)
+      {},                          // DCT32x32 (unused)
+      {},                          // DCT16x8 (unused)
+      {},                          // DCT8x16 (unused)
+      {},                          // DCT32x8 (unused)
+      {},                          // DCT8x32 (unused)
+      {},                          // DCT32x16 (unused)
+      {},                          // DCT16x32 (unused)
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 1, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // DCT4x8
+      {
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+          0, 0, 0, 0, 1, 0, 0, 0,  //
+      },                           // DCT8x4
+      {
+          1, 1, 1, 1, 1, 0, 0, 0,  //
+          1, 1, 1, 1, 0, 0, 0, 0,  //
+          1, 1, 1, 0, 0, 0, 0, 0,  //
+          1, 1, 0, 0, 0, 0, 0, 0,  //
+          1, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // AFV0
+      {
+          0, 0, 0, 0, 1, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 1, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 1, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 1,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+      },                           // AFV1
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 0, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 0, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 0, 0, 0, 0, 0,  //
+          1, 1, 1, 1, 0, 0, 0, 0,  //
+      },                           // AFV2
+      {
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 0,  //
+          0, 0, 0, 0, 0, 0, 0, 1,  //
+          0, 0, 0, 0, 0, 0, 1, 1,  //
+          0, 0, 0, 0, 0, 1, 1, 1,  //
+      },                           // AFV3
+  };
+  return kMask[raw_strategy];
+}
+
+// Renders the chosen AC strategies as a colour image and hands it to
+// DumpImage under `tag`. Every pixel is first filled with the TypeColor of
+// the strategy covering it; then, per transform, the top and left edges are
+// darkened and (for single-block transforms) the TypeMask pattern is drawn.
+// `aux_out` is not used by this function body.
+void DumpAcStrategy(const AcStrategyImage& ac_strategy, size_t xsize,
+                    size_t ysize, const char* tag, AuxOut* aux_out,
+                    const CompressParams& cparams) {
+  Image3F color_acs(xsize, ysize);
+  // Pass 1: flat fill with the per-strategy colour, scaled to [0, 1].
+  for (size_t y = 0; y < ysize; y++) {
+    float* JXL_RESTRICT rows[3] = {
+        color_acs.PlaneRow(0, y),
+        color_acs.PlaneRow(1, y),
+        color_acs.PlaneRow(2, y),
+    };
+    const AcStrategyRow acs_row = ac_strategy.ConstRow(y / kBlockDim);
+    for (size_t x = 0; x < xsize; x++) {
+      AcStrategy acs = acs_row[x / kBlockDim];
+      const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy());
+      for (size_t c = 0; c < 3; c++) {
+        rows[c][x] = color[c] / 255.f;
+      }
+    }
+  }
+  // Pass 2: overlays. `row` points at the first pixel row of block row `by`;
+  // further rows of the block are reached via `stride`.
+  size_t stride = color_acs.PixelsPerRow();
+  for (size_t c = 0; c < 3; c++) {
+    for (size_t by = 0; by < DivCeil(ysize, kBlockDim); by++) {
+      float* JXL_RESTRICT row = color_acs.PlaneRow(c, by * kBlockDim);
+      const AcStrategyRow acs_row = ac_strategy.ConstRow(by);
+      for (size_t bx = 0; bx < DivCeil(xsize, kBlockDim); bx++) {
+        AcStrategy acs = acs_row[bx];
+        // Only the first (top-left) block of a transform draws overlays.
+        if (!acs.IsFirstBlock()) continue;
+        const uint8_t* JXL_RESTRICT color = TypeColor(acs.RawStrategy());
+        const uint8_t* JXL_RESTRICT mask = TypeMask(acs.RawStrategy());
+        // The mask is only read for transforms that cover a single block.
+        if (acs.covered_blocks_x() == 1 && acs.covered_blocks_y() == 1) {
+          for (size_t iy = 0; iy < kBlockDim && by * kBlockDim + iy < ysize;
+               iy++) {
+            for (size_t ix = 0; ix < kBlockDim && bx * kBlockDim + ix < xsize;
+                 ix++) {
+              if (mask[iy * kBlockDim + ix]) {
+                row[iy * stride + bx * kBlockDim + ix] = color[c] / 800.f;
+              }
+            }
+          }
+        }
+        // draw block edges
+        for (size_t ix = 0; ix < kBlockDim * acs.covered_blocks_x() &&
+                            bx * kBlockDim + ix < xsize;
+             ix++) {
+          row[0 * stride + bx * kBlockDim + ix] = color[c] / 350.f;
+        }
+        for (size_t iy = 0; iy < kBlockDim * acs.covered_blocks_y() &&
+                            by * kBlockDim + iy < ysize;
+             iy++) {
+          row[iy * stride + bx * kBlockDim + 0] = color[c] / 350.f;
+        }
+      }
+    }
+  }
+  DumpImage(cparams, tag, color_acs);
+}
+
+}  // namespace
+}  // namespace jxl
+#endif  // LIB_JXL_ENC_AC_STRATEGY_
+
+HWY_BEFORE_NAMESPACE();
+namespace jxl {
+namespace HWY_NAMESPACE {
+
+// These templates are not found via ADL.
+using hwy::HWY_NAMESPACE::AbsDiff;
+using hwy::HWY_NAMESPACE::Eq;
+using hwy::HWY_NAMESPACE::IfThenElseZero;
+using hwy::HWY_NAMESPACE::IfThenZeroElse;
+using hwy::HWY_NAMESPACE::Round;
+using hwy::HWY_NAMESPACE::Sqrt;
+
+// Walks block row `y` of `ac_strategy` over [start_x, end_x) and returns true
+// as soon as a position where a transform is expected to start holds a block
+// that is not the first block of its transform. Returns false when the start
+// position is outside the image or when `y` is a multiple of 8.
+// All coordinates are in 8x8-block units.
+bool MultiBlockTransformCrossesHorizontalBoundary(
+    const AcStrategyImage& ac_strategy, size_t start_x, size_t y,
+    size_t end_x) {
+  if (start_x >= ac_strategy.xsize() || y >= ac_strategy.ysize()) {
+    return false;
+  }
+  if (y % 8 == 0) {
+    // Nothing crosses 64x64 boundaries, and the memory on the other side
+    // of the 64x64 block may still be uninitialized.
+    return false;
+  }
+  end_x = std::min(end_x, ac_strategy.xsize());
+  // The first multiblock might be before the start_x, let's adjust it
+  // to point to the first IsFirstBlock() == true block we find by backward
+  // tracing.
+  AcStrategyRow row = ac_strategy.ConstRow(y);
+  // Never trace back past the start of the enclosing group of 8 blocks.
+  const size_t start_x_limit = start_x & ~7;
+  while (start_x != start_x_limit && !row[start_x].IsFirstBlock()) {
+    --start_x;
+  }
+  // Hop from transform to transform using each one's covered width.
+  for (size_t x = start_x; x < end_x;) {
+    if (row[x].IsFirstBlock()) {
+      x += row[x].covered_blocks_x();
+    } else {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Column counterpart of MultiBlockTransformCrossesHorizontalBoundary: walks
+// block column `x` over [start_y, end_y) and returns true as soon as a
+// position where a transform is expected to start holds a block that is not
+// the first block of its transform. Returns false when the start position is
+// outside the image or when `x` is a multiple of 8.
+bool MultiBlockTransformCrossesVerticalBoundary(
+    const AcStrategyImage& ac_strategy, size_t x, size_t start_y,
+    size_t end_y) {
+  if (x >= ac_strategy.xsize() || start_y >= ac_strategy.ysize()) {
+    return false;
+  }
+  if (x % 8 == 0) {
+    // Nothing crosses 64x64 boundaries, and the memory on the other side
+    // of the 64x64 block may still be uninitialized.
+    return false;
+  }
+  end_y = std::min(end_y, ac_strategy.ysize());
+  // The first multiblock might be before the start_y, let's adjust it
+  // to point to the first IsFirstBlock() == true block we find by backward
+  // tracing.
+  const size_t start_y_limit = start_y & ~7;
+  while (start_y != start_y_limit &&
+         !ac_strategy.ConstRow(start_y)[x].IsFirstBlock()) {
+    --start_y;
+  }
+
+  // Hop from transform to transform using each one's covered height.
+  for (size_t y = start_y; y < end_y;) {
+    AcStrategyRow row = ac_strategy.ConstRow(y);
+    if (row[x].IsFirstBlock()) {
+      y += row[x].covered_blocks_y();
+    } else {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Per-strategy weight, indexed by raw strategy, that EstimateEntropy applies
+// to the entropy of channel 0 only (the other two channels use 1.0).
+static const float kChromaErrorWeight[AcStrategy::kNumValidStrategies] = {
+    0.95f,  // DCT = 0,
+    1.0f,   // IDENTITY = 1,
+    0.5f,   // DCT2X2 = 2,
+    1.0f,   // DCT4X4 = 3,
+    2.0f,   // DCT16X16 = 4,
+    2.0f,   // DCT32X32 = 5,
+    1.4f,   // DCT16X8 = 6,
+    1.4f,   // DCT8X16 = 7,
+    2.0f,   // DCT32X8 = 8,
+    2.0f,   // DCT8X32 = 9,
+    2.0f,   // DCT32X16 = 10,
+    2.0f,   // DCT16X32 = 11,
+    2.0f,   // DCT4X8 = 12,
+    2.0f,   // DCT8X4 = 13,
+    1.7f,   // AFV0 = 14,
+    1.7f,   // AFV1 = 15,
+    1.7f,   // AFV2 = 16,
+    1.7f,   // AFV3 = 17,
+    2.0f,   // DCT64X64 = 18,
+    2.0f,   // DCT64X32 = 19,
+    2.0f,   // DCT32X64 = 20,
+    2.0f,   // DCT128X128 = 21,
+    2.0f,   // DCT128X64 = 22,
+    2.0f,   // DCT64X128 = 23,
+    2.0f,   // DCT256X256 = 24,
+    2.0f,   // DCT256X128 = 25,
+    2.0f,   // DCT128X256 = 26,
+};
+
+// For DCT the maximum error is roughly a sum of the values.
+// For some transforms, especially IDENTITY and DCT2X2, not all
+// the coefficients affect the maximum error. Probably would
+// be better to do transforms back and forth and look at the pixels
+// but that would significantly slow down the computation.
+// Per-strategy mixing factor, indexed by raw strategy. EstimateEntropy blends
+// the summed rounding error (weight kMixLoss) with the root-sum-square
+// rounding error (weight 1 - kMixLoss).
+static const float kMixLossTable[AcStrategy::kNumValidStrategies] = {
+    1.0f,   // DCT = 0,
+    0.45f,  // IDENTITY = 1,
+    0.45f,  // DCT2X2 = 2,
+    0.7f,   // DCT4X4 = 3,
+    1.0f,   // DCT16X16 = 4,
+    1.0f,   // DCT32X32 = 5,
+    1.0f,   // DCT16X8 = 6,
+    1.0f,   // DCT8X16 = 7,
+    1.0f,   // DCT32X8 = 8,
+    1.0f,   // DCT8X32 = 9,
+    1.0f,   // DCT32X16 = 10,
+    1.0f,   // DCT16X32 = 11,
+    0.96f,  // DCT4X8 = 12,
+    0.96f,  // DCT8X4 = 13,
+    0.94f,  // AFV0 = 14,
+    0.94f,  // AFV1 = 15,
+    0.94f,  // AFV2 = 16,
+    0.94f,  // AFV3 = 17,
+    1.0f,   // DCT64X64 = 18,
+    1.0f,   // DCT64X32 = 19,
+    1.0f,   // DCT32X64 = 20,
+    1.0f,   // DCT128X128 = 21,
+    1.0f,   // DCT128X64 = 22,
+    1.0f,   // DCT64X128 = 23,
+    1.0f,   // DCT256X256 = 24,
+    1.0f,   // DCT256X128 = 25,
+    1.0f,   // DCT128X256 = 26,
+};
+
+// Returns a cost estimate for coding the area at pixel position (x, y) with
+// transform `acs`: an entropy term built from the quantized coefficients plus
+// a masking-weighted information-loss term built from the rounding error.
+//
+// `block` receives the transformed coefficients of the three channels, laid
+// out back to back with `size` floats per channel; `scratch_space` is passed
+// through to TransformFromPixels. `quantized` is not used by this body.
+float EstimateEntropy(const AcStrategy& acs, size_t x, size_t y,
+                      const ACSConfig& config,
+                      const float* JXL_RESTRICT cmap_factors, float* block,
+                      float* scratch_space, uint32_t* quantized) {
+  const size_t size = (1 << acs.log2_covered_blocks()) * kDCTBlockSize;
+
+  // Apply transform.
+  for (size_t c = 0; c < 3; c++) {
+    float* JXL_RESTRICT block_c = block + size * c;
+    TransformFromPixels(acs.Strategy(), &config.Pixel(c, x, y),
+                        config.src_stride, block_c, scratch_space);
+  }
+  HWY_FULL(float) df;
+
+  const size_t num_blocks = acs.covered_blocks_x() * acs.covered_blocks_y();
+  // avoid large blocks when there is a lot going on in red-green.
+  float cmul[3] = {kChromaErrorWeight[acs.RawStrategy()], 1.0f, 1.0f};
+  float quant_norm8 = 0;
+  float masking = 0;
+  if (num_blocks == 1) {
+    // When it is only one 8x8, we don't need aggregation of values.
+    quant_norm8 = config.Quant(x / 8, y / 8);
+    masking = config.Masking(x / 8, y / 8);
+    // Make DCT2X2 more favored when area is exposed.
+    float kExposedMasking = 0.118f;
+    // Raw strategy 2 is DCT2X2 (see the index comments in the tables above).
+    if (acs.RawStrategy() == 2 && masking >= kExposedMasking) {
+      masking = kExposedMasking + 0.56 * (masking - kExposedMasking);
+    }
+  } else if (num_blocks == 2) {
+    // Taking max instead of 8th norm seems to work
+    // better for smallest blocks up to 16x8. Jyrki couldn't get
+    // improvements in trying the same for 16x16 blocks.
+    if (acs.covered_blocks_y() == 2) {
+      quant_norm8 =
+          std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8, y / 8 + 1));
+      masking = std::max(config.Masking(x / 8, y / 8),
+                         config.Masking(x / 8, y / 8 + 1));
+    } else {
+      quant_norm8 =
+          std::max(config.Quant(x / 8, y / 8), config.Quant(x / 8 + 1, y / 8));
+      masking = std::max(config.Masking(x / 8, y / 8),
+                         config.Masking(x / 8 + 1, y / 8));
+    }
+  } else {
+    float masking_norm2 = 0;
+    float masking_max = 0;
+    // Load QF value, calculate empirical heuristic on masking field
+    // for weighting the information loss. Information loss manifests
+    // itself as ringing, and masking could hide it.
+    for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+      for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+        float qval = config.Quant(x / 8 + ix, y / 8 + iy);
+        // Three successive squarings accumulate qval^8.
+        qval *= qval;
+        qval *= qval;
+        quant_norm8 += qval * qval;
+        float maskval = config.Masking(x / 8 + ix, y / 8 + iy);
+        masking_max = std::max(masking_max, maskval);
+        masking_norm2 += maskval * maskval;
+      }
+    }
+    quant_norm8 /= num_blocks;
+    quant_norm8 = FastPowf(quant_norm8, 1.0f / 8.0f);
+    masking_norm2 = sqrt(masking_norm2 / num_blocks);
+    // This is a highly empirical formula.
+    masking = 0.5 * (masking_norm2 + masking_max);
+  }
+  const auto q = Set(df, quant_norm8);
+
+  // Compute entropy.
+  float entropy = 0.0f;
+  auto info_loss = Zero(df);
+  auto info_loss2 = Zero(df);
+
+  for (size_t c = 0; c < 3; c++) {
+    const float* inv_matrix = config.dequant->InvMatrix(acs.RawStrategy(), c);
+    const auto cmap_factor = Set(df, cmap_factors[c]);
+
+    auto entropy_v = Zero(df);
+    auto nzeros_v = Zero(df);
+    for (size_t i = 0; i < num_blocks * kDCTBlockSize; i += Lanes(df)) {
+      const auto in = Load(df, block + c * size + i);
+      // Channel 1 coefficients scaled by the per-channel cmap factor are
+      // subtracted before quantization.
+      const auto in_y = Mul(Load(df, block + size + i), cmap_factor);
+      const auto im = Load(df, inv_matrix + i);
+      const auto val = Mul(Sub(in, in_y), Mul(im, q));
+      const auto rval = Round(val);
+      const auto diff = AbsDiff(val, rval);
+      info_loss = Add(info_loss, diff);
+      info_loss2 = MulAdd(diff, diff, info_loss2);
+      // Note: this `q` shadows the quantizer vector `q` declared above.
+      const auto q = Abs(rval);
+      const auto q_is_zero = Eq(q, Zero(df));
+      // We used to have q * C here, but that cost model seems to
+      // be punishing large values more than necessary. Sqrt tries
+      // to avoid large values less aggressively.
+      entropy_v = Add(Sqrt(q), entropy_v);
+      nzeros_v = Add(nzeros_v, IfThenZeroElse(q_is_zero, Set(df, 1.0f)));
+    }
+    entropy += config.cost_delta * cmul[c] * GetLane(SumOfLanes(df, entropy_v));
+    size_t num_nzeros = GetLane(SumOfLanes(df, nzeros_v));
+    // Add #bit of num_nonzeros, as an estimate of the cost for encoding the
+    // number of non-zeros of the block.
+    size_t nbits = CeilLog2Nonzero(num_nzeros + 1) + 1;
+    // Also add #bit of #bit of num_nonzeros, to estimate the ANS cost, with a
+    // bias.
+    entropy += config.zeros_mul * (CeilLog2Nonzero(nbits + 17) + nbits);
+  }
+  const float kMixLoss = kMixLossTable[acs.RawStrategy()];
+  const float loss1 = GetLane(SumOfLanes(df, info_loss));
+  const float loss2 =
+      sqrt(GetLane(SumOfLanes(df, info_loss2)) * (num_blocks * 64));
+  const float loss = kMixLoss * (config.info_loss_multiplier * loss1) +
+                     (1.0 - kMixLoss) * (config.info_loss_multiplier2 * loss2);
+  const float kRegulateSurface = 11.5f;
+  // Multiplier that grows with sqrt(num_blocks); equals 1 for a single block.
+  float large_surface_error_mul =
+      (kRegulateSurface + sqrt(num_blocks)) * (1.0f / (kRegulateSurface + 1));
+  return entropy + large_surface_error_mul * masking * loss;
+}
+
+// Evaluates every single-block transform in kTransforms8x8 that is allowed at
+// `encoding_speed_tier` for the 8x8 block at pixel position (x, y) and returns
+// the raw strategy with the lowest adjusted cost
+// (entropy_add + entropy_mul * EstimateEntropy). The winning cost is written
+// to `*entropy_out`. `ac_strategy` is not used by this body.
+uint8_t FindBest8x8Transform(size_t x, size_t y, int encoding_speed_tier,
+                             const ACSConfig& config,
+                             const float* JXL_RESTRICT cmap_factors,
+                             AcStrategyImage* JXL_RESTRICT ac_strategy,
+                             float* block, float* scratch_space,
+                             uint32_t* quantized, float* entropy_out) {
+  struct TransformTry8x8 {
+    AcStrategy::Type type;
+    // Candidate is skipped when the requested tier exceeds this limit.
+    int encoding_speed_tier_max_limit;
+    float entropy_add;
+    float entropy_mul;
+  };
+  static const TransformTry8x8 kTransforms8x8[] = {
+      {
+          AcStrategy::Type::DCT,
+          9,
+          3.0f,
+          0.785f,
+      },
+      {
+          AcStrategy::Type::DCT4X4,
+          5,
+          4.0f,
+          0.7f,
+      },
+      {
+          AcStrategy::Type::DCT2X2,
+          5,
+          0.0f,
+          0.685f,
+      },
+      {
+          AcStrategy::Type::DCT4X8,
+          4,
+          3.0f,
+          0.745f,
+      },
+      {
+          AcStrategy::Type::DCT8X4,
+          4,
+          3.0f,
+          0.745f,
+      },
+      {
+          AcStrategy::Type::IDENTITY,
+          5,
+          8.0f,
+          0.81217614513585534f,
+      },
+      {
+          AcStrategy::Type::AFV0,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV1,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV2,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+      {
+          AcStrategy::Type::AFV3,
+          4,
+          3.0f,
+          0.70086131125719425f,
+      },
+  };
+  double best = 1e30;
+  // Defaults to the first table entry (DCT) if no candidate beats `best`.
+  uint8_t best_tx = kTransforms8x8[0].type;
+  for (auto tx : kTransforms8x8) {
+    if (tx.encoding_speed_tier_max_limit < encoding_speed_tier) {
+      continue;
+    }
+    AcStrategy acs = AcStrategy::FromRawStrategy(tx.type);
+    float entropy = EstimateEntropy(acs, x, y, config, cmap_factors, block,
+                                    scratch_space, quantized);
+    entropy = tx.entropy_add + tx.entropy_mul * entropy;
+    if (entropy < best) {
+      best_tx = tx.type;
+      best = entropy;
+    }
+  }
+  *entropy_out = best;
+  return best_tx;
+}
+
+// bx, by addresses the 64x64 block at 8x8 subresolution
+// cx, cy addresses the left, upper 8x8 block position of the candidate
+// transform.
+//
+// Tries to replace the transforms currently covering the candidate area with
+// the single transform `acs_raw`. The candidate is rejected if any covered
+// cell already has a priority >= `candidate_priority`, or if its cost
+// (entropy_mul * EstimateEntropy) is not lower than the sum of the current
+// estimates. On acceptance the covered cells of `priority` and
+// `entropy_estimate` (both 8x8 grids indexed as cy * 8 + cx) are updated and
+// the strategy is written into `ac_strategy`.
+void TryMergeAcs(AcStrategy::Type acs_raw, size_t bx, size_t by, size_t cx,
+                 size_t cy, const ACSConfig& config,
+                 const float* JXL_RESTRICT cmap_factors,
+                 AcStrategyImage* JXL_RESTRICT ac_strategy,
+                 const float entropy_mul, const uint8_t candidate_priority,
+                 uint8_t* priority, float* JXL_RESTRICT entropy_estimate,
+                 float* block, float* scratch_space, uint32_t* quantized) {
+  AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+  float entropy_current = 0;
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); ++iy) {
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ++ix) {
+      if (priority[(cy + iy) * 8 + (cx + ix)] >= candidate_priority) {
+        // Transform would reuse already allocated blocks and
+        // lead to invalid overlaps, for example DCT64X32 vs.
+        // DCT32X64.
+        return;
+      }
+      entropy_current += entropy_estimate[(cy + iy) * 8 + (cx + ix)];
+    }
+  }
+  float entropy_candidate =
+      entropy_mul * EstimateEntropy(acs, (bx + cx) * 8, (by + cy) * 8, config,
+                                    cmap_factors, block, scratch_space,
+                                    quantized);
+  if (entropy_candidate >= entropy_current) return;
+  // Accept the candidate.
+  for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) {
+    for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) {
+      entropy_estimate[(cy + iy) * 8 + cx + ix] = 0;
+      priority[(cy + iy) * 8 + cx + ix] = candidate_priority;
+    }
+  }
+  ac_strategy->Set(bx + cx, by + cy, acs_raw);
+  // The whole cost is stored on the top-left cell; the others stay at zero.
+  entropy_estimate[cy * 8 + cx] = entropy_candidate;
+}
+
+// Records `entropy` as the cost of the transform `acs_raw` placed at (cx, cy)
+// in the 8x8 `entropy_estimate` grid: all covered cells are zeroed and the
+// top-left cell then receives the full value.
+static void SetEntropyForTransform(size_t cx, size_t cy,
+                                   const AcStrategy::Type acs_raw,
+                                   float entropy,
+                                   float* JXL_RESTRICT entropy_estimate) {
+  const AcStrategy acs = AcStrategy::FromRawStrategy(acs_raw);
+  for (size_t dy = 0; dy < acs.covered_blocks_y(); ++dy) {
+    for (size_t dx = 0; dx < acs.covered_blocks_x(); ++dx) {
+      entropy_estimate[(cy + dy) * 8 + cx + dx] = 0.0;
+    }
+  }
+  entropy_estimate[cy * 8 + cx] = entropy;
+}
+
+// Maps a square side length in 8x8 blocks to the square DCT type:
+// 2 -> DCT16X16, 4 -> DCT32X32, anything else -> DCT64X64.
+AcStrategy::Type AcsSquare(size_t blocks) {
+  if (blocks == 2) {
+    return AcStrategy::Type::DCT16X16;
+  } else if (blocks == 4) {
+    return AcStrategy::Type::DCT32X32;
+  } else {
+    return AcStrategy::Type::DCT64X64;
+  }
+}
+
+// Maps a square side length in 8x8 blocks to the half-square type used for
+// the left/right halves: 2 -> DCT16X8, 4 -> DCT32X16, else -> DCT64X32.
+AcStrategy::Type AcsVerticalSplit(size_t blocks) {
+  if (blocks == 2) {
+    return AcStrategy::Type::DCT16X8;
+  } else if (blocks == 4) {
+    return AcStrategy::Type::DCT32X16;
+  } else {
+    return AcStrategy::Type::DCT64X32;
+  }
+}
+
+// Maps a square side length in 8x8 blocks to the half-square type used for
+// the top/bottom halves: 2 -> DCT8X16, 4 -> DCT16X32, else -> DCT32X64.
+AcStrategy::Type AcsHorizontalSplit(size_t blocks) {
+  if (blocks == 2) {
+    return AcStrategy::Type::DCT8X16;
+  } else if (blocks == 4) {
+    return AcStrategy::Type::DCT16X32;
+  } else {
+    return AcStrategy::Type::DCT32X64;
+  }
+}
+
+// The following function tries to merge smaller transforms into
+// squares and the rectangles originating from a single middle division
+// (horizontal or vertical) fairly.
+//
+// This is now generalized to concern about squares
+// of blocks X blocks size, where a block is 8x8 pixels.
+void FindBestFirstLevelDivisionForSquare( + size_t blocks, bool allow_square_transform, size_t bx, size_t by, size_t cx, + size_t cy, const ACSConfig& config, const float* JXL_RESTRICT cmap_factors, + AcStrategyImage* JXL_RESTRICT ac_strategy, const float entropy_mul_JXK, + const float entropy_mul_JXJ, float* JXL_RESTRICT entropy_estimate, + float* block, float* scratch_space, uint32_t* quantized) { + // We denote J for the larger dimension here, and K for the smaller. + // For example, for 32x32 block splitting, J would be 32, K 16. + const size_t blocks_half = blocks / 2; + const AcStrategy::Type acs_rawJXK = AcsVerticalSplit(blocks); + const AcStrategy::Type acs_rawKXJ = AcsHorizontalSplit(blocks); + const AcStrategy::Type acs_rawJXJ = AcsSquare(blocks); + const AcStrategy acsJXK = AcStrategy::FromRawStrategy(acs_rawJXK); + const AcStrategy acsKXJ = AcStrategy::FromRawStrategy(acs_rawKXJ); + const AcStrategy acsJXJ = AcStrategy::FromRawStrategy(acs_rawJXJ); + AcStrategyRow row0 = ac_strategy->ConstRow(by + cy + 0); + AcStrategyRow row1 = ac_strategy->ConstRow(by + cy + blocks_half); + // Let's check if we can consider a JXJ block here at all. + // This is not necessary in the basic use of hierarchically merging + // blocks in the simplest possible way, but is needed when we try other + // 'floating' options of merging, possibly after a simple hierarchical + // merge has been explored. + if (MultiBlockTransformCrossesHorizontalBoundary(*ac_strategy, bx + cx, + by + cy, bx + cx + blocks) || + MultiBlockTransformCrossesHorizontalBoundary( + *ac_strategy, bx + cx, by + cy + blocks, bx + cx + blocks) || + MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx, by + cy, + by + cy + blocks) || + MultiBlockTransformCrossesVerticalBoundary(*ac_strategy, bx + cx + blocks, + by + cy, by + cy + blocks)) { + return; // not suitable for JxJ analysis, some transforms leak out. 
+ } + // For floating transforms there may be + // already blocks selected that make either or both JXK and + // KXJ not feasible for this location. + const bool allow_JXK = !MultiBlockTransformCrossesVerticalBoundary( + *ac_strategy, bx + cx + blocks_half, by + cy, by + cy + blocks); + const bool allow_KXJ = !MultiBlockTransformCrossesHorizontalBoundary( + *ac_strategy, bx + cx, by + cy + blocks_half, bx + cx + blocks); + // Current entropies aggregated on NxN resolution. + float entropy[2][2] = {}; + for (size_t dy = 0; dy < blocks; ++dy) { + for (size_t dx = 0; dx < blocks; ++dx) { + entropy[dy / blocks_half][dx / blocks_half] += + entropy_estimate[(cy + dy) * 8 + (cx + dx)]; + } + } + float entropy_JXK_left = std::numeric_limits::max(); + float entropy_JXK_right = std::numeric_limits::max(); + float entropy_KXJ_top = std::numeric_limits::max(); + float entropy_KXJ_bottom = std::numeric_limits::max(); + float entropy_JXJ = std::numeric_limits::max(); + if (allow_JXK) { + if (row0[bx + cx + 0].RawStrategy() != acs_rawJXK) { + entropy_JXK_left = + entropy_mul_JXK * + EstimateEntropy(acsJXK, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, + cmap_factors, block, scratch_space, quantized); + } + if (row0[bx + cx + blocks_half].RawStrategy() != acs_rawJXK) { + entropy_JXK_right = + entropy_mul_JXK * EstimateEntropy(acsJXK, (bx + cx + blocks_half) * 8, + (by + cy + 0) * 8, config, + cmap_factors, block, scratch_space, + quantized); + } + } + if (allow_KXJ) { + if (row0[bx + cx].RawStrategy() != acs_rawKXJ) { + entropy_KXJ_top = + entropy_mul_JXK * + EstimateEntropy(acsKXJ, (bx + cx + 0) * 8, (by + cy + 0) * 8, config, + cmap_factors, block, scratch_space, quantized); + } + if (row1[bx + cx].RawStrategy() != acs_rawKXJ) { + entropy_KXJ_bottom = + entropy_mul_JXK * EstimateEntropy(acsKXJ, (bx + cx + 0) * 8, + (by + cy + blocks_half) * 8, config, + cmap_factors, block, scratch_space, + quantized); + } + } + if (allow_square_transform) { + // We control the exploration of 
the square transform separately so that + // we can turn it off at high decoding speeds for 32x32, but still allow + // exploring 16x32 and 32x16. + entropy_JXJ = entropy_mul_JXJ * EstimateEntropy(acsJXJ, (bx + cx + 0) * 8, + (by + cy + 0) * 8, config, + cmap_factors, block, + scratch_space, quantized); + } + + // Test if this block should have JXK or KXJ transforms, + // because it can have only one or the other. + float costJxN = std::min(entropy_JXK_left, entropy[0][0] + entropy[1][0]) + + std::min(entropy_JXK_right, entropy[0][1] + entropy[1][1]); + float costNxJ = std::min(entropy_KXJ_top, entropy[0][0] + entropy[0][1]) + + std::min(entropy_KXJ_bottom, entropy[1][0] + entropy[1][1]); + if (entropy_JXJ < costJxN && entropy_JXJ < costNxJ) { + ac_strategy->Set(bx + cx, by + cy, acs_rawJXJ); + SetEntropyForTransform(cx, cy, acs_rawJXJ, entropy_JXJ, entropy_estimate); + } else if (costJxN < costNxJ) { + if (entropy_JXK_left < entropy[0][0] + entropy[1][0]) { + ac_strategy->Set(bx + cx, by + cy, acs_rawJXK); + SetEntropyForTransform(cx, cy, acs_rawJXK, entropy_JXK_left, + entropy_estimate); + } + if (entropy_JXK_right < entropy[0][1] + entropy[1][1]) { + ac_strategy->Set(bx + cx + blocks_half, by + cy, acs_rawJXK); + SetEntropyForTransform(cx + blocks_half, cy, acs_rawJXK, + entropy_JXK_right, entropy_estimate); + } + } else { + if (entropy_KXJ_top < entropy[0][0] + entropy[0][1]) { + ac_strategy->Set(bx + cx, by + cy, acs_rawKXJ); + SetEntropyForTransform(cx, cy, acs_rawKXJ, entropy_KXJ_top, + entropy_estimate); + } + if (entropy_KXJ_bottom < entropy[1][0] + entropy[1][1]) { + ac_strategy->Set(bx + cx, by + cy + blocks_half, acs_rawKXJ); + SetEntropyForTransform(cx, cy + blocks_half, acs_rawKXJ, + entropy_KXJ_bottom, entropy_estimate); + } + } +} + +void ProcessRectACS(PassesEncoderState* JXL_RESTRICT enc_state, + const ACSConfig& config, const Rect& rect) { + // Main philosophy here: + // 1. First find best 8x8 transform for each area. + // 2. 
Merging them into larger transforms where possibly, but + // starting from the smallest transforms (16x8 and 8x16). + // Additional complication: 16x8 and 8x16 are considered + // simultanouesly and fairly against each other. + // We are looking at 64x64 squares since the YtoX and YtoB + // maps happen to be at that resolution, and having + // integral transforms cross these boundaries leads to + // additional complications. + const CompressParams& cparams = enc_state->cparams; + const float butteraugli_target = cparams.butteraugli_distance; + AcStrategyImage* ac_strategy = &enc_state->shared.ac_strategy; + // TODO(veluca): reuse allocations + auto mem = hwy::AllocateAligned(5 * AcStrategy::kMaxCoeffArea); + auto qmem = hwy::AllocateAligned(AcStrategy::kMaxCoeffArea); + uint32_t* JXL_RESTRICT quantized = qmem.get(); + float* JXL_RESTRICT block = mem.get(); + float* JXL_RESTRICT scratch_space = mem.get() + 3 * AcStrategy::kMaxCoeffArea; + size_t bx = rect.x0(); + size_t by = rect.y0(); + JXL_ASSERT(rect.xsize() <= 8); + JXL_ASSERT(rect.ysize() <= 8); + size_t tx = bx / kColorTileDimInBlocks; + size_t ty = by / kColorTileDimInBlocks; + const float cmap_factors[3] = { + enc_state->shared.cmap.YtoXRatio( + enc_state->shared.cmap.ytox_map.ConstRow(ty)[tx]), + 0.0f, + enc_state->shared.cmap.YtoBRatio( + enc_state->shared.cmap.ytob_map.ConstRow(ty)[tx]), + }; + if (cparams.speed_tier > SpeedTier::kHare) return; + // First compute the best 8x8 transform for each square. Later, we do not + // experiment with different combinations, but only use the best of the 8x8s + // when DCT8X8 is specified in the tree search. + // 8x8 transforms have 10 variants, but every larger transform is just a DCT. + float entropy_estimate[64] = {}; + // Favor all 8x8 transforms (against 16x8 and larger transforms)) at + // low butteraugli_target distances. 
+ static const float k8x8mul1 = -0.55; + static const float k8x8mul2 = 1.0; + static const float k8x8base = 1.4; + const float mul8x8 = k8x8mul2 + k8x8mul1 / (butteraugli_target + k8x8base); + for (size_t iy = 0; iy < rect.ysize(); iy++) { + for (size_t ix = 0; ix < rect.xsize(); ix++) { + float entropy = 0.0; + const uint8_t best_of_8x8s = FindBest8x8Transform( + 8 * (bx + ix), 8 * (by + iy), static_cast(cparams.speed_tier), + config, cmap_factors, ac_strategy, block, scratch_space, quantized, + &entropy); + ac_strategy->Set(bx + ix, by + iy, + static_cast(best_of_8x8s)); + entropy_estimate[iy * 8 + ix] = entropy * mul8x8; + } + } + // Merge when a larger transform is better than the previously + // searched best combination of 8x8 transforms. + struct MergeTry { + AcStrategy::Type type; + uint8_t priority; + uint8_t decoding_speed_tier_max_limit; + uint8_t encoding_speed_tier_max_limit; + float entropy_mul; + }; + static const float k8X16mul1 = -0.55; + static const float k8X16mul2 = 0.885; + static const float k8X16base = 1.6; + const float entropy_mul16X8 = + k8X16mul2 + k8X16mul1 / (butteraugli_target + k8X16base); + // const float entropy_mul16X8 = mul8X16 * 0.91195782912371126f; + + static const float k16X16mul1 = -0.35; + static const float k16X16mul2 = 0.808; + static const float k16X16base = 2.0; + const float entropy_mul16X16 = + k16X16mul2 + k16X16mul1 / (butteraugli_target + k16X16base); + // const float entropy_mul16X16 = mul16X16 * 0.83183417727960129f; + + static const float k32X16mul1 = -0.1; + static const float k32X16mul2 = 0.854; + static const float k32X16base = 2.5; + const float entropy_mul16X32 = + k32X16mul2 + k32X16mul1 / (butteraugli_target + k32X16base); + + const float entropy_mul32X32 = 0.93; + const float entropy_mul64X64 = 1.52f; + // TODO(jyrki): Consider this feedback in further changes: + // Also effectively when the multipliers for smaller blocks are + // below 1, this raises the bar for the bigger blocks even higher + // in that 
sense these constants are not independent (e.g. changing + // the constant for DCT16x32 by -5% (making it more likely) also + // means that DCT32x32 becomes harder to do when starting from + // two DCT16x32s). It might be better to make them more independent, + // e.g. by not applying the multiplier when storing the new entropy + // estimates in TryMergeToACSCandidate(). + const MergeTry kTransformsForMerge[9] = { + {AcStrategy::Type::DCT16X8, 2, 4, 5, entropy_mul16X8}, + {AcStrategy::Type::DCT8X16, 2, 4, 5, entropy_mul16X8}, + // FindBestFirstLevelDivisionForSquare looks for DCT16X16 and its + // subdivisions. {AcStrategy::Type::DCT16X16, 3, entropy_mul16X16}, + {AcStrategy::Type::DCT16X32, 4, 4, 4, entropy_mul16X32}, + {AcStrategy::Type::DCT32X16, 4, 4, 4, entropy_mul16X32}, + // FindBestFirstLevelDivisionForSquare looks for DCT32X32 and its + // subdivisions. {AcStrategy::Type::DCT32X32, 5, 1, 5, + // 0.9822994906548809f}, + {AcStrategy::Type::DCT64X32, 6, 1, 3, 1.29f}, + {AcStrategy::Type::DCT32X64, 6, 1, 3, 1.29f}, + // {AcStrategy::Type::DCT64X64, 8, 1, 3, 2.0846542128012948f}, + }; + /* + These sizes not yet included in merge heuristic: + set(AcStrategy::Type::DCT32X8, 0.0f, 2.261390410971102f); + set(AcStrategy::Type::DCT8X32, 0.0f, 2.261390410971102f); + set(AcStrategy::Type::DCT128X128, 0.0f, 1.0f); + set(AcStrategy::Type::DCT128X64, 0.0f, 0.73f); + set(AcStrategy::Type::DCT64X128, 0.0f, 0.73f); + set(AcStrategy::Type::DCT256X256, 0.0f, 1.0f); + set(AcStrategy::Type::DCT256X128, 0.0f, 0.73f); + set(AcStrategy::Type::DCT128X256, 0.0f, 0.73f); + */ + + // Priority is a tricky kludge to avoid collisions so that transforms + // don't overlap. 
+ uint8_t priority[64] = {}; + bool enable_32x32 = cparams.decoding_speed_tier < 4; + for (auto tx : kTransformsForMerge) { + if (tx.decoding_speed_tier_max_limit < cparams.decoding_speed_tier) { + continue; + } + AcStrategy acs = AcStrategy::FromRawStrategy(tx.type); + + for (size_t cy = 0; cy + acs.covered_blocks_y() - 1 < rect.ysize(); + cy += acs.covered_blocks_y()) { + for (size_t cx = 0; cx + acs.covered_blocks_x() - 1 < rect.xsize(); + cx += acs.covered_blocks_x()) { + if (cy + 7 < rect.ysize() && cx + 7 < rect.xsize()) { + if (cparams.decoding_speed_tier < 4 && + tx.type == AcStrategy::Type::DCT32X64) { + // We handle both DCT8X16 and DCT16X8 at the same time. + if ((cy | cx) % 8 == 0) { + FindBestFirstLevelDivisionForSquare( + 8, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, + tx.entropy_mul, entropy_mul64X64, entropy_estimate, block, + scratch_space, quantized); + } + continue; + } else if (tx.type == AcStrategy::Type::DCT32X16) { + // We handled both DCT8X16 and DCT16X8 at the same time, + // and that is above. The last column and last row, + // when the last column or last row is odd numbered, + // are still handled by TryMergeAcs. + continue; + } + } + if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) || + (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) { + // already covered by FindBest32X32 + continue; + } + + if (cy + 3 < rect.ysize() && cx + 3 < rect.xsize()) { + if (tx.type == AcStrategy::Type::DCT16X32) { + // We handle both DCT8X16 and DCT16X8 at the same time. + if ((cy | cx) % 4 == 0) { + FindBestFirstLevelDivisionForSquare( + 4, enable_32x32, bx, by, cx, cy, config, cmap_factors, + ac_strategy, tx.entropy_mul, entropy_mul32X32, + entropy_estimate, block, scratch_space, quantized); + } + continue; + } else if (tx.type == AcStrategy::Type::DCT32X16) { + // We handled both DCT8X16 and DCT16X8 at the same time, + // and that is above. 
The last column and last row, + // when the last column or last row is odd numbered, + // are still handled by TryMergeAcs. + continue; + } + } + if ((tx.type == AcStrategy::Type::DCT16X32 && cy % 4 != 0) || + (tx.type == AcStrategy::Type::DCT32X16 && cx % 4 != 0)) { + // already covered by FindBest32X32 + continue; + } + if (cy + 1 < rect.ysize() && cx + 1 < rect.xsize()) { + if (tx.type == AcStrategy::Type::DCT8X16) { + // We handle both DCT8X16 and DCT16X8 at the same time. + if ((cy | cx) % 2 == 0) { + FindBestFirstLevelDivisionForSquare( + 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, + tx.entropy_mul, entropy_mul16X16, entropy_estimate, block, + scratch_space, quantized); + } + continue; + } else if (tx.type == AcStrategy::Type::DCT16X8) { + // We handled both DCT8X16 and DCT16X8 at the same time, + // and that is above. The last column and last row, + // when the last column or last row is odd numbered, + // are still handled by TryMergeAcs. + continue; + } + } + if ((tx.type == AcStrategy::Type::DCT8X16 && cy % 2 == 1) || + (tx.type == AcStrategy::Type::DCT16X8 && cx % 2 == 1)) { + // already covered by FindBestFirstLevelDivisionForSquare + continue; + } + // All other merge sizes are handled here. + // Some of the DCT16X8s and DCT8X16s will still leak through here + // when there is an odd number of 8x8 blocks, then the last row + // and column will get their DCT16X8s and DCT8X16s through the + // normal integral transform merging process. + TryMergeAcs(tx.type, bx, by, cx, cy, config, cmap_factors, ac_strategy, + tx.entropy_mul, tx.priority, &priority[0], entropy_estimate, + block, scratch_space, quantized); + } + } + } + if (cparams.speed_tier >= SpeedTier::kHare) { + return; + } + // Here we still try to do some non-aligned matching, find a few more + // 16X8, 8X16 and 16X16s between the non-2-aligned blocks. 
+ for (size_t cy = 0; cy + 1 < rect.ysize(); ++cy) { + for (size_t cx = 0; cx + 1 < rect.xsize(); ++cx) { + if ((cy | cx) % 2 != 0) { + FindBestFirstLevelDivisionForSquare( + 2, true, bx, by, cx, cy, config, cmap_factors, ac_strategy, + entropy_mul16X8, entropy_mul16X16, entropy_estimate, block, + scratch_space, quantized); + } + } + } + // Non-aligned matching for 32X32, 16X32 and 32X16. + size_t step = cparams.speed_tier >= SpeedTier::kTortoise ? 2 : 1; + for (size_t cy = 0; cy + 3 < rect.ysize(); cy += step) { + for (size_t cx = 0; cx + 3 < rect.xsize(); cx += step) { + if ((cy | cx) % 4 == 0) { + continue; // Already tried with loop above (DCT16X32 case). + } + FindBestFirstLevelDivisionForSquare( + 4, enable_32x32, bx, by, cx, cy, config, cmap_factors, ac_strategy, + entropy_mul16X32, entropy_mul32X32, entropy_estimate, block, + scratch_space, quantized); + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(ProcessRectACS); + +void AcStrategyHeuristics::Init(const Image3F& src, + PassesEncoderState* enc_state) { + this->enc_state = enc_state; + config.dequant = &enc_state->shared.matrices; + const CompressParams& cparams = enc_state->cparams; + + if (cparams.speed_tier >= SpeedTier::kCheetah) { + JXL_CHECK(enc_state->shared.matrices.EnsureComputed(1)); // DCT8 only + } else { + uint32_t acs_mask = 0; + // All transforms up to 64x64. + for (size_t i = 0; i < AcStrategy::DCT128X128; i++) { + acs_mask |= (1 << i); + } + JXL_CHECK(enc_state->shared.matrices.EnsureComputed(acs_mask)); + } + + // Image row pointers and strides. 
+ config.quant_field_row = enc_state->initial_quant_field.Row(0); + config.quant_field_stride = enc_state->initial_quant_field.PixelsPerRow(); + auto& mask = enc_state->initial_quant_masking; + if (mask.xsize() > 0 && mask.ysize() > 0) { + config.masking_field_row = mask.Row(0); + config.masking_field_stride = mask.PixelsPerRow(); + } + + config.src_rows[0] = src.ConstPlaneRow(0, 0); + config.src_rows[1] = src.ConstPlaneRow(1, 0); + config.src_rows[2] = src.ConstPlaneRow(2, 0); + config.src_stride = src.PixelsPerRow(); + + // Entropy estimate is composed of two factors: + // - estimate of the number of bits that will be used by the block + // - information loss due to quantization + // The following constant controls the relative weights of these components. + config.info_loss_multiplier = 58.67516723857484f; + config.info_loss_multiplier2 = 43.0f; + config.zeros_mul = 2.55f; + config.cost_delta = 4.9425062806007478f; + JXL_ASSERT(enc_state->shared.ac_strategy.xsize() == + enc_state->shared.frame_dim.xsize_blocks); + JXL_ASSERT(enc_state->shared.ac_strategy.ysize() == + enc_state->shared.frame_dim.ysize_blocks); +} + +void AcStrategyHeuristics::ProcessRect(const Rect& rect) { + const CompressParams& cparams = enc_state->cparams; + // In Falcon mode, use DCT8 everywhere and uniform quantization. + if (cparams.speed_tier >= SpeedTier::kCheetah) { + enc_state->shared.ac_strategy.FillDCT8(rect); + return; + } + HWY_DYNAMIC_DISPATCH(ProcessRectACS) + (enc_state, config, rect); +} + +void AcStrategyHeuristics::Finalize(AuxOut* aux_out) { + const auto& ac_strategy = enc_state->shared.ac_strategy; + // Accounting and debug output. 
+ if (aux_out != nullptr) { + aux_out->num_small_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::IDENTITY) + + ac_strategy.CountBlocks(AcStrategy::Type::DCT2X2) + + ac_strategy.CountBlocks(AcStrategy::Type::DCT4X4); + aux_out->num_dct4x8_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT4X8) + + ac_strategy.CountBlocks(AcStrategy::Type::DCT8X4); + aux_out->num_afv_blocks = ac_strategy.CountBlocks(AcStrategy::Type::AFV0) + + ac_strategy.CountBlocks(AcStrategy::Type::AFV1) + + ac_strategy.CountBlocks(AcStrategy::Type::AFV2) + + ac_strategy.CountBlocks(AcStrategy::Type::AFV3); + aux_out->num_dct8_blocks = ac_strategy.CountBlocks(AcStrategy::Type::DCT); + aux_out->num_dct8x16_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT8X16) + + ac_strategy.CountBlocks(AcStrategy::Type::DCT16X8); + aux_out->num_dct8x32_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT8X32) + + ac_strategy.CountBlocks(AcStrategy::Type::DCT32X8); + aux_out->num_dct16_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT16X16); + aux_out->num_dct16x32_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT16X32) + + ac_strategy.CountBlocks(AcStrategy::Type::DCT32X16); + aux_out->num_dct32_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT32X32); + aux_out->num_dct32x64_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT32X64) + + ac_strategy.CountBlocks(AcStrategy::Type::DCT64X32); + aux_out->num_dct64_blocks = + ac_strategy.CountBlocks(AcStrategy::Type::DCT64X64); + } + + // if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(aux_out)) { + if (JXL_DEBUG_AC_STRATEGY && WantDebugOutput(enc_state->cparams)) { + DumpAcStrategy(ac_strategy, enc_state->shared.frame_dim.xsize, + enc_state->shared.frame_dim.ysize, "ac_strategy", aux_out, + enc_state->cparams); + } +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.h b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.h new file mode 100644 index 0000000000..c53a79bb04 
--- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_ac_strategy.h @@ -0,0 +1,65 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_AC_STRATEGY_H_ +#define LIB_JXL_ENC_AC_STRATEGY_H_ + +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image.h" +#include "lib/jxl/quant_weights.h" + +// `FindBestAcStrategy` uses heuristics to choose which AC strategy should be +// used in each block, as well as the initial quantization field. + +namespace jxl { + +struct AuxOut; + +// AC strategy selection: utility struct. + +struct ACSConfig { + const DequantMatrices* JXL_RESTRICT dequant; + float info_loss_multiplier; + float info_loss_multiplier2; + float* JXL_RESTRICT quant_field_row; + size_t quant_field_stride; + float* JXL_RESTRICT masking_field_row; + size_t masking_field_stride; + const float* JXL_RESTRICT src_rows[3]; + size_t src_stride; + float cost_delta; + float zeros_mul; + const float& Pixel(size_t c, size_t x, size_t y) const { + return src_rows[c][y * src_stride + x]; + } + float Masking(size_t bx, size_t by) const { + JXL_DASSERT(masking_field_row[by * masking_field_stride + bx] > 0); + return masking_field_row[by * masking_field_stride + bx]; + } + float Quant(size_t bx, size_t by) const { + JXL_DASSERT(quant_field_row[by * quant_field_stride + bx] > 0); + return quant_field_row[by * quant_field_stride + bx]; + } +}; + +struct AcStrategyHeuristics { + void Init(const Image3F& src, PassesEncoderState* enc_state); + void ProcessRect(const Rect& rect); + void Finalize(AuxOut* aux_out); + ACSConfig config; + PassesEncoderState* enc_state; +}; + +} // namespace jxl + +#endif // LIB_JXL_ENC_AC_STRATEGY_H_ 
diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.cc b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.cc new file mode 100644 index 0000000000..fbd3f953c9 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.cc @@ -0,0 +1,1170 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_adaptive_quantization.h" + +#include +#include +#include + +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_adaptive_quantization.cc" +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/butteraugli/butteraugli.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/dec_group.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_debug_image.h" +#include "lib/jxl/enc_group.h" +#include "lib/jxl/enc_modular.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_transforms-inl.h" +#include "lib/jxl/epf.h" +#include "lib/jxl/fast_math-inl.h" +#include "lib/jxl/gauss_blur.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/quant_weights.h" + +// Set JXL_DEBUG_ADAPTIVE_QUANTIZATION to 1 to enable debugging. 
+#ifndef JXL_DEBUG_ADAPTIVE_QUANTIZATION +#define JXL_DEBUG_ADAPTIVE_QUANTIZATION 0 +#endif + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::AbsDiff; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Sqrt; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +// The following functions modulate an exponent (out_val) and return the updated +// value. Their descriptor is limited to 8 lanes for 8x8 blocks. + +// Hack for mask estimation. Eventually replace this code with butteraugli's +// masking. +float ComputeMaskForAcStrategyUse(const float out_val) { + const float kMul = 1.0f; + const float kOffset = 0.001f; + return kMul / (out_val + kOffset); +} + +template +V ComputeMask(const D d, const V out_val) { + const auto kBase = Set(d, -0.76471879237038032f); + const auto kMul4 = Set(d, 4.4585596705216615f); + const auto kMul2 = Set(d, 17.282053892620215f); + const auto kOffset2 = Set(d, 302.36961315317848f); + const auto kMul3 = Set(d, 7.0561261998705858f); + const auto kOffset3 = Set(d, 2.3179635626140773f); + const auto kOffset4 = Mul(Set(d, 0.25f), kOffset3); + const auto kMul0 = Set(d, 0.80061762862741759f); + const auto k1 = Set(d, 1.0f); + + // Avoid division by zero. + const auto v1 = Max(Mul(out_val, kMul0), Set(d, 1e-3f)); + const auto v2 = Div(k1, Add(v1, kOffset2)); + const auto v3 = Div(k1, MulAdd(v1, v1, kOffset3)); + const auto v4 = Div(k1, MulAdd(v1, v1, kOffset4)); + // TODO(jyrki): + // A log or two here could make sense. In butteraugli we have effectively + // log(log(x + C)) for this kind of use, as a single log is used in + // saturating visual masking and here the modulation values are exponential, + // another log would counter that. 
+ return Add(kBase, MulAdd(kMul4, v4, MulAdd(kMul2, v2, Mul(kMul3, v3)))); +} + +// mul and mul2 represent a scaling difference between jxl and butteraugli. +static const float kSGmul = 226.77216153508914f; +static const float kSGmul2 = 1.0f / 73.377132366608819f; +static const float kLog2 = 0.693147181f; +// Includes correction factor for std::log -> log2. +static const float kSGRetMul = kSGmul2 * 18.6580932135f * kLog2; +static const float kSGVOffset = 7.7825991679894591f; + +template +V RatioOfDerivativesOfCubicRootToSimpleGamma(const D d, V v) { + // The opsin space in jxl is the cubic root of photons, i.e., v * v * v + // is related to the number of photons. + // + // SimpleGamma(v * v * v) is the psychovisual space in butteraugli. + // This ratio allows quantization to move from jxl's opsin space to + // butteraugli's log-gamma space. + float kEpsilon = 1e-2; + v = ZeroIfNegative(v); + const auto kNumMul = Set(d, kSGRetMul * 3 * kSGmul); + const auto kVOffset = Set(d, kSGVOffset * kLog2 + kEpsilon); + const auto kDenMul = Set(d, kLog2 * kSGmul); + + const auto v2 = Mul(v, v); + + const auto num = MulAdd(kNumMul, v2, Set(d, kEpsilon)); + const auto den = MulAdd(Mul(kDenMul, v), v2, kVOffset); + return invert ? Div(num, den) : Div(den, num); +} + +template +static float RatioOfDerivativesOfCubicRootToSimpleGamma(float v) { + using DScalar = HWY_CAPPED(float, 1); + auto vscalar = Load(DScalar(), &v); + return GetLane( + RatioOfDerivativesOfCubicRootToSimpleGamma(DScalar(), vscalar)); +} + +// TODO(veluca): this function computes an approximation of the derivative of +// SimpleGamma with (f(x+eps)-f(x))/eps. Consider two-sided approximation or +// exact derivatives. For reference, SimpleGamma was: +/* +template +V SimpleGamma(const D d, V v) { + // A simple HDR compatible gamma function. 
+ const auto mul = Set(d, kSGmul); + const auto kRetMul = Set(d, kSGRetMul); + const auto kRetAdd = Set(d, kSGmul2 * -20.2789020414f); + const auto kVOffset = Set(d, kSGVOffset); + + v *= mul; + + // This should happen rarely, but may lead to a NaN, which is rather + // undesirable. Since negative photons don't exist we solve the NaNs by + // clamping here. + // TODO(veluca): with FastLog2f, this no longer leads to NaNs. + v = ZeroIfNegative(v); + return kRetMul * FastLog2f(d, v + kVOffset) + kRetAdd; +} +*/ + +template +V GammaModulation(const D d, const size_t x, const size_t y, + const ImageF& xyb_x, const ImageF& xyb_y, const V out_val) { + const float kBias = 0.16f; + JXL_DASSERT(kBias > kOpsinAbsorbanceBias[0]); + JXL_DASSERT(kBias > kOpsinAbsorbanceBias[1]); + JXL_DASSERT(kBias > kOpsinAbsorbanceBias[2]); + auto overall_ratio = Zero(d); + auto bias = Set(d, kBias); + auto half = Set(d, 0.5f); + for (size_t dy = 0; dy < 8; ++dy) { + const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy); + const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy); + for (size_t dx = 0; dx < 8; dx += Lanes(d)) { + const auto iny = Add(Load(d, row_in_y + x + dx), bias); + const auto inx = Load(d, row_in_x + x + dx); + const auto r = Sub(iny, inx); + const auto g = Add(iny, inx); + const auto ratio_r = + RatioOfDerivativesOfCubicRootToSimpleGamma(d, r); + const auto ratio_g = + RatioOfDerivativesOfCubicRootToSimpleGamma(d, g); + const auto avg_ratio = Mul(half, Add(ratio_r, ratio_g)); + + overall_ratio = Add(overall_ratio, avg_ratio); + } + } + overall_ratio = Mul(SumOfLanes(d, overall_ratio), Set(d, 1.0f / 64)); + // ideally -1.0, but likely optimal correction adds some entropy, so slightly + // less than that. + // ln(2) constant folded in because we want std::log but have FastLog2f. 
+ const auto kGam = Set(d, -0.15526878023684174f * 0.693147180559945f); + return MulAdd(kGam, FastLog2f(d, overall_ratio), out_val); +} + +template +V ColorModulation(const D d, const size_t x, const size_t y, + const ImageF& xyb_x, const ImageF& xyb_y, const ImageF& xyb_b, + const double butteraugli_target, V out_val) { + static const float kStrengthMul = 4.2456542701250122f; + static const float kRedRampStart = 0.18748564245760829f; + static const float kRedRampLength = 0.16701783842516479f; + static const float kBlueRampLength = 0.16117602661852037f; + static const float kBlueRampStart = 0.47897504338287333f; + const float strength = kStrengthMul * (1.0f - 0.15f * butteraugli_target); + if (strength < 0) { + return out_val; + } + // x values are smaller than y and b values, need to take the difference into + // account. + const float red_strength = strength * 6.0f; + const float blue_strength = strength; + { + // Reduce some bits from areas not blue or red. + const float offset = strength * -0.007; // 9174542291185913f; + out_val = Add(out_val, Set(d, offset)); + } + // Calculate how much of the 8x8 block is covered with blue or red. + auto blue_coverage = Zero(d); + auto red_coverage = Zero(d); + auto bias_y = Set(d, 0.2f); + auto bias_y_add = Set(d, 0.1f); + for (size_t dy = 0; dy < 8; ++dy) { + const float* const JXL_RESTRICT row_in_x = xyb_x.Row(y + dy); + const float* const JXL_RESTRICT row_in_y = xyb_y.Row(y + dy); + const float* const JXL_RESTRICT row_in_b = xyb_b.Row(y + dy); + for (size_t dx = 0; dx < 8; dx += Lanes(d)) { + const auto pixel_y = Load(d, row_in_y + x + dx); + // Estimate redness-greeness relative to the intensity. 
+ const auto pixel_xpy = Div(Abs(Load(d, row_in_x + x + dx)), + Max(Add(bias_y_add, pixel_y), bias_y)); + const auto pixel_x = + Max(Set(d, 0.0f), Sub(pixel_xpy, Set(d, kRedRampStart))); + const auto pixel_b = + Max(Set(d, 0.0f), Sub(Load(d, row_in_b + x + dx), + Add(pixel_y, Set(d, kBlueRampStart)))); + const auto blue_slope = Min(pixel_b, Set(d, kBlueRampLength)); + const auto red_slope = Min(pixel_x, Set(d, kRedRampLength)); + red_coverage = Add(red_coverage, red_slope); + blue_coverage = Add(blue_coverage, blue_slope); + } + } + + // Saturate when the high red or high blue coverage is above a level. + // The idea here is that if a certain fraction of the block is red or + // blue we consider as if it was fully red or blue. + static const float ratio = 28.0f; // out of 64 pixels. + + auto overall_red_coverage = SumOfLanes(d, red_coverage); + overall_red_coverage = + Min(overall_red_coverage, Set(d, ratio * kRedRampLength)); + overall_red_coverage = + Mul(overall_red_coverage, Set(d, red_strength / ratio)); + + auto overall_blue_coverage = SumOfLanes(d, blue_coverage); + overall_blue_coverage = + Min(overall_blue_coverage, Set(d, ratio * kBlueRampLength)); + overall_blue_coverage = + Mul(overall_blue_coverage, Set(d, blue_strength / ratio)); + + return Add(overall_red_coverage, Add(overall_blue_coverage, out_val)); +} + +// Change precision in 8x8 blocks that have high frequency content. +template +V HfModulation(const D d, const size_t x, const size_t y, const ImageF& xyb, + const V out_val) { + // Zero out the invalid differences for the rightmost value per row. 
+ const Rebind du; + HWY_ALIGN constexpr uint32_t kMaskRight[kBlockDim] = {~0u, ~0u, ~0u, ~0u, + ~0u, ~0u, ~0u, 0}; + + auto sum = Zero(d); // sum of absolute differences with right and below + + static const float valmin = 0.52489909479039587f; + auto valminv = Set(d, valmin); + for (size_t dy = 0; dy < 8; ++dy) { + const float* JXL_RESTRICT row_in = xyb.Row(y + dy) + x; + const float* JXL_RESTRICT row_in_next = + dy == 7 ? row_in : xyb.Row(y + dy + 1) + x; + + // In SCALAR, there is no guarantee of having extra row padding. + // Hence, we need to ensure we don't access pixels outside the row itself. + // In SIMD modes, however, rows are padded, so it's safe to access one + // garbage value after the row. The vector then gets masked with kMaskRight + // to remove the influence of that value. +#if HWY_TARGET != HWY_SCALAR + for (size_t dx = 0; dx < 8; dx += Lanes(d)) { +#else + for (size_t dx = 0; dx < 7; dx += Lanes(d)) { +#endif + const auto p = Load(d, row_in + dx); + const auto pr = LoadU(d, row_in + dx + 1); + const auto mask = BitCast(d, Load(du, kMaskRight + dx)); + sum = Add(sum, And(mask, Min(valminv, AbsDiff(p, pr)))); + + const auto pd = Load(d, row_in_next + dx); + sum = Add(sum, Min(valminv, AbsDiff(p, pd))); + } +#if HWY_TARGET == HWY_SCALAR + const auto p = Load(d, row_in + 7); + const auto pd = Load(d, row_in_next + 7); + sum = Add(sum, Min(valminv, AbsDiff(p, pd))); +#endif + } + // more negative value gives more bpp + static const float kOffset = -2.6545897672771526; + static const float kMul = -0.049868161744916512; + + sum = SumOfLanes(d, sum); + float scalar_sum = GetLane(sum); + static const float maxsum = 7.9076877647025947f; + static const float minsum = 0.53640540945659809f; + scalar_sum = std::min(maxsum, scalar_sum); + scalar_sum = std::max(minsum, scalar_sum); + scalar_sum += kOffset; + scalar_sum *= kMul; + return Add(Set(d, scalar_sum), out_val); +} + +void PerBlockModulations(const float butteraugli_target, const ImageF& xyb_x, + 
const ImageF& xyb_y, const ImageF& xyb_b, + const float scale, const Rect& rect, ImageF* out) { + JXL_ASSERT(SameSize(xyb_x, xyb_y)); + JXL_ASSERT(DivCeil(xyb_x.xsize(), kBlockDim) == out->xsize()); + JXL_ASSERT(DivCeil(xyb_x.ysize(), kBlockDim) == out->ysize()); + + float base_level = 0.48f * scale; + float kDampenRampStart = 2.0f; + float kDampenRampEnd = 14.0f; + float dampen = 1.0f; + if (butteraugli_target >= kDampenRampStart) { + dampen = 1.0f - ((butteraugli_target - kDampenRampStart) / + (kDampenRampEnd - kDampenRampStart)); + if (dampen < 0) { + dampen = 0; + } + } + const float mul = scale * dampen; + const float add = (1.0f - dampen) * base_level; + for (size_t iy = rect.y0(); iy < rect.y0() + rect.ysize(); iy++) { + const size_t y = iy * 8; + float* const JXL_RESTRICT row_out = out->Row(iy); + const HWY_CAPPED(float, kBlockDim) df; + for (size_t ix = rect.x0(); ix < rect.x0() + rect.xsize(); ix++) { + size_t x = ix * 8; + auto out_val = Set(df, row_out[ix]); + out_val = ComputeMask(df, out_val); + out_val = HfModulation(df, x, y, xyb_y, out_val); + out_val = ColorModulation(df, x, y, xyb_x, xyb_y, xyb_b, + butteraugli_target, out_val); + out_val = GammaModulation(df, x, y, xyb_x, xyb_y, out_val); + // We want multiplicative quantization field, so everything + // until this point has been modulating the exponent. 
+ row_out[ix] = FastPow2f(GetLane(out_val) * 1.442695041f) * mul + add; + } + } +} + +template +V MaskingSqrt(const D d, V v) { + static const float kLogOffset = 27.97044946785558f; + static const float kMul = 211.53333281566171f; + const auto mul_v = Set(d, kMul * 1e8); + const auto offset_v = Set(d, kLogOffset); + return Mul(Set(d, 0.25f), Sqrt(MulAdd(v, Sqrt(mul_v), offset_v))); +} + +float MaskingSqrt(const float v) { + using DScalar = HWY_CAPPED(float, 1); + auto vscalar = Load(DScalar(), &v); + return GetLane(MaskingSqrt(DScalar(), vscalar)); +} + +void StoreMin4(const float v, float& min0, float& min1, float& min2, + float& min3) { + if (v < min3) { + if (v < min0) { + min3 = min2; + min2 = min1; + min1 = min0; + min0 = v; + } else if (v < min1) { + min3 = min2; + min2 = min1; + min1 = v; + } else if (v < min2) { + min3 = min2; + min2 = v; + } else { + min3 = v; + } + } +} + +// Look for smooth areas near the area of degradation. +// If the areas are generally smooth, don't do masking. +// Output is downsampled 2x. +void FuzzyErosion(const Rect& from_rect, const ImageF& from, + const Rect& to_rect, ImageF* to) { + const size_t xsize = from.xsize(); + const size_t ysize = from.ysize(); + constexpr int kStep = 1; + static_assert(kStep == 1, "Step must be 1"); + JXL_ASSERT(to_rect.xsize() * 2 == from_rect.xsize()); + JXL_ASSERT(to_rect.ysize() * 2 == from_rect.ysize()); + for (size_t fy = 0; fy < from_rect.ysize(); ++fy) { + size_t y = fy + from_rect.y0(); + size_t ym1 = y >= kStep ? y - kStep : y; + size_t yp1 = y + kStep < ysize ? y + kStep : y; + const float* rowt = from.Row(ym1); + const float* row = from.Row(y); + const float* rowb = from.Row(yp1); + float* row_out = to_rect.Row(to, fy / 2); + for (size_t fx = 0; fx < from_rect.xsize(); ++fx) { + size_t x = fx + from_rect.x0(); + size_t xm1 = x >= kStep ? x - kStep : x; + size_t xp1 = x + kStep < xsize ? 
x + kStep : x; + float min0 = row[x]; + float min1 = row[xm1]; + float min2 = row[xp1]; + float min3 = rowt[xm1]; + // Sort the first four values. + if (min0 > min1) std::swap(min0, min1); + if (min0 > min2) std::swap(min0, min2); + if (min0 > min3) std::swap(min0, min3); + if (min1 > min2) std::swap(min1, min2); + if (min1 > min3) std::swap(min1, min3); + if (min2 > min3) std::swap(min2, min3); + // The remaining five values of a 3x3 neighbourhood. + StoreMin4(rowt[x], min0, min1, min2, min3); + StoreMin4(rowt[xp1], min0, min1, min2, min3); + StoreMin4(rowb[xm1], min0, min1, min2, min3); + StoreMin4(rowb[x], min0, min1, min2, min3); + StoreMin4(rowb[xp1], min0, min1, min2, min3); + static const float kMul0 = 0.125f; + static const float kMul1 = 0.075f; + static const float kMul2 = 0.06f; + static const float kMul3 = 0.05f; + float v = kMul0 * min0 + kMul1 * min1 + kMul2 * min2 + kMul3 * min3; + if (fx % 2 == 0 && fy % 2 == 0) { + row_out[fx / 2] = v; + } else { + row_out[fx / 2] += v; + } + } + } +} + +struct AdaptiveQuantizationImpl { + void Init(const Image3F& xyb) { + JXL_DASSERT(xyb.xsize() % kBlockDim == 0); + JXL_DASSERT(xyb.ysize() % kBlockDim == 0); + const size_t xsize = xyb.xsize(); + const size_t ysize = xyb.ysize(); + aq_map = ImageF(xsize / kBlockDim, ysize / kBlockDim); + } + void PrepareBuffers(size_t num_threads) { + diff_buffer = ImageF(kEncTileDim + 8, num_threads); + for (size_t i = pre_erosion.size(); i < num_threads; i++) { + pre_erosion.emplace_back(kEncTileDimInBlocks * 2 + 2, + kEncTileDimInBlocks * 2 + 2); + } + } + + void ComputeTile(float butteraugli_target, float scale, const Image3F& xyb, + const Rect& rect, const int thread, ImageF* mask) { + const size_t xsize = xyb.xsize(); + const size_t ysize = xyb.ysize(); + + // The XYB gamma is 3.0 to be able to decode faster with two muls. + // Butteraugli's gamma is matching the gamma of human eye, around 2.6. 
+ // We approximate the gamma difference by adding one cubic root into + // the adaptive quantization. This gives us a total gamma of 2.6666 + // for quantization uses. + const float match_gamma_offset = 0.019; + + const HWY_FULL(float) df; + + size_t y_start = rect.y0() * 8; + size_t y_end = y_start + rect.ysize() * 8; + + size_t x0 = rect.x0() * 8; + size_t x1 = x0 + rect.xsize() * 8; + if (x0 != 0) x0 -= 4; + if (x1 != xyb.xsize()) x1 += 4; + if (y_start != 0) y_start -= 4; + if (y_end != xyb.ysize()) y_end += 4; + pre_erosion[thread].ShrinkTo((x1 - x0) / 4, (y_end - y_start) / 4); + + static const float limit = 0.2f; + // Computes image (padded to multiple of 8x8) of local pixel differences. + // Subsample both directions by 4. + for (size_t y = y_start; y < y_end; ++y) { + size_t y2 = y + 1 < ysize ? y + 1 : y; + size_t y1 = y > 0 ? y - 1 : y; + + const float* row_in = xyb.PlaneRow(1, y); + const float* row_in1 = xyb.PlaneRow(1, y1); + const float* row_in2 = xyb.PlaneRow(1, y2); + float* JXL_RESTRICT row_out = diff_buffer.Row(thread); + + auto scalar_pixel = [&](size_t x) { + const size_t x2 = x + 1 < xsize ? x + 1 : x; + const size_t x1 = x > 0 ? x - 1 : x; + const float base = + 0.25f * (row_in2[x] + row_in1[x] + row_in[x1] + row_in[x2]); + const float gammac = RatioOfDerivativesOfCubicRootToSimpleGamma( + row_in[x] + match_gamma_offset); + float diff = gammac * (row_in[x] - base); + diff *= diff; + if (diff >= limit) { + diff = limit; + } + diff = MaskingSqrt(diff); + if ((y % 4) != 0) { + row_out[x - x0] += diff; + } else { + row_out[x - x0] = diff; + } + }; + + size_t x = x0; + // First pixel of the row. 
+ if (x0 == 0) { + scalar_pixel(x0); + ++x; + } + // SIMD + const auto match_gamma_offset_v = Set(df, match_gamma_offset); + const auto quarter = Set(df, 0.25f); + for (; x + 1 + Lanes(df) < x1; x += Lanes(df)) { + const auto in = LoadU(df, row_in + x); + const auto in_r = LoadU(df, row_in + x + 1); + const auto in_l = LoadU(df, row_in + x - 1); + const auto in_t = LoadU(df, row_in2 + x); + const auto in_b = LoadU(df, row_in1 + x); + auto base = Mul(quarter, Add(Add(in_r, in_l), Add(in_t, in_b))); + auto gammacv = + RatioOfDerivativesOfCubicRootToSimpleGamma( + df, Add(in, match_gamma_offset_v)); + auto diff = Mul(gammacv, Sub(in, base)); + diff = Mul(diff, diff); + diff = Min(diff, Set(df, limit)); + diff = MaskingSqrt(df, diff); + if ((y & 3) != 0) { + diff = Add(diff, LoadU(df, row_out + x - x0)); + } + StoreU(diff, df, row_out + x - x0); + } + // Scalar + for (; x < x1; ++x) { + scalar_pixel(x); + } + if (y % 4 == 3) { + float* row_dout = pre_erosion[thread].Row((y - y_start) / 4); + for (size_t x = 0; x < (x1 - x0) / 4; x++) { + row_dout[x] = (row_out[x * 4] + row_out[x * 4 + 1] + + row_out[x * 4 + 2] + row_out[x * 4 + 3]) * + 0.25f; + } + } + } + Rect from_rect(x0 % 8 == 0 ? 0 : 1, y_start % 8 == 0 ? 
0 : 1, + rect.xsize() * 2, rect.ysize() * 2); + FuzzyErosion(from_rect, pre_erosion[thread], rect, &aq_map); + for (size_t y = 0; y < rect.ysize(); ++y) { + const float* aq_map_row = rect.ConstRow(aq_map, y); + float* mask_row = rect.Row(mask, y); + for (size_t x = 0; x < rect.xsize(); ++x) { + mask_row[x] = ComputeMaskForAcStrategyUse(aq_map_row[x]); + } + } + PerBlockModulations(butteraugli_target, xyb.Plane(0), xyb.Plane(1), + xyb.Plane(2), scale, rect, &aq_map); + } + std::vector pre_erosion; + ImageF aq_map; + ImageF diff_buffer; +}; + +ImageF AdaptiveQuantizationMap(const float butteraugli_target, + const Image3F& xyb, + const FrameDimensions& frame_dim, float scale, + ThreadPool* pool, ImageF* mask) { + AdaptiveQuantizationImpl impl; + impl.Init(xyb); + *mask = ImageF(frame_dim.xsize_blocks, frame_dim.ysize_blocks); + JXL_CHECK(RunOnPool( + pool, 0, + DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks) * + DivCeil(frame_dim.ysize_blocks, kEncTileDimInBlocks), + [&](const size_t num_threads) { + impl.PrepareBuffers(num_threads); + return true; + }, + [&](const uint32_t tid, const size_t thread) { + size_t n_enc_tiles = + DivCeil(frame_dim.xsize_blocks, kEncTileDimInBlocks); + size_t tx = tid % n_enc_tiles; + size_t ty = tid / n_enc_tiles; + size_t by0 = ty * kEncTileDimInBlocks; + size_t by1 = + std::min((ty + 1) * kEncTileDimInBlocks, frame_dim.ysize_blocks); + size_t bx0 = tx * kEncTileDimInBlocks; + size_t bx1 = + std::min((tx + 1) * kEncTileDimInBlocks, frame_dim.xsize_blocks); + Rect r(bx0, by0, bx1 - bx0, by1 - by0); + impl.ComputeTile(butteraugli_target, scale, xyb, r, thread, mask); + }, + "AQ DiffPrecompute")); + + return std::move(impl).aq_map; +} + +} // namespace + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(AdaptiveQuantizationMap); + +namespace { + +// If true, prints the quantization maps at each iteration. 
+constexpr bool FLAGS_dump_quant_state = false; + +void DumpHeatmap(const CompressParams& cparams, const AuxOut* aux_out, + const std::string& label, const ImageF& image, + float good_threshold, float bad_threshold) { + if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) { + Image3F heatmap = CreateHeatMapImage(image, good_threshold, bad_threshold); + char filename[200]; + snprintf(filename, sizeof(filename), "%s%05d", label.c_str(), + aux_out->num_butteraugli_iters); + DumpImage(cparams, filename, heatmap); + } +} + +void DumpHeatmaps(const CompressParams& cparams, const AuxOut* aux_out, + float ba_target, const ImageF& quant_field, + const ImageF& tile_heatmap, const ImageF& bt_diffmap) { + if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) { + if (!WantDebugOutput(cparams)) return; + ImageF inv_qmap(quant_field.xsize(), quant_field.ysize()); + for (size_t y = 0; y < quant_field.ysize(); ++y) { + const float* JXL_RESTRICT row_q = quant_field.ConstRow(y); + float* JXL_RESTRICT row_inv_q = inv_qmap.Row(y); + for (size_t x = 0; x < quant_field.xsize(); ++x) { + row_inv_q[x] = 1.0f / row_q[x]; // never zero + } + } + DumpHeatmap(cparams, aux_out, "quant_heatmap", inv_qmap, 4.0f * ba_target, + 6.0f * ba_target); + DumpHeatmap(cparams, aux_out, "tile_heatmap", tile_heatmap, ba_target, + 1.5f * ba_target); + // matches heat maps produced by the command line tool. 
+ DumpHeatmap(cparams, aux_out, "bt_diffmap", bt_diffmap, + ButteraugliFuzzyInverse(1.5), ButteraugliFuzzyInverse(0.5)); + } +} + +ImageF TileDistMap(const ImageF& distmap, int tile_size, int margin, + const AcStrategyImage& ac_strategy) { + const int tile_xsize = (distmap.xsize() + tile_size - 1) / tile_size; + const int tile_ysize = (distmap.ysize() + tile_size - 1) / tile_size; + ImageF tile_distmap(tile_xsize, tile_ysize); + size_t distmap_stride = tile_distmap.PixelsPerRow(); + for (int tile_y = 0; tile_y < tile_ysize; ++tile_y) { + AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(tile_y); + float* JXL_RESTRICT dist_row = tile_distmap.Row(tile_y); + for (int tile_x = 0; tile_x < tile_xsize; ++tile_x) { + AcStrategy acs = ac_strategy_row[tile_x]; + if (!acs.IsFirstBlock()) continue; + int this_tile_xsize = acs.covered_blocks_x() * tile_size; + int this_tile_ysize = acs.covered_blocks_y() * tile_size; + int y_begin = std::max(0, tile_size * tile_y - margin); + int y_end = std::min(distmap.ysize(), + tile_size * tile_y + this_tile_ysize + margin); + int x_begin = std::max(0, tile_size * tile_x - margin); + int x_end = std::min(distmap.xsize(), + tile_size * tile_x + this_tile_xsize + margin); + float dist_norm = 0.0; + double pixels = 0; + for (int y = y_begin; y < y_end; ++y) { + float ymul = 1.0; + constexpr float kBorderMul = 0.98f; + constexpr float kCornerMul = 0.7f; + if (margin != 0 && (y == y_begin || y == y_end - 1)) { + ymul = kBorderMul; + } + const float* const JXL_RESTRICT row = distmap.Row(y); + for (int x = x_begin; x < x_end; ++x) { + float xmul = ymul; + if (margin != 0 && (x == x_begin || x == x_end - 1)) { + if (xmul == 1.0) { + xmul = kBorderMul; + } else { + xmul = kCornerMul; + } + } + float v = row[x]; + v *= v; + v *= v; + v *= v; + v *= v; + dist_norm += xmul * v; + pixels += xmul; + } + } + if (pixels == 0) pixels = 1; + // 16th norm is less than the max norm, we reduce the difference + // with this normalization factor. 
+ constexpr float kTileNorm = 1.2f; + const float tile_dist = + kTileNorm * std::pow(dist_norm / pixels, 1.0f / 16.0f); + dist_row[tile_x] = tile_dist; + for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { + dist_row[tile_x + distmap_stride * iy + ix] = tile_dist; + } + } + } + } + return tile_distmap; +} + +static const float kDcQuantPow = 0.83; +static const float kDcQuant = 1.095924047623553f; +static const float kAcQuant = 0.7635; + +// Computes the decoded image for a given set of compression parameters. +ImageBundle RoundtripImage(const Image3F& opsin, PassesEncoderState* enc_state, + const JxlCmsInterface& cms, ThreadPool* pool) { + std::unique_ptr dec_state = + jxl::make_unique(); + JXL_CHECK(dec_state->output_encoding_info.SetFromMetadata( + *enc_state->shared.metadata)); + dec_state->shared = &enc_state->shared; + JXL_ASSERT(opsin.ysize() % kBlockDim == 0); + + const size_t xsize_groups = DivCeil(opsin.xsize(), kGroupDim); + const size_t ysize_groups = DivCeil(opsin.ysize(), kGroupDim); + const size_t num_groups = xsize_groups * ysize_groups; + + size_t num_special_frames = enc_state->special_frames.size(); + + std::unique_ptr modular_frame_encoder = + jxl::make_unique(enc_state->shared.frame_header, + enc_state->cparams); + JXL_CHECK(InitializePassesEncoder(opsin, cms, pool, enc_state, + modular_frame_encoder.get(), nullptr)); + JXL_CHECK(dec_state->Init()); + JXL_CHECK(dec_state->InitForAC(pool)); + + ImageBundle decoded(&enc_state->shared.metadata->m); + decoded.origin = enc_state->shared.frame_header.frame_origin; + decoded.SetFromImage(Image3F(opsin.xsize(), opsin.ysize()), + dec_state->output_encoding_info.color_encoding); + + PassesDecoderState::PipelineOptions options; + options.use_slow_render_pipeline = false; + options.coalescing = false; + options.render_spotcolors = false; + + // Same as dec_state->shared->frame_header.nonserialized_metadata->m + const ImageMetadata& metadata = 
*decoded.metadata(); + + JXL_CHECK(dec_state->PreparePipeline(&decoded, options)); + + hwy::AlignedUniquePtr group_dec_caches; + const auto allocate_storage = [&](const size_t num_threads) -> Status { + JXL_RETURN_IF_ERROR( + dec_state->render_pipeline->PrepareForThreads(num_threads, + /*use_group_ids=*/false)); + group_dec_caches = hwy::MakeUniqueAlignedArray(num_threads); + return true; + }; + const auto process_group = [&](const uint32_t group_index, + const size_t thread) { + if (dec_state->shared->frame_header.loop_filter.epf_iters > 0) { + ComputeSigma(dec_state->shared->BlockGroupRect(group_index), + dec_state.get()); + } + RenderPipelineInput input = + dec_state->render_pipeline->GetInputBuffers(group_index, thread); + JXL_CHECK(DecodeGroupForRoundtrip( + enc_state->coeffs, group_index, dec_state.get(), + &group_dec_caches[thread], thread, input, &decoded, nullptr)); + for (size_t c = 0; c < metadata.num_extra_channels; c++) { + std::pair ri = input.GetBuffer(3 + c); + FillPlane(0.0f, ri.first, ri.second); + } + input.Done(); + }; + JXL_CHECK(RunOnPool(pool, 0, num_groups, allocate_storage, process_group, + "AQ loop")); + + // Ensure we don't create any new special frames. + enc_state->special_frames.resize(num_special_frames); + + return decoded; +} + +constexpr int kMaxButteraugliIters = 4; + +void FindBestQuantization(const ImageBundle& linear, const Image3F& opsin, + PassesEncoderState* enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out) { + const CompressParams& cparams = enc_state->cparams; + if (cparams.resampling > 1 && + cparams.original_butteraugli_distance <= 4.0 * cparams.resampling) { + // For downsampled opsin image, the butteraugli based adaptive quantization + // loop would only make the size bigger without improving the distance much, + // so in this case we enable it only for very high butteraugli targets. 
+ return; + } + Quantizer& quantizer = enc_state->shared.quantizer; + ImageI& raw_quant_field = enc_state->shared.raw_quant_field; + ImageF& quant_field = enc_state->initial_quant_field; + + // TODO(veluca): this should really be rather handled on the + // ButteraugliComparator side. + struct TemporaryShrink { + TemporaryShrink(ImageBundle& bundle, size_t xsize, size_t ysize) + : bundle(bundle), + orig_xsize(bundle.xsize()), + orig_ysize(bundle.ysize()) { + bundle.ShrinkTo(xsize, ysize); + } + TemporaryShrink(const TemporaryShrink&) = delete; + TemporaryShrink(TemporaryShrink&&) = delete; + + ~TemporaryShrink() { bundle.ShrinkTo(orig_xsize, orig_ysize); } + + ImageBundle& bundle; + size_t orig_xsize; + size_t orig_ysize; + } t(const_cast(linear), + enc_state->shared.frame_header.frame_size.xsize, + enc_state->shared.frame_header.frame_size.ysize); + + const float butteraugli_target = cparams.butteraugli_distance; + const float original_butteraugli = cparams.original_butteraugli_distance; + ButteraugliParams params; + params.intensity_target = linear.metadata()->IntensityTarget(); + // Hack the default intensity target value to be 80.0, the intensity + // target of sRGB images and a more reasonable viewing default than + // JPEG XL file format's default. 
+ if (fabs(params.intensity_target - 255.0f) < 1e-3) { + params.intensity_target = 80.0f; + } + JxlButteraugliComparator comparator(params, cms); + JXL_CHECK(comparator.SetReferenceImage(linear)); + bool lower_is_better = + (comparator.GoodQualityScore() < comparator.BadQualityScore()); + const float initial_quant_dc = InitialQuantDC(butteraugli_target); + AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field), + original_butteraugli, &quant_field); + ImageF tile_distmap; + ImageF initial_quant_field(quant_field.xsize(), quant_field.ysize()); + CopyImageTo(quant_field, &initial_quant_field); + + float initial_qf_min, initial_qf_max; + ImageMinMax(initial_quant_field, &initial_qf_min, &initial_qf_max); + float initial_qf_ratio = initial_qf_max / initial_qf_min; + float qf_max_deviation_low = std::sqrt(250 / initial_qf_ratio); + float asymmetry = 2; + if (qf_max_deviation_low < asymmetry) asymmetry = qf_max_deviation_low; + float qf_lower = initial_qf_min / (asymmetry * qf_max_deviation_low); + float qf_higher = initial_qf_max * (qf_max_deviation_low / asymmetry); + + JXL_ASSERT(qf_higher / qf_lower < 253); + + constexpr int kOriginalComparisonRound = 1; + int iters = kMaxButteraugliIters; + if (cparams.speed_tier != SpeedTier::kTortoise) { + iters = 2; + } + for (int i = 0; i < iters + 1; ++i) { + if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) { + printf("\nQuantization field:\n"); + for (size_t y = 0; y < quant_field.ysize(); ++y) { + for (size_t x = 0; x < quant_field.xsize(); ++x) { + printf(" %.5f", quant_field.Row(y)[x]); + } + printf("\n"); + } + } + quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field); + ImageBundle dec_linear = RoundtripImage(opsin, enc_state, cms, pool); + float score; + ImageF diffmap; + JXL_CHECK(comparator.CompareWith(dec_linear, &diffmap, &score)); + if (!lower_is_better) { + score = -score; + ScaleImage(-1.0f, &diffmap); + } + tile_distmap = TileDistMap(diffmap, 8 * cparams.resampling, 0, + 
enc_state->shared.ac_strategy); + if (JXL_DEBUG_ADAPTIVE_QUANTIZATION && WantDebugOutput(cparams)) { + DumpImage(cparams, ("dec" + ToString(i)).c_str(), *dec_linear.color()); + DumpHeatmaps(cparams, aux_out, butteraugli_target, quant_field, + tile_distmap, diffmap); + } + if (aux_out != nullptr) ++aux_out->num_butteraugli_iters; + if (JXL_DEBUG_ADAPTIVE_QUANTIZATION) { + float minval, maxval; + ImageMinMax(quant_field, &minval, &maxval); + printf("\nButteraugli iter: %d/%d\n", i, kMaxButteraugliIters); + printf("Butteraugli distance: %f (target = %f)\n", score, + original_butteraugli); + printf("quant range: %f ... %f DC quant: %f\n", minval, maxval, + initial_quant_dc); + if (FLAGS_dump_quant_state) { + quantizer.DumpQuantizationMap(raw_quant_field); + } + } + + if (i == iters) break; + + double kPow[8] = { + 0.2, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + }; + double kPowMod[8] = { + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + }; + if (i == kOriginalComparisonRound) { + // Don't allow optimization to make the quant field a lot worse than + // what the initial guess was. This allows the AC field to have enough + // precision to reduce the oscillations due to the dc reconstruction. 
+ double kInitMul = 0.6; + const double kOneMinusInitMul = 1.0 - kInitMul; + for (size_t y = 0; y < quant_field.ysize(); ++y) { + float* const JXL_RESTRICT row_q = quant_field.Row(y); + const float* const JXL_RESTRICT row_init = initial_quant_field.Row(y); + for (size_t x = 0; x < quant_field.xsize(); ++x) { + double clamp = kOneMinusInitMul * row_q[x] + kInitMul * row_init[x]; + if (row_q[x] < clamp) { + row_q[x] = clamp; + if (row_q[x] > qf_higher) row_q[x] = qf_higher; + if (row_q[x] < qf_lower) row_q[x] = qf_lower; + } + } + } + } + + double cur_pow = 0.0; + if (i < 7) { + cur_pow = kPow[i] + (original_butteraugli - 1.0) * kPowMod[i]; + if (cur_pow < 0) { + cur_pow = 0; + } + } + if (cur_pow == 0.0) { + for (size_t y = 0; y < quant_field.ysize(); ++y) { + const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y); + float* const JXL_RESTRICT row_q = quant_field.Row(y); + for (size_t x = 0; x < quant_field.xsize(); ++x) { + const float diff = row_dist[x] / original_butteraugli; + if (diff > 1.0f) { + float old = row_q[x]; + row_q[x] *= diff; + int qf_old = old * quantizer.InvGlobalScale() + 0.5; + int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5; + if (qf_old == qf_new) { + row_q[x] = old + quantizer.Scale(); + } + } + if (row_q[x] > qf_higher) row_q[x] = qf_higher; + if (row_q[x] < qf_lower) row_q[x] = qf_lower; + } + } + } else { + for (size_t y = 0; y < quant_field.ysize(); ++y) { + const float* const JXL_RESTRICT row_dist = tile_distmap.Row(y); + float* const JXL_RESTRICT row_q = quant_field.Row(y); + for (size_t x = 0; x < quant_field.xsize(); ++x) { + const float diff = row_dist[x] / original_butteraugli; + if (diff <= 1.0f) { + row_q[x] *= std::pow(diff, cur_pow); + } else { + float old = row_q[x]; + row_q[x] *= diff; + int qf_old = old * quantizer.InvGlobalScale() + 0.5; + int qf_new = row_q[x] * quantizer.InvGlobalScale() + 0.5; + if (qf_old == qf_new) { + row_q[x] = old + quantizer.Scale(); + } + } + if (row_q[x] > qf_higher) row_q[x] = 
qf_higher; + if (row_q[x] < qf_lower) row_q[x] = qf_lower; + } + } + } + } + quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field); +} + /* FindBestQuantizationMaxError: refines enc_state->initial_quant_field against a per-channel maximum-error budget instead of a butteraugli score. It runs kMaxButteraugliIters + 1 rounds; each round installs the current field in the quantizer, round-trips `opsin` through RoundtripImage, and for every varblock takes the largest absolute difference between `opsin` and the decoded image over the three channels, each scaled by 1 / cparams.max_error[c] (pixels outside the decoded size are skipped). The quant field entries covered by the varblock are then multiplied by a factor derived from that scaled error. The final field is installed once more after the loop. */ +void FindBestQuantizationMaxError(const Image3F& opsin, + PassesEncoderState* enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out) { + // TODO(szabadka): Make this work for non-opsin color spaces. + const CompressParams& cparams = enc_state->cparams; + Quantizer& quantizer = enc_state->shared.quantizer; + ImageI& raw_quant_field = enc_state->shared.raw_quant_field; + ImageF& quant_field = enc_state->initial_quant_field; + + // TODO(veluca): better choice of this value. + const float initial_quant_dc = + 16 * std::sqrt(0.1f / cparams.butteraugli_distance); + AdjustQuantField(enc_state->shared.ac_strategy, Rect(quant_field), + cparams.original_butteraugli_distance, &quant_field); + /* NOTE(review): the reciprocals below presumably rely on every cparams.max_error[] entry being nonzero - confirm against the code that sets max_error_mode. */ + const float inv_max_err[3] = {1.0f / enc_state->cparams.max_error[0], + 1.0f / enc_state->cparams.max_error[1], + 1.0f / enc_state->cparams.max_error[2]}; + + for (int i = 0; i < kMaxButteraugliIters + 1; ++i) { + quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field); + if (JXL_DEBUG_ADAPTIVE_QUANTIZATION && aux_out) { + DumpXybImage(cparams, ("ops" + ToString(i)).c_str(), opsin); + } + ImageBundle decoded = RoundtripImage(opsin, enc_state, cms, pool); + if (JXL_DEBUG_ADAPTIVE_QUANTIZATION && aux_out) { + DumpXybImage(cparams, ("dec" + ToString(i)).c_str(), *decoded.color()); + } + for (size_t by = 0; by < enc_state->shared.frame_dim.ysize_blocks; by++) { + AcStrategyRow ac_strategy_row = + enc_state->shared.ac_strategy.ConstRow(by); + for (size_t bx = 0; bx < enc_state->shared.frame_dim.xsize_blocks; bx++) { + AcStrategy acs = ac_strategy_row[bx]; + if (!acs.IsFirstBlock()) continue; + float max_error = 0; /* largest scaled error seen in this varblock; 1.0 corresponds to the allowed maximum */ + for (size_t c = 0; c < 3; c++) { + for (size_t y = by * kBlockDim; + y < (by + acs.covered_blocks_y()) * kBlockDim; y++) { + if (y >= decoded.ysize()) continue; + const float*
JXL_RESTRICT in_row = opsin.ConstPlaneRow(c, y); + const float* JXL_RESTRICT dec_row = + decoded.color()->ConstPlaneRow(c, y); + for (size_t x = bx * kBlockDim; + x < (bx + acs.covered_blocks_x()) * kBlockDim; x++) { + if (x >= decoded.xsize()) continue; + max_error = std::max( + std::abs(in_row[x] - dec_row[x]) * inv_max_err[c], max_error); + } + } + } + // Target an error between max_error/2 and max_error. + // If the error in the varblock is above the target, increase the qf to + // compensate. If the error is below the target, decrease the qf. + // However, to avoid an excessive increase of the qf, only do so if the + // error is less than half the maximum allowed error. + const float qf_mul = (max_error < 0.5f) ? max_error * 2.0f + : (max_error > 1.0f) ? max_error + : 1.0f; + for (size_t qy = by; qy < by + acs.covered_blocks_y(); qy++) { + float* JXL_RESTRICT quant_field_row = quant_field.Row(qy); + for (size_t qx = bx; qx < bx + acs.covered_blocks_x(); qx++) { + quant_field_row[qx] *= qf_mul; + } + } + } + } + } + quantizer.SetQuantField(initial_quant_dc, quant_field, &raw_quant_field); +} + +} // namespace + +void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect, + float butteraugli_target, ImageF* quant_field) { + // Replace the whole quant_field in non-8x8 blocks with the maximum of each + // 8x8 block. + size_t stride = quant_field->PixelsPerRow(); + + // At low distances it is great to use max, but mean works better + // at high distances. We interpolate between them for a distance + // range. 
+ float mean_max_mixer = 1.0f; /* blend weight: 1 keeps the block maximum, 0 uses the block mean */ + { + static const float kLimit = 1.54138f; + static const float kMul = 0.56391f; + static const float kMin = 0.0f; + if (butteraugli_target > kLimit) { + mean_max_mixer -= (butteraugli_target - kLimit) * kMul; + if (mean_max_mixer < kMin) { + mean_max_mixer = kMin; + } + } + } + for (size_t y = 0; y < rect.ysize(); ++y) { + AcStrategyRow ac_strategy_row = ac_strategy.ConstRow(rect, y); + float* JXL_RESTRICT quant_row = rect.Row(quant_field, y); + for (size_t x = 0; x < rect.xsize(); ++x) { + AcStrategy acs = ac_strategy_row[x]; + if (!acs.IsFirstBlock()) continue; + JXL_ASSERT(x + acs.covered_blocks_x() <= quant_field->xsize()); + JXL_ASSERT(y + acs.covered_blocks_y() <= quant_field->ysize()); + float max = quant_row[x]; + float mean = 0.0; + for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { + mean += quant_row[x + ix + iy * stride]; + max = std::max(quant_row[x + ix + iy * stride], max); + } + } + mean /= acs.covered_blocks_y() * acs.covered_blocks_x(); + if (acs.covered_blocks_y() * acs.covered_blocks_x() >= 4) { /* the max/mean blend applies only to varblocks covering at least 4 blocks; smaller ones keep the plain maximum */ + max *= mean_max_mixer; + max += (1.0f - mean_max_mixer) * mean; + } + for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { + quant_row[x + ix + iy * stride] = max; + } + } + } + } +} + /* InitialQuantDC: returns the DC quantization value for a butteraugli target, computed as kDcQuant divided by an adjusted DC target and capped at 50. The adjusted target is kDcMul * (butteraugli_target / kDcMul)^kDcQuantPow, limited from above by butteraugli_target and from below by half of it. */ +float InitialQuantDC(float butteraugli_target) { + const float kDcMul = 0.3; // Butteraugli target where non-linearity kicks in. + const float butteraugli_target_dc = std::max( + 0.5f * butteraugli_target, + std::min(butteraugli_target, + kDcMul * std::pow((1.0f / kDcMul) * butteraugli_target, + kDcQuantPow))); + // We want the maximum DC value to be at most 2**15 * kInvDCQuant / quant_dc. + // The maximum DC value might not be in the kXybRange because of inverse + // gaborish, so we add some slack to the maximum theoretical quant obtained + // this way (64).
+ return std::min(kDcQuant / butteraugli_target_dc, 50.f); +} + /* InitialQuantField: computes the initial quantization field by dispatching to AdaptiveQuantizationMap with an AC scale of kAcQuant / butteraugli_target multiplied by `rescale`; `mask` is forwarded to that call unchanged. */ +ImageF InitialQuantField(const float butteraugli_target, const Image3F& opsin, + const FrameDimensions& frame_dim, ThreadPool* pool, + float rescale, ImageF* mask) { + const float quant_ac = kAcQuant / butteraugli_target; + return HWY_DYNAMIC_DISPATCH(AdaptiveQuantizationMap)( + butteraugli_target, opsin, frame_dim, quant_ac * rescale, pool, mask); +} + /* FindBestQuantizer: selects the quant-field refinement loop. With cparams.max_error_mode set it runs FindBestQuantizationMaxError; otherwise, for speed tiers <= kKitten, it runs FindBestQuantization, which dereferences `linear`; for any other speed tier it does nothing. The `rescale` parameter is not read in this body. */ +void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin, + PassesEncoderState* enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out, double rescale) { + const CompressParams& cparams = enc_state->cparams; + if (cparams.max_error_mode) { + FindBestQuantizationMaxError(opsin, enc_state, cms, pool, aux_out); + } else if (cparams.speed_tier <= SpeedTier::kKitten) { + // Normal encoding to a butteraugli score. + FindBestQuantization(*linear, opsin, enc_state, cms, pool, aux_out); + } +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.h b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.h new file mode 100644 index 0000000000..730cec6dcb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_adaptive_quantization.h @@ -0,0 +1,60 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +#ifndef LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_ +#define LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_ + +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/common.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/quant_weights.h" +#include "lib/jxl/quantizer.h" +#include "lib/jxl/splines.h" + +// Heuristics to find a good quantizer for a given image. InitialQuantField +// produces a quantization field (i.e. relative quantization amounts for each +// block) out of an opsin-space image. `InitialQuantField` uses heuristics, +// `FindBestQuantizer` (in non-fast mode) will run multiple encoding-decoding +// steps and try to improve the given quant field. + +namespace jxl { + +struct AuxOut; + +// Returns an image subsampled by kBlockDim in each direction. If the value +// at pixel (x,y) in the returned image is greater than 1.0, it means that +// more fine-grained quantization should be used in the corresponding block +// of the input image, while a value less than 1.0 indicates that less +// fine-grained quantization should be enough. Returns a mask, too, which +// can later be used to make better decisions about ac strategy. +ImageF InitialQuantField(float butteraugli_target, const Image3F& opsin, + const FrameDimensions& frame_dim, ThreadPool* pool, + float rescale, ImageF* initial_quant_mask); + +float InitialQuantDC(float butteraugli_target); + +void AdjustQuantField(const AcStrategyImage& ac_strategy, const Rect& rect, + float butteraugli_target, ImageF* quant_field); + +// Returns a quantizer that uses an adjusted version of the provided +// quant_field. Also computes the dequant_map corresponding to the given +// dequant_float_map and chosen quantization levels. +// `linear` is only used in Kitten mode or slower. 
+void FindBestQuantizer(const ImageBundle* linear, const Image3F& opsin, + PassesEncoderState* enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out, double rescale = 1.0); + +} // namespace jxl + +#endif // LIB_JXL_ENC_ADAPTIVE_QUANTIZATION_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ans.cc b/third-party/libjxl/libjxl/lib/jxl/enc_ans.cc new file mode 100644 index 0000000000..564ceba71b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_ans.cc @@ -0,0 +1,1691 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_ans.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/ans_common.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_cluster.h" +#include "lib/jxl/enc_context_map.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_huffman.h" +#include "lib/jxl/fast_math-inl.h" +#include "lib/jxl/fields.h" + +namespace jxl { + +namespace { + +#if !JXL_IS_DEBUG_BUILD +constexpr +#endif + bool ans_fuzzer_friendly_ = false; + +static const int kMaxNumSymbolsForSmallCode = 4; + +void ANSBuildInfoTable(const ANSHistBin* counts, const AliasTable::Entry* table, + size_t alphabet_size, size_t log_alpha_size, + ANSEncSymbolInfo* info) { + size_t log_entry_size = ANS_LOG_TAB_SIZE - log_alpha_size; + size_t entry_size_minus_1 = (1 << log_entry_size) - 1; + // create valid alias table for empty streams. + for (size_t s = 0; s < std::max(1, alphabet_size); ++s) { + const ANSHistBin freq = s == alphabet_size ? 
ANS_TAB_SIZE : counts[s]; + info[s].freq_ = static_cast(freq); +#ifdef USE_MULT_BY_RECIPROCAL + if (freq != 0) { + info[s].ifreq_ = + ((1ull << RECIPROCAL_PRECISION) + info[s].freq_ - 1) / info[s].freq_; + } else { + info[s].ifreq_ = 1; // shouldn't matter (symbol shouldn't occur), but... + } +#endif + info[s].reverse_map_.resize(freq); + } + for (int i = 0; i < ANS_TAB_SIZE; i++) { + AliasTable::Symbol s = + AliasTable::Lookup(table, i, log_entry_size, entry_size_minus_1); + info[s.value].reverse_map_[s.offset] = i; + } +} + /* EstimateDataBits: estimates the number of bits needed to code symbols that occur histogram[i] times each when the coder uses the normalized frequencies counts[i]. Each occurrence of symbol i is charged max(0, ANS_LOG_TAB_SIZE - log2(counts[i])) bits. Asserts that every symbol present in `histogram` has a positive count and, when the histogram is non-empty, that the counts sum to ANS_TAB_SIZE. */ +float EstimateDataBits(const ANSHistBin* histogram, const ANSHistBin* counts, + size_t len) { + float sum = 0.0f; + int total_histogram = 0; + int total_counts = 0; + for (size_t i = 0; i < len; ++i) { + total_histogram += histogram[i]; + total_counts += counts[i]; + if (histogram[i] > 0) { + JXL_ASSERT(counts[i] > 0); + // += histogram[i] * -log(counts[i]/total_counts) + sum += histogram[i] * + std::max(0.0f, ANS_LOG_TAB_SIZE - FastLog2f(counts[i])); + } + } + if (total_histogram > 0) { + // Used only in assert. + (void)total_counts; + JXL_ASSERT(total_counts == ANS_TAB_SIZE); + } + return sum; +} + /* EstimateDataBitsFlat: data cost under a flat code - the total number of occurrences in `histogram` multiplied by max(log2(len), 0) bits per symbol. */ +float EstimateDataBitsFlat(const ANSHistBin* histogram, size_t len) { + const float flat_bits = std::max(FastLog2f(len), 0.0f); + float total_histogram = 0; + for (size_t i = 0; i < len; ++i) { + total_histogram += histogram[i]; + } + return total_histogram * flat_bits; +} + +// Static Huffman code for encoding logcounts. The last symbol is used as RLE +// sequence. +static const uint8_t kLogCountBitLengths[ANS_LOG_TAB_SIZE + 2] = { + 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 6, 7, 7, +}; +static const uint8_t kLogCountSymbols[ANS_LOG_TAB_SIZE + 2] = { + 17, 11, 15, 3, 9, 7, 4, 2, 5, 6, 0, 33, 1, 65, +}; + +// Returns the difference between largest count that can be represented and is +// smaller than "count" and smallest representable count larger than "count". +static int SmallestIncrement(uint32_t count, uint32_t shift) { + int bits = count == 0 ?
-1 : FloorLog2Nonzero(count); + int drop_bits = bits - GetPopulationCountPrecision(bits, shift); + return drop_bits < 0 ? 1 : (1 << drop_bits); +} + +template +bool RebalanceHistogram(const float* targets, int max_symbol, int table_size, + uint32_t shift, int* omit_pos, ANSHistBin* counts) { + int sum = 0; + float sum_nonrounded = 0.0; + int remainder_pos = 0; // if all of them are handled in first loop + int remainder_log = -1; + for (int n = 0; n < max_symbol; ++n) { + if (targets[n] > 0 && targets[n] < 1.0f) { + counts[n] = 1; + sum_nonrounded += targets[n]; + sum += counts[n]; + } + } + const float discount_ratio = + (table_size - sum) / (table_size - sum_nonrounded); + JXL_ASSERT(discount_ratio > 0); + JXL_ASSERT(discount_ratio <= 1.0f); + // Invariant for minimize_error_of_sum == true: + // abs(sum - sum_nonrounded) + // <= SmallestIncrement(max(targets[])) + max_symbol + for (int n = 0; n < max_symbol; ++n) { + if (targets[n] >= 1.0f) { + sum_nonrounded += targets[n]; + counts[n] = + static_cast(targets[n] * discount_ratio); // truncate + if (counts[n] == 0) counts[n] = 1; + if (counts[n] == table_size) counts[n] = table_size - 1; + // Round the count to the closest nonzero multiple of SmallestIncrement + // (when minimize_error_of_sum is false) or one of two closest so as to + // keep the sum as close as possible to sum_nonrounded. + int inc = SmallestIncrement(counts[n], shift); + counts[n] -= counts[n] & (inc - 1); + // TODO(robryk): Should we rescale targets[n]? + const float target = + minimize_error_of_sum ? (sum_nonrounded - sum) : targets[n]; + if (counts[n] == 0 || + (target > counts[n] + inc / 2 && counts[n] + inc < table_size)) { + counts[n] += inc; + } + sum += counts[n]; + const int count_log = FloorLog2Nonzero(static_cast(counts[n])); + if (count_log > remainder_log) { + remainder_pos = n; + remainder_log = count_log; + } + } + } + JXL_ASSERT(remainder_pos != -1); + // NOTE: This is the only place where counts could go negative. 
We could + // detect that, return false and make ANSHistBin uint32_t. + counts[remainder_pos] -= sum - table_size; + *omit_pos = remainder_pos; + return counts[remainder_pos] > 0; +} + /* NormalizeCounts: rescales counts[0..length) in place so that they sum to 1 << precision_bits. While scanning it stores the indices of the first kMaxNumSymbolsForSmallCode nonzero entries in symbols[] and the number of nonzero entries in *num_symbols. With no nonzero entry it returns success without touching counts; with exactly one, that entry receives the whole table. More nonzero entries than table slots is reported as a failure. Otherwise the proportionally scaled counts are handed to RebalanceHistogram, a second RebalanceHistogram call is made if the first returns false, and a failure is returned if that one fails as well. */ +Status NormalizeCounts(ANSHistBin* counts, int* omit_pos, const int length, + const int precision_bits, uint32_t shift, + int* num_symbols, int* symbols) { + const int32_t table_size = 1 << precision_bits; // target sum / table size + uint64_t total = 0; + int max_symbol = 0; /* one past the index of the last nonzero count */ + int symbol_count = 0; + for (int n = 0; n < length; ++n) { + total += counts[n]; + if (counts[n] > 0) { + if (symbol_count < kMaxNumSymbolsForSmallCode) { + symbols[symbol_count] = n; + } + ++symbol_count; + max_symbol = n + 1; + } + } + *num_symbols = symbol_count; + if (symbol_count == 0) { + return true; + } + if (symbol_count == 1) { + counts[symbols[0]] = table_size; + return true; + } + if (symbol_count > table_size) + return JXL_FAILURE("Too many entries in an ANS histogram"); + + const float norm = 1.f * table_size / total; + std::vector targets(max_symbol); + for (size_t n = 0; n < targets.size(); ++n) { + targets[n] = norm * counts[n]; + } + if (!RebalanceHistogram(&targets[0], max_symbol, table_size, shift, + omit_pos, counts)) { + // Use an alternative rebalancing mechanism if the one above failed + // to create a histogram that is positive wherever the original one was.
+ if (!RebalanceHistogram(&targets[0], max_symbol, table_size, shift, + omit_pos, counts)) { + return JXL_FAILURE("Logic error: couldn't rebalance a histogram"); + } + } + return true; +} + /* SizeWriter: stand-in for a bit writer that emits nothing. Write() adds its first argument to `size` and ignores the second; the call sites in this file pass the bit count first and the value second, so `size` ends up as the number of bits that would have been written. */ +struct SizeWriter { + size_t size = 0; + void Write(size_t num, size_t bits) { size += num; } +}; + /* StoreVarLenUint8: writes n (expected <= 255) in variable length form - a single 0 bit when n is 0; otherwise a 1 bit, then floor(log2(n)) in 3 bits, then that many bits holding n with its top set bit removed. */ +template +void StoreVarLenUint8(size_t n, Writer* writer) { + JXL_DASSERT(n <= 255); + if (n == 0) { + writer->Write(1, 0); + } else { + writer->Write(1, 1); + size_t nbits = FloorLog2Nonzero(n); + writer->Write(3, nbits); + writer->Write(nbits, n - (1ULL << nbits)); + } +} + /* StoreVarLenUint16: same layout as StoreVarLenUint8 for n up to 65535, with floor(log2(n)) stored in 4 bits instead of 3. */ +template +void StoreVarLenUint16(size_t n, Writer* writer) { + JXL_DASSERT(n <= 65535); + if (n == 0) { + writer->Write(1, 0); + } else { + writer->Write(1, 1); + size_t nbits = FloorLog2Nonzero(n); + writer->Write(4, nbits); + writer->Write(nbits, n - (1ULL << nbits)); + } +} + +template +bool EncodeCounts(const ANSHistBin* counts, const int alphabet_size, + const int omit_pos, const int num_symbols, uint32_t shift, + const int* symbols, Writer* writer) { + bool ok = true; + if (num_symbols <= 2) { + // Small tree marker to encode 1-2 symbols. + writer->Write(1, 1); + if (num_symbols == 0) { + writer->Write(1, 0); + StoreVarLenUint8(0, writer); + } else { + writer->Write(1, num_symbols - 1); + for (int i = 0; i < num_symbols; ++i) { + StoreVarLenUint8(symbols[i], writer); + } + } + if (num_symbols == 2) { + writer->Write(ANS_LOG_TAB_SIZE, counts[symbols[0]]); + } + } else { + // Mark non-small tree. + writer->Write(1, 0); + // Mark non-flat histogram. + writer->Write(1, 0); + + // Precompute sequences for RLE encoding. Contains the number of identical + // values starting at a given index. Only contains the value at the first + // element of the series.
+ std::vector same(alphabet_size, 0); + int last = 0; + for (int i = 1; i < alphabet_size; i++) { + // Store the sequence length once different symbol reached, or we're at + // the end, or the length is longer than we can encode, or we are at + // the omit_pos. We don't support including the omit_pos in an RLE + // sequence because this value may use a different amount of log2 bits + // than standard, it is too complex to handle in the decoder. + if (counts[i] != counts[last] || i + 1 == alphabet_size || + (i - last) >= 255 || i == omit_pos || i == omit_pos + 1) { + same[last] = (i - last); + last = i + 1; + } + } + + int length = 0; + std::vector logcounts(alphabet_size); + int omit_log = 0; + for (int i = 0; i < alphabet_size; ++i) { + JXL_ASSERT(counts[i] <= ANS_TAB_SIZE); + JXL_ASSERT(counts[i] >= 0); + if (i == omit_pos) { + length = i + 1; + } else if (counts[i] > 0) { + logcounts[i] = FloorLog2Nonzero(static_cast(counts[i])) + 1; + length = i + 1; + if (i < omit_pos) { + omit_log = std::max(omit_log, logcounts[i] + 1); + } else { + omit_log = std::max(omit_log, logcounts[i]); + } + } + } + logcounts[omit_pos] = omit_log; + + // Elias gamma-like code for shift. Only difference is that if the number + // of bits to be encoded is equal to FloorLog2(ANS_LOG_TAB_SIZE+1), we skip + // the terminating 0 in unary coding. + int upper_bound_log = FloorLog2Nonzero(ANS_LOG_TAB_SIZE + 1); + int log = FloorLog2Nonzero(shift + 1); + writer->Write(log, (1 << log) - 1); + if (log != upper_bound_log) writer->Write(1, 0); + writer->Write(log, ((1 << log) - 1) & (shift + 1)); + + // Since num_symbols >= 3, we know that length >= 3, therefore we encode + // length - 3. + if (length - 3 > 255) { + // Pretend that everything is OK, but complain about correctness later. + StoreVarLenUint8(255, writer); + ok = false; + } else { + StoreVarLenUint8(length - 3, writer); + } + + // The logcount values are encoded with a static Huffman code. 
+ static const size_t kMinReps = 4; + size_t rep = ANS_LOG_TAB_SIZE + 1; + for (int i = 0; i < length; ++i) { + if (i > 0 && same[i - 1] > kMinReps) { + // Encode the RLE symbol and skip the repeated ones. + writer->Write(kLogCountBitLengths[rep], kLogCountSymbols[rep]); + StoreVarLenUint8(same[i - 1] - kMinReps - 1, writer); + i += same[i - 1] - 2; + continue; + } + writer->Write(kLogCountBitLengths[logcounts[i]], + kLogCountSymbols[logcounts[i]]); + } + for (int i = 0; i < length; ++i) { + if (i > 0 && same[i - 1] > kMinReps) { + // Skip symbols encoded by RLE. + i += same[i - 1] - 2; + continue; + } + if (logcounts[i] > 1 && i != omit_pos) { + int bitcount = GetPopulationCountPrecision(logcounts[i] - 1, shift); + int drop_bits = logcounts[i] - 1 - bitcount; + JXL_CHECK((counts[i] & ((1 << drop_bits) - 1)) == 0); + writer->Write(bitcount, (counts[i] >> drop_bits) - (1 << bitcount)); + } + } + } + return ok; +} + +void EncodeFlatHistogram(const int alphabet_size, BitWriter* writer) { + // Mark non-small tree. + writer->Write(1, 0); + // Mark uniform histogram. + writer->Write(1, 1); + JXL_ASSERT(alphabet_size > 0); + // Encode alphabet size. + StoreVarLenUint8(alphabet_size - 1, writer); +} + +float ComputeHistoAndDataCost(const ANSHistBin* histogram, size_t alphabet_size, + uint32_t method) { + if (method == 0) { // Flat code + return ANS_LOG_TAB_SIZE + 2 + + EstimateDataBitsFlat(histogram, alphabet_size); + } + // Non-flat: shift = method-1. + uint32_t shift = method - 1; + std::vector counts(histogram, histogram + alphabet_size); + int omit_pos = 0; + int num_symbols; + int symbols[kMaxNumSymbolsForSmallCode] = {}; + JXL_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size, + ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols)); + SizeWriter writer; + // Ignore the correctness, no real encoding happens at this stage. 
+ (void)EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols, shift, + symbols, &writer); + return writer.size + + EstimateDataBits(histogram, counts.data(), alphabet_size); +} + /* ComputeBestMethod: picks the cheapest way to code `histogram`. Method 0 is the flat code; method k + 1 is the non-flat code with shift k. The flat cost is the starting point and a candidate replaces it only when ComputeHistoAndDataCost reports a strictly lower cost. Which shifts are tried depends on the strategy: kPrecise tries every shift from 0 to ANS_LOG_TAB_SIZE, kApproximate every second one, and kFast only 0, ANS_LOG_TAB_SIZE / 2 and ANS_LOG_TAB_SIZE. The winning cost is stored in *cost and the winning method is returned. */ +uint32_t ComputeBestMethod( + const ANSHistBin* histogram, size_t alphabet_size, float* cost, + HistogramParams::ANSHistogramStrategy ans_histogram_strategy) { + size_t method = 0; + float fcost = ComputeHistoAndDataCost(histogram, alphabet_size, 0); + auto try_shift = [&](size_t shift) { /* evaluates one shift and keeps it if it beats the best cost so far */ + float c = ComputeHistoAndDataCost(histogram, alphabet_size, shift + 1); + if (c < fcost) { + method = shift + 1; + fcost = c; + } + }; + switch (ans_histogram_strategy) { + case HistogramParams::ANSHistogramStrategy::kPrecise: { + for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift++) { + try_shift(shift); + } + break; + } + case HistogramParams::ANSHistogramStrategy::kApproximate: { + for (uint32_t shift = 0; shift <= ANS_LOG_TAB_SIZE; shift += 2) { + try_shift(shift); + } + break; + } + case HistogramParams::ANSHistogramStrategy::kFast: { + try_shift(0); + try_shift(ANS_LOG_TAB_SIZE / 2); + try_shift(ANS_LOG_TAB_SIZE); + break; + } + }; + *cost = fcost; + return method; +} + +} // namespace + +// Returns an estimate of the cost of encoding this histogram and the +// corresponding data.
+size_t BuildAndStoreANSEncodingData( + HistogramParams::ANSHistogramStrategy ans_histogram_strategy, + const ANSHistBin* histogram, size_t alphabet_size, size_t log_alpha_size, + bool use_prefix_code, ANSEncSymbolInfo* info, BitWriter* writer) { + if (use_prefix_code) { + if (alphabet_size <= 1) return 0; + std::vector histo(alphabet_size); + for (size_t i = 0; i < alphabet_size; i++) { + histo[i] = histogram[i]; + JXL_CHECK(histogram[i] >= 0); + } + size_t cost = 0; + { + std::vector depths(alphabet_size); + std::vector bits(alphabet_size); + if (writer == nullptr) { + BitWriter tmp_writer; + BitWriter::Allotment allotment( + &tmp_writer, 8 * alphabet_size + 8); // safe upper bound + BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(), + bits.data(), &tmp_writer); + allotment.ReclaimAndCharge(&tmp_writer, 0, /*aux_out=*/nullptr); + cost = tmp_writer.BitsWritten(); + } else { + size_t start = writer->BitsWritten(); + BuildAndStoreHuffmanTree(histo.data(), alphabet_size, depths.data(), + bits.data(), writer); + cost = writer->BitsWritten() - start; + } + for (size_t i = 0; i < alphabet_size; i++) { + info[i].bits = depths[i] == 0 ? 0 : bits[i]; + info[i].depth = depths[i]; + } + } + // Estimate data cost. + for (size_t i = 0; i < alphabet_size; i++) { + cost += histogram[i] * info[i].depth; + } + return cost; + } + JXL_ASSERT(alphabet_size <= ANS_TAB_SIZE); + // Ensure we ignore trailing zeros in the histogram. 
+  if (alphabet_size != 0) {
+    size_t largest_symbol = 0;
+    for (size_t i = 0; i < alphabet_size; i++) {
+      if (histogram[i] != 0) largest_symbol = i;
+    }
+    alphabet_size = largest_symbol + 1;
+  }
+  float cost;
+  uint32_t method = ComputeBestMethod(histogram, alphabet_size, &cost,
+                                      ans_histogram_strategy);
+  JXL_ASSERT(cost >= 0);
+  int num_symbols;
+  int symbols[kMaxNumSymbolsForSmallCode] = {};
+  std::vector counts(histogram, histogram + alphabet_size);
+  if (!counts.empty()) {
+    size_t sum = 0;
+    for (size_t i = 0; i < counts.size(); i++) {
+      sum += counts[i];
+    }
+    if (sum == 0) {
+      // All-zero histogram: give symbol 0 the whole ANS_TAB_SIZE mass.
+      counts[0] = ANS_TAB_SIZE;
+    }
+  }
+  if (method == 0) {
+    // Method 0: the counts are replaced by a flat histogram, and only the flat
+    // histogram marker is written.
+    counts = CreateFlatHistogram(alphabet_size, ANS_TAB_SIZE);
+    AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+    InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+    ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+    if (writer != nullptr) {
+      EncodeFlatHistogram(alphabet_size, writer);
+    }
+    return cost;
+  }
+  int omit_pos = 0;
+  // Methods >= 1 carry the normalization shift as `method - 1`.
+  uint32_t shift = method - 1;
+  JXL_CHECK(NormalizeCounts(counts.data(), &omit_pos, alphabet_size,
+                            ANS_LOG_TAB_SIZE, shift, &num_symbols, symbols));
+  AliasTable::Entry a[ANS_MAX_ALPHABET_SIZE];
+  InitAliasTable(counts, ANS_TAB_SIZE, log_alpha_size, a);
+  ANSBuildInfoTable(counts.data(), a, alphabet_size, log_alpha_size, info);
+  if (writer != nullptr) {
+    bool ok = EncodeCounts(counts.data(), alphabet_size, omit_pos, num_symbols,
+                           shift, symbols, writer);
+    (void)ok;
+    JXL_DASSERT(ok);
+  }
+  return cost;
+}
+
+// Returns the cost that ComputeBestMethod reports for `data` under the kFast
+// strategy. The method index it selects is discarded.
+float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size) {
+  float c;
+  ComputeBestMethod(data, alphabet_size, &c,
+                    HistogramParams::ANSHistogramStrategy::kFast);
+  return c;
+}
+
+// Writes `uint_config` to `writer`, starting with split_exponent in
+// CeilLog2Nonzero(log_alpha_size + 1) bits.
+template
+void EncodeUintConfig(const HybridUintConfig uint_config, Writer* writer,
+                      size_t log_alpha_size) {
+  writer->Write(CeilLog2Nonzero(log_alpha_size + 1),
+                uint_config.split_exponent);
+  if (uint_config.split_exponent == log_alpha_size) {
+    return;  // msb/lsb don't
matter.
+  }
+  size_t nbits = CeilLog2Nonzero(uint_config.split_exponent + 1);
+  writer->Write(nbits, uint_config.msb_in_token);
+  nbits = CeilLog2Nonzero(uint_config.split_exponent -
+                          uint_config.msb_in_token + 1);
+  writer->Write(nbits, uint_config.lsb_in_token);
+}
+// Writes every entry of `uint_config`, in order, through EncodeUintConfig with
+// the same `log_alpha_size`.
+template
+void EncodeUintConfigs(const std::vector& uint_config,
+                       Writer* writer, size_t log_alpha_size) {
+  // TODO(veluca): RLE?
+  for (size_t i = 0; i < uint_config.size(); i++) {
+    EncodeUintConfig(uint_config[i], writer, log_alpha_size);
+  }
+}
+// NOTE(review): presumably an explicit instantiation for BitWriter; the
+// template arguments are not visible in this view - confirm.
+template void EncodeUintConfigs(const std::vector&,
+                                BitWriter*, size_t);
+
+namespace {
+
+void ChooseUintConfigs(const HistogramParams& params,
+                       const std::vector>& tokens,
+                       const std::vector& context_map,
+                       std::vector* clustered_histograms,
+                       EntropyEncodingData* codes, size_t* log_alpha_size) {
+  codes->uint_config.resize(clustered_histograms->size());
+
+  // Fixed-choice methods return before any search: kNone keeps the resized
+  // vector as is, k000 and kContextMap fill it with one constant config.
+  if (params.uint_method == HistogramParams::HybridUintMethod::kNone) return;
+  if (params.uint_method == HistogramParams::HybridUintMethod::k000) {
+    codes->uint_config.clear();
+    codes->uint_config.resize(clustered_histograms->size(),
+                              HybridUintConfig(0, 0, 0));
+    return;
+  }
+  if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) {
+    codes->uint_config.clear();
+    codes->uint_config.resize(clustered_histograms->size(),
+                              HybridUintConfig(2, 0, 1));
+    return;
+  }
+
+  // Brute-force method that tries a few options.
+  std::vector configs;
+  if (params.uint_method == HistogramParams::HybridUintMethod::kBest) {
+    configs = {
+        HybridUintConfig(4, 2, 0),  // default
+        HybridUintConfig(4, 1, 0),  // less precise
+        HybridUintConfig(4, 2, 1),  // add sign
+        HybridUintConfig(4, 2, 2),  // add sign+parity
+        HybridUintConfig(4, 1, 2),  // add parity but less msb
+        // Same as above, but more direct coding.
+        HybridUintConfig(5, 2, 0), HybridUintConfig(5, 1, 0),
+        HybridUintConfig(5, 2, 1), HybridUintConfig(5, 2, 2),
+        HybridUintConfig(5, 1, 2),
+        // Same as above, but less direct coding.
+ HybridUintConfig(3, 2, 0), HybridUintConfig(3, 1, 0), + HybridUintConfig(3, 2, 1), HybridUintConfig(3, 1, 2), + // For near-lossless. + HybridUintConfig(4, 1, 3), HybridUintConfig(5, 1, 4), + HybridUintConfig(5, 2, 3), HybridUintConfig(6, 1, 5), + HybridUintConfig(6, 2, 4), HybridUintConfig(6, 0, 0), + // Other + HybridUintConfig(0, 0, 0), // varlenuint + HybridUintConfig(2, 0, 1), // works well for ctx map + HybridUintConfig(7, 0, 0), // direct coding + HybridUintConfig(8, 0, 0), // direct coding + HybridUintConfig(9, 0, 0), // direct coding + HybridUintConfig(10, 0, 0), // direct coding + HybridUintConfig(11, 0, 0), // direct coding + HybridUintConfig(12, 0, 0), // direct coding + }; + } else if (params.uint_method == HistogramParams::HybridUintMethod::kFast) { + configs = { + HybridUintConfig(4, 2, 0), // default + HybridUintConfig(4, 1, 2), // add parity but less msb + HybridUintConfig(0, 0, 0), // smallest histograms + HybridUintConfig(2, 0, 1), // works well for ctx map + }; + } + + std::vector costs(clustered_histograms->size(), + std::numeric_limits::max()); + std::vector extra_bits(clustered_histograms->size()); + std::vector is_valid(clustered_histograms->size()); + size_t max_alpha = + codes->use_prefix_code ? PREFIX_MAX_ALPHABET_SIZE : ANS_MAX_ALPHABET_SIZE; + for (HybridUintConfig cfg : configs) { + std::fill(is_valid.begin(), is_valid.end(), true); + std::fill(extra_bits.begin(), extra_bits.end(), 0); + + for (size_t i = 0; i < clustered_histograms->size(); i++) { + (*clustered_histograms)[i].Clear(); + } + for (size_t i = 0; i < tokens.size(); ++i) { + for (size_t j = 0; j < tokens[i].size(); ++j) { + const Token token = tokens[i][j]; + // TODO(veluca): do not ignore lz77 commands. 
+ if (token.is_lz77_length) continue; + size_t histo = context_map[token.context]; + uint32_t tok, nbits, bits; + cfg.Encode(token.value, &tok, &nbits, &bits); + if (tok >= max_alpha || + (codes->lz77.enabled && tok >= codes->lz77.min_symbol)) { + is_valid[histo] = false; + continue; + } + extra_bits[histo] += nbits; + (*clustered_histograms)[histo].Add(tok); + } + } + + for (size_t i = 0; i < clustered_histograms->size(); i++) { + if (!is_valid[i]) continue; + float cost = (*clustered_histograms)[i].PopulationCost() + extra_bits[i]; + // add signaling cost of the hybriduintconfig itself + cost += CeilLog2Nonzero(cfg.split_exponent + 1); + cost += CeilLog2Nonzero(cfg.split_exponent - cfg.msb_in_token + 1); + if (cost < costs[i]) { + codes->uint_config[i] = cfg; + costs[i] = cost; + } + } + } + + // Rebuild histograms. + for (size_t i = 0; i < clustered_histograms->size(); i++) { + (*clustered_histograms)[i].Clear(); + } + *log_alpha_size = 4; + for (size_t i = 0; i < tokens.size(); ++i) { + for (size_t j = 0; j < tokens[i].size(); ++j) { + const Token token = tokens[i][j]; + uint32_t tok, nbits, bits; + size_t histo = context_map[token.context]; + (token.is_lz77_length ? codes->lz77.length_uint_config + : codes->uint_config[histo]) + .Encode(token.value, &tok, &nbits, &bits); + tok += token.is_lz77_length ? codes->lz77.min_symbol : 0; + (*clustered_histograms)[histo].Add(tok); + while (tok >= (1u << *log_alpha_size)) (*log_alpha_size)++; + } + } +#if JXL_ENABLE_ASSERT + size_t max_log_alpha_size = codes->use_prefix_code ? PREFIX_MAX_BITS : 8; + JXL_ASSERT(*log_alpha_size <= max_log_alpha_size); +#endif +} + +class HistogramBuilder { + public: + explicit HistogramBuilder(const size_t num_contexts) + : histograms_(num_contexts) {} + + void VisitSymbol(int symbol, size_t histo_idx) { + JXL_DASSERT(histo_idx < histograms_.size()); + histograms_[histo_idx].Add(symbol); + } + + // NOTE: `layer` is only for clustered_entropy; caller does ReclaimAndCharge. 
+ size_t BuildAndStoreEntropyCodes( + const HistogramParams& params, + const std::vector>& tokens, EntropyEncodingData* codes, + std::vector* context_map, bool use_prefix_code, + BitWriter* writer, size_t layer, AuxOut* aux_out) const { + size_t cost = 0; + codes->encoding_info.clear(); + std::vector clustered_histograms(histograms_); + context_map->resize(histograms_.size()); + if (histograms_.size() > 1) { + if (!ans_fuzzer_friendly_) { + std::vector histogram_symbols; + ClusterHistograms(params, histograms_, kClustersLimit, + &clustered_histograms, &histogram_symbols); + for (size_t c = 0; c < histograms_.size(); ++c) { + (*context_map)[c] = static_cast(histogram_symbols[c]); + } + } else { + fill(context_map->begin(), context_map->end(), 0); + size_t max_symbol = 0; + for (const Histogram& h : histograms_) { + max_symbol = std::max(h.data_.size(), max_symbol); + } + size_t num_symbols = 1 << CeilLog2Nonzero(max_symbol + 1); + clustered_histograms.resize(1); + clustered_histograms[0].Clear(); + for (size_t i = 0; i < num_symbols; i++) { + clustered_histograms[0].Add(i); + } + } + if (writer != nullptr) { + EncodeContextMap(*context_map, clustered_histograms.size(), writer, + layer, aux_out); + } + } + if (aux_out != nullptr) { + for (size_t i = 0; i < clustered_histograms.size(); ++i) { + aux_out->layers[layer].clustered_entropy += + clustered_histograms[i].ShannonEntropy(); + } + } + codes->use_prefix_code = use_prefix_code; + size_t log_alpha_size = codes->lz77.enabled ? 8 : 7; // Sane default. + if (ans_fuzzer_friendly_) { + codes->uint_config.clear(); + codes->uint_config.resize(1, HybridUintConfig(7, 0, 0)); + } else { + ChooseUintConfigs(params, tokens, *context_map, &clustered_histograms, + codes, &log_alpha_size); + } + if (log_alpha_size < 5) log_alpha_size = 5; + SizeWriter size_writer; // Used if writer == nullptr to estimate costs. 
+ cost += 1; + if (writer) writer->Write(1, use_prefix_code); + + if (use_prefix_code) { + log_alpha_size = PREFIX_MAX_BITS; + } else { + cost += 2; + } + if (writer == nullptr) { + EncodeUintConfigs(codes->uint_config, &size_writer, log_alpha_size); + } else { + if (!use_prefix_code) writer->Write(2, log_alpha_size - 5); + EncodeUintConfigs(codes->uint_config, writer, log_alpha_size); + } + if (use_prefix_code) { + for (size_t c = 0; c < clustered_histograms.size(); ++c) { + size_t num_symbol = 1; + for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) { + if (clustered_histograms[c].data_[i]) num_symbol = i + 1; + } + if (writer) { + StoreVarLenUint16(num_symbol - 1, writer); + } else { + StoreVarLenUint16(num_symbol - 1, &size_writer); + } + } + } + cost += size_writer.size; + for (size_t c = 0; c < clustered_histograms.size(); ++c) { + size_t num_symbol = 1; + for (size_t i = 0; i < clustered_histograms[c].data_.size(); i++) { + if (clustered_histograms[c].data_[i]) num_symbol = i + 1; + } + codes->encoding_info.emplace_back(); + codes->encoding_info.back().resize(std::max(1, num_symbol)); + + BitWriter::Allotment allotment(writer, 256 + num_symbol * 24); + cost += BuildAndStoreANSEncodingData( + params.ans_histogram_strategy, clustered_histograms[c].data_.data(), + num_symbol, log_alpha_size, use_prefix_code, + codes->encoding_info.back().data(), writer); + allotment.FinishedHistogram(writer); + allotment.ReclaimAndCharge(writer, layer, aux_out); + } + return cost; + } + + const Histogram& Histo(size_t i) const { return histograms_[i]; } + + private: + std::vector histograms_; +}; + +class SymbolCostEstimator { + public: + SymbolCostEstimator(size_t num_contexts, bool force_huffman, + const std::vector>& tokens, + const LZ77Params& lz77) { + HistogramBuilder builder(num_contexts); + // Build histograms for estimating lz77 savings. 
+    HybridUintConfig uint_config;
+    // Tokenize every input token (LZ77 lengths use lz77.length_uint_config and
+    // are offset by lz77.min_symbol) and count it in its context's histogram.
+    for (size_t i = 0; i < tokens.size(); ++i) {
+      for (size_t j = 0; j < tokens[i].size(); ++j) {
+        const Token token = tokens[i][j];
+        uint32_t tok, nbits, bits;
+        (token.is_lz77_length ? lz77.length_uint_config : uint_config)
+            .Encode(token.value, &tok, &nbits, &bits);
+        tok += token.is_lz77_length ? lz77.min_symbol : 0;
+        builder.VisitSymbol(tok, token.context);
+      }
+    }
+    // bits_ is a flat num_contexts x max_alphabet_size_ table, so the row
+    // stride is the largest histogram size over all contexts.
+    max_alphabet_size_ = 0;
+    for (size_t i = 0; i < num_contexts; i++) {
+      max_alphabet_size_ =
+          std::max(max_alphabet_size_, builder.Histo(i).data_.size());
+    }
+    bits_.resize(num_contexts * max_alphabet_size_);
+    // TODO(veluca): SIMD?
+    add_symbol_cost_.resize(num_contexts);
+    for (size_t i = 0; i < num_contexts; i++) {
+      // The 1e-8f term keeps the division finite when total_count_ is 0.
+      float inv_total = 1.0f / (builder.Histo(i).total_count_ + 1e-8f);
+      float total_cost = 0;
+      for (size_t j = 0; j < builder.Histo(i).data_.size(); j++) {
+        size_t cnt = builder.Histo(i).data_[j];
+        // Cost stays 0 when the symbol accounts for the whole histogram.
+        float cost = 0;
+        if (cnt != 0 && cnt != builder.Histo(i).total_count_) {
+          cost = -FastLog2f(cnt * inv_total);
+          if (force_huffman) cost = std::ceil(cost);
+        } else if (cnt == 0) {
+          cost = ANS_LOG_TAB_SIZE;  // Highest possible cost.
+        }
+        bits_[i * max_alphabet_size_ + j] = cost;
+        total_cost += cost * builder.Histo(i).data_[j];
+      }
+      // Penalty for adding a lz77 symbol to this context (only used for static
+      // cost model). Higher penalty for contexts that have a very low
+      // per-symbol entropy.
+ add_symbol_cost_[i] = std::max(0.0f, 6.0f - total_cost * inv_total); + } + } + float Bits(size_t ctx, size_t sym) const { + return bits_[ctx * max_alphabet_size_ + sym]; + } + float LenCost(size_t ctx, size_t len, const LZ77Params& lz77) const { + uint32_t nbits, bits, tok; + lz77.length_uint_config.Encode(len, &tok, &nbits, &bits); + tok += lz77.min_symbol; + return nbits + Bits(ctx, tok); + } + float DistCost(size_t len, const LZ77Params& lz77) const { + uint32_t nbits, bits, tok; + HybridUintConfig().Encode(len, &tok, &nbits, &bits); + return nbits + Bits(lz77.nonserialized_distance_context, tok); + } + float AddSymbolCost(size_t idx) const { return add_symbol_cost_[idx]; } + + private: + size_t max_alphabet_size_; + std::vector bits_; + std::vector add_symbol_cost_; +}; + +void ApplyLZ77_RLE(const HistogramParams& params, size_t num_contexts, + const std::vector>& tokens, + LZ77Params& lz77, + std::vector>& tokens_lz77) { + // TODO(veluca): tune heuristics here. + SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77); + float bit_decrease = 0; + size_t total_symbols = 0; + tokens_lz77.resize(tokens.size()); + std::vector sym_cost; + HybridUintConfig uint_config; + for (size_t stream = 0; stream < tokens.size(); stream++) { + size_t distance_multiplier = + params.image_widths.size() > stream ? params.image_widths[stream] : 0; + const auto& in = tokens[stream]; + auto& out = tokens_lz77[stream]; + total_symbols += in.size(); + // Cumulative sum of bit costs. + sym_cost.resize(in.size() + 1); + for (size_t i = 0; i < in.size(); i++) { + uint32_t tok, nbits, unused_bits; + uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits); + sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i]; + } + out.reserve(in.size()); + for (size_t i = 0; i < in.size(); i++) { + size_t num_to_copy = 0; + size_t distance_symbol = 0; // 1 for RLE. + if (distance_multiplier != 0) { + distance_symbol = 1; // Special distance 1 if enabled. 
+ JXL_DASSERT(kSpecialDistances[1][0] == 1); + JXL_DASSERT(kSpecialDistances[1][1] == 0); + } + if (i > 0) { + for (; i + num_to_copy < in.size(); num_to_copy++) { + if (in[i + num_to_copy].value != in[i - 1].value) { + break; + } + } + } + if (num_to_copy == 0) { + out.push_back(in[i]); + continue; + } + float cost = sym_cost[i + num_to_copy] - sym_cost[i]; + // This subtraction might overflow, but that's OK. + size_t lz77_len = num_to_copy - lz77.min_length; + float lz77_cost = num_to_copy >= lz77.min_length + ? CeilLog2Nonzero(lz77_len + 1) + 1 + : 0; + if (num_to_copy < lz77.min_length || cost <= lz77_cost) { + for (size_t j = 0; j < num_to_copy; j++) { + out.push_back(in[i + j]); + } + i += num_to_copy - 1; + continue; + } + // Output the LZ77 length + out.emplace_back(in[i].context, lz77_len); + out.back().is_lz77_length = true; + i += num_to_copy - 1; + bit_decrease += cost - lz77_cost; + // Output the LZ77 copy distance. + out.emplace_back(lz77.nonserialized_distance_context, distance_symbol); + } + } + + if (bit_decrease > total_symbols * 0.2 + 16) { + lz77.enabled = true; + } +} + +// Hash chain for LZ77 matching +struct HashChain { + size_t size_; + std::vector data_; + + unsigned hash_num_values_ = 32768; + unsigned hash_mask_ = hash_num_values_ - 1; + unsigned hash_shift_ = 5; + + std::vector head; + std::vector chain; + std::vector val; + + // Speed up repetitions of zero + std::vector headz; + std::vector chainz; + std::vector zeros; + uint32_t numzeros = 0; + + size_t window_size_; + size_t window_mask_; + size_t min_length_; + size_t max_length_; + + // Map of special distance codes. 
+ std::unordered_map special_dist_table_; + size_t num_special_distances_ = 0; + + uint32_t maxchainlength = 256; // window_size_ to allow all + + HashChain(const Token* data, size_t size, size_t window_size, + size_t min_length, size_t max_length, size_t distance_multiplier) + : size_(size), + window_size_(window_size), + window_mask_(window_size - 1), + min_length_(min_length), + max_length_(max_length) { + data_.resize(size); + for (size_t i = 0; i < size; i++) { + data_[i] = data[i].value; + } + + head.resize(hash_num_values_, -1); + val.resize(window_size_, -1); + chain.resize(window_size_); + for (uint32_t i = 0; i < window_size_; ++i) { + chain[i] = i; // same value as index indicates uninitialized + } + + zeros.resize(window_size_); + headz.resize(window_size_ + 1, -1); + chainz.resize(window_size_); + for (uint32_t i = 0; i < window_size_; ++i) { + chainz[i] = i; + } + // Translate distance to special distance code. + if (distance_multiplier) { + // Count down, so if due to small distance multiplier multiple distances + // map to the same code, the smallest code will be used in the end. + for (int i = kNumSpecialDistances - 1; i >= 0; --i) { + int xi = kSpecialDistances[i][0]; + int yi = kSpecialDistances[i][1]; + int distance = yi * distance_multiplier + xi; + // Ensure that we map distance 1 to the lowest symbols. + if (distance < 1) distance = 1; + special_dist_table_[distance] = i; + } + num_special_distances_ = kNumSpecialDistances; + } + } + + uint32_t GetHash(size_t pos) const { + uint32_t result = 0; + if (pos + 2 < size_) { + // TODO(lode): take the MSB's of the uint32_t values into account as well, + // given that the hash code itself is less than 32 bits. + result ^= (uint32_t)(data_[pos + 0] << 0u); + result ^= (uint32_t)(data_[pos + 1] << hash_shift_); + result ^= (uint32_t)(data_[pos + 2] << (hash_shift_ * 2)); + } else { + // No need to compute hash of last 2 bytes, the length 2 is too short. 
+ return 0; + } + return result & hash_mask_; + } + + uint32_t CountZeros(size_t pos, uint32_t prevzeros) const { + size_t end = pos + window_size_; + if (end > size_) end = size_; + if (prevzeros > 0) { + if (prevzeros >= window_mask_ && data_[end - 1] == 0 && + end == pos + window_size_) { + return prevzeros; + } else { + return prevzeros - 1; + } + } + uint32_t num = 0; + while (pos + num < end && data_[pos + num] == 0) num++; + return num; + } + + void Update(size_t pos) { + uint32_t hashval = GetHash(pos); + uint32_t wpos = pos & window_mask_; + + val[wpos] = (int)hashval; + if (head[hashval] != -1) chain[wpos] = head[hashval]; + head[hashval] = wpos; + + if (pos > 0 && data_[pos] != data_[pos - 1]) numzeros = 0; + numzeros = CountZeros(pos, numzeros); + + zeros[wpos] = numzeros; + if (headz[numzeros] != -1) chainz[wpos] = headz[numzeros]; + headz[numzeros] = wpos; + } + + void Update(size_t pos, size_t len) { + for (size_t i = 0; i < len; i++) { + Update(pos + i); + } + } + + template + void FindMatches(size_t pos, int max_dist, const CB& found_match) const { + uint32_t wpos = pos & window_mask_; + uint32_t hashval = GetHash(pos); + uint32_t hashpos = chain[wpos]; + + int prev_dist = 0; + int end = std::min(pos + max_length_, size_); + uint32_t chainlength = 0; + uint32_t best_len = 0; + for (;;) { + int dist = (hashpos <= wpos) ? (wpos - hashpos) + : (wpos - hashpos + window_mask_ + 1); + if (dist < prev_dist) break; + prev_dist = dist; + uint32_t len = 0; + if (dist > 0) { + int i = pos; + int j = pos - dist; + if (numzeros > 3) { + int r = std::min(numzeros - 1, zeros[hashpos]); + if (i + r >= end) r = end - i - 1; + i += r; + j += r; + } + while (i < end && data_[i] == data_[j]) { + i++; + j++; + } + len = i - pos; + // This can trigger even if the new length is slightly smaller than the + // best length, because it is possible for a slightly cheaper distance + // symbol to occur. 
+        if (len >= min_length_ && len + 2 >= best_len) {
+          auto it = special_dist_table_.find(dist);
+          int dist_symbol = (it == special_dist_table_.end())
+                                ? (num_special_distances_ + dist - 1)
+                                : it->second;
+          found_match(len, dist_symbol);
+          if (len > best_len) best_len = len;
+        }
+      }
+
+      chainlength++;
+      if (chainlength >= maxchainlength) break;
+
+      if (numzeros >= 3 && len > numzeros) {
+        if (hashpos == chainz[hashpos]) break;
+        hashpos = chainz[hashpos];
+        if (zeros[hashpos] != numzeros) break;
+      } else {
+        if (hashpos == chain[hashpos]) break;
+        hashpos = chain[hashpos];
+        if (val[hashpos] != (int)hashval) break;  // outdated hash value
+      }
+    }
+  }
+  // Reports the best single match at `pos` among those FindMatches produces.
+  // The result starts as length 1 with distance symbol 0, so it is left at
+  // those values when no longer match is reported. A longer match always wins;
+  // between matches of equal length the smaller distance symbol is kept.
+  void FindMatch(size_t pos, int max_dist, size_t* result_dist_symbol,
+                 size_t* result_len) const {
+    *result_dist_symbol = 0;
+    *result_len = 1;
+    FindMatches(pos, max_dist, [&](size_t len, size_t dist_symbol) {
+      if (len > *result_len ||
+          (len == *result_len && *result_dist_symbol > dist_symbol)) {
+        *result_len = len;
+        *result_dist_symbol = dist_symbol;
+      }
+    });
+  }
+};
+
+// Estimated cost of coding the value `len`: it is tokenized with
+// HybridUintConfig(1, 0, 0), the token is looked up in a fixed cost table
+// (tokens past the end of the table use its last entry), and the number of
+// extra bits `nbits` reported by Encode is added.
+float LenCost(size_t len) {
+  uint32_t nbits, bits, tok;
+  HybridUintConfig(1, 0, 0).Encode(len, &tok, &nbits, &bits);
+  constexpr float kCostTable[] = {
+      2.797667318563126,  3.213177690381199,  2.5706009246743737,
+      2.408392498667534,  2.829649191872326,  3.3923087753324577,
+      4.029267451554331,  4.415576699706408,  4.509357574741465,
+      9.21481543803004,   10.020590190114898, 11.858671627804766,
+      12.45853300490526,  11.713105831990857, 12.561996324849314,
+      13.775477692278367, 13.174027068768641,
+  };
+  size_t table_size = sizeof kCostTable / sizeof *kCostTable;
+  if (tok >= table_size) tok = table_size - 1;
+  return kCostTable[tok] + nbits;
+}
+
+// TODO(veluca): this does not take into account usage or non-usage of distance
+// multipliers.
+float DistCost(size_t dist) { + uint32_t nbits, bits, tok; + HybridUintConfig(7, 0, 0).Encode(dist, &tok, &nbits, &bits); + constexpr float kCostTable[] = { + 6.368282626312716, 5.680793277090298, 8.347404197105247, + 7.641619201599141, 6.914328374119438, 7.959808291537444, + 8.70023120759855, 8.71378518934703, 9.379132523982769, + 9.110472749092708, 9.159029569270908, 9.430936766731973, + 7.278284055315169, 7.8278514904267755, 10.026641158289236, + 9.976049229827066, 9.64351607048908, 9.563403863480442, + 10.171474111762747, 10.45950155077234, 9.994813912104219, + 10.322524683741156, 8.465808729388186, 8.756254166066853, + 10.160930174662234, 10.247329273413435, 10.04090403724809, + 10.129398517544082, 9.342311691539546, 9.07608009102374, + 10.104799540677513, 10.378079384990906, 10.165828974075072, + 10.337595322341553, 7.940557464567944, 10.575665823319431, + 11.023344321751955, 10.736144698831827, 11.118277044595054, + 7.468468230648442, 10.738305230932939, 10.906980780216568, + 10.163468216353817, 10.17805759656433, 11.167283670483565, + 11.147050200274544, 10.517921919244333, 10.651764778156886, + 10.17074446448919, 11.217636876224745, 11.261630721139484, + 11.403140815247259, 10.892472096873417, 11.1859607804481, + 8.017346947551262, 7.895143720278828, 11.036577113822025, + 11.170562110315794, 10.326988722591086, 10.40872184751056, + 11.213498225466386, 11.30580635516863, 10.672272515665442, + 10.768069466228063, 11.145257364153565, 11.64668307145549, + 10.593156194627339, 11.207499484844943, 10.767517766396908, + 10.826629811407042, 10.737764794499988, 10.6200448518045, + 10.191315385198092, 8.468384171390085, 11.731295299170432, + 11.824619886654398, 10.41518844301179, 10.16310536548649, + 10.539423685097576, 10.495136599328031, 10.469112847728267, + 11.72057686174922, 10.910326337834674, 11.378921834673758, + 11.847759036098536, 11.92071647623854, 10.810628276345282, + 11.008601085273893, 11.910326337834674, 11.949212023423133, + 11.298614839104337, 
11.611603659010392, 10.472930394619985, + 11.835564720850282, 11.523267392285337, 12.01055816679611, + 8.413029688994023, 11.895784139536406, 11.984679534970505, + 11.220654278717394, 11.716311684833672, 10.61036646226114, + 10.89849965960364, 10.203762898863669, 10.997560826267238, + 11.484217379438984, 11.792836176993665, 12.24310468755171, + 11.464858097919262, 12.212747017409377, 11.425595666074955, + 11.572048533398757, 12.742093965163013, 11.381874288645637, + 12.191870445817015, 11.683156920035426, 11.152442115262197, + 11.90303691580457, 11.653292787169159, 11.938615382266098, + 16.970641701570223, 16.853602280380002, 17.26240782594733, + 16.644655390108507, 17.14310889757499, 16.910935455445955, + 17.505678976959697, 17.213498225466388, 2.4162310293553024, + 3.494587244462329, 3.5258600986408344, 3.4959806589517095, + 3.098390886949687, 3.343454654302911, 3.588847442290287, + 4.14614790111827, 5.152948641990529, 7.433696808092598, + 9.716311684833672, + }; + size_t table_size = sizeof kCostTable / sizeof *kCostTable; + if (tok >= table_size) tok = table_size - 1; + return kCostTable[tok] + nbits; +} + +void ApplyLZ77_LZ77(const HistogramParams& params, size_t num_contexts, + const std::vector>& tokens, + LZ77Params& lz77, + std::vector>& tokens_lz77) { + // TODO(veluca): tune heuristics here. + SymbolCostEstimator sce(num_contexts, params.force_huffman, tokens, lz77); + float bit_decrease = 0; + size_t total_symbols = 0; + tokens_lz77.resize(tokens.size()); + HybridUintConfig uint_config; + std::vector sym_cost; + for (size_t stream = 0; stream < tokens.size(); stream++) { + size_t distance_multiplier = + params.image_widths.size() > stream ? params.image_widths[stream] : 0; + const auto& in = tokens[stream]; + auto& out = tokens_lz77[stream]; + total_symbols += in.size(); + // Cumulative sum of bit costs. 
+ sym_cost.resize(in.size() + 1); + for (size_t i = 0; i < in.size(); i++) { + uint32_t tok, nbits, unused_bits; + uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits); + sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i]; + } + + out.reserve(in.size()); + size_t max_distance = in.size(); + size_t min_length = lz77.min_length; + JXL_ASSERT(min_length >= 3); + size_t max_length = in.size(); + + // Use next power of two as window size. + size_t window_size = 1; + while (window_size < max_distance && window_size < kWindowSize) { + window_size <<= 1; + } + + HashChain chain(in.data(), in.size(), window_size, min_length, max_length, + distance_multiplier); + size_t len, dist_symbol; + + const size_t max_lazy_match_len = 256; // 0 to disable lazy matching + + // Whether the next symbol was already updated (to test lazy matching) + bool already_updated = false; + for (size_t i = 0; i < in.size(); i++) { + out.push_back(in[i]); + if (!already_updated) chain.Update(i); + already_updated = false; + chain.FindMatch(i, max_distance, &dist_symbol, &len); + if (len >= min_length) { + if (len < max_lazy_match_len && i + 1 < in.size()) { + // Try length at next symbol lazy matching + chain.Update(i + 1); + already_updated = true; + size_t len2, dist_symbol2; + chain.FindMatch(i + 1, max_distance, &dist_symbol2, &len2); + if (len2 > len) { + // Use the lazy match. Add literal, and use the next length starting + // from the next byte. 
+ ++i; + already_updated = false; + len = len2; + dist_symbol = dist_symbol2; + out.push_back(in[i]); + } + } + + float cost = sym_cost[i + len] - sym_cost[i]; + size_t lz77_len = len - lz77.min_length; + float lz77_cost = LenCost(lz77_len) + DistCost(dist_symbol) + + sce.AddSymbolCost(out.back().context); + + if (lz77_cost <= cost) { + out.back().value = len - min_length; + out.back().is_lz77_length = true; + out.emplace_back(lz77.nonserialized_distance_context, dist_symbol); + bit_decrease += cost - lz77_cost; + } else { + // LZ77 match ignored, and symbol already pushed. Push all other + // symbols and skip. + for (size_t j = 1; j < len; j++) { + out.push_back(in[i + j]); + } + } + + if (already_updated) { + chain.Update(i + 2, len - 2); + already_updated = false; + } else { + chain.Update(i + 1, len - 1); + } + i += len - 1; + } else { + // Literal, already pushed + } + } + } + + if (bit_decrease > total_symbols * 0.2 + 16) { + lz77.enabled = true; + } +} + +void ApplyLZ77_Optimal(const HistogramParams& params, size_t num_contexts, + const std::vector>& tokens, + LZ77Params& lz77, + std::vector>& tokens_lz77) { + std::vector> tokens_for_cost_estimate; + ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_for_cost_estimate); + // If greedy-LZ77 does not give better compression than no-lz77, no reason to + // run the optimal matching. + if (!lz77.enabled) return; + SymbolCostEstimator sce(num_contexts + 1, params.force_huffman, + tokens_for_cost_estimate, lz77); + tokens_lz77.resize(tokens.size()); + HybridUintConfig uint_config; + std::vector sym_cost; + std::vector dist_symbols; + for (size_t stream = 0; stream < tokens.size(); stream++) { + size_t distance_multiplier = + params.image_widths.size() > stream ? params.image_widths[stream] : 0; + const auto& in = tokens[stream]; + auto& out = tokens_lz77[stream]; + // Cumulative sum of bit costs. 
+ sym_cost.resize(in.size() + 1); + for (size_t i = 0; i < in.size(); i++) { + uint32_t tok, nbits, unused_bits; + uint_config.Encode(in[i].value, &tok, &nbits, &unused_bits); + sym_cost[i + 1] = sce.Bits(in[i].context, tok) + nbits + sym_cost[i]; + } + + out.reserve(in.size()); + size_t max_distance = in.size(); + size_t min_length = lz77.min_length; + JXL_ASSERT(min_length >= 3); + size_t max_length = in.size(); + + // Use next power of two as window size. + size_t window_size = 1; + while (window_size < max_distance && window_size < kWindowSize) { + window_size <<= 1; + } + + HashChain chain(in.data(), in.size(), window_size, min_length, max_length, + distance_multiplier); + + struct MatchInfo { + uint32_t len; + uint32_t dist_symbol; + uint32_t ctx; + float total_cost = std::numeric_limits::max(); + }; + // Total cost to encode the first N symbols. + std::vector prefix_costs(in.size() + 1); + prefix_costs[0].total_cost = 0; + + size_t rle_length = 0; + size_t skip_lz77 = 0; + for (size_t i = 0; i < in.size(); i++) { + chain.Update(i); + float lit_cost = + prefix_costs[i].total_cost + sym_cost[i + 1] - sym_cost[i]; + if (prefix_costs[i + 1].total_cost > lit_cost) { + prefix_costs[i + 1].dist_symbol = 0; + prefix_costs[i + 1].len = 1; + prefix_costs[i + 1].ctx = in[i].context; + prefix_costs[i + 1].total_cost = lit_cost; + } + if (skip_lz77 > 0) { + skip_lz77--; + continue; + } + dist_symbols.clear(); + chain.FindMatches(i, max_distance, + [&dist_symbols](size_t len, size_t dist_symbol) { + if (dist_symbols.size() <= len) { + dist_symbols.resize(len + 1, dist_symbol); + } + if (dist_symbol < dist_symbols[len]) { + dist_symbols[len] = dist_symbol; + } + }); + if (dist_symbols.size() <= min_length) continue; + { + size_t best_cost = dist_symbols.back(); + for (size_t j = dist_symbols.size() - 1; j >= min_length; j--) { + if (dist_symbols[j] < best_cost) { + best_cost = dist_symbols[j]; + } + dist_symbols[j] = best_cost; + } + } + for (size_t j = min_length; j < 
dist_symbols.size(); j++) {
+        // Cost model that uses results from lazy LZ77.
+        float lz77_cost = sce.LenCost(in[i].context, j - min_length, lz77) +
+                          sce.DistCost(dist_symbols[j], lz77);
+        float cost = prefix_costs[i].total_cost + lz77_cost;
+        if (prefix_costs[i + j].total_cost > cost) {
+          prefix_costs[i + j].len = j;
+          // Stored off by one so that 0 can mean "literal" during backtracking.
+          prefix_costs[i + j].dist_symbol = dist_symbols[j] + 1;
+          prefix_costs[i + j].ctx = in[i].context;
+          prefix_costs[i + j].total_cost = cost;
+        }
+      }
+      // We are in a RLE sequence: skip all the symbols except the first 8 and
+      // the last 8. This avoids quadratic costs for sequences with long runs of
+      // the same symbol.
+      if ((dist_symbols.back() == 0 && distance_multiplier == 0) ||
+          (dist_symbols.back() == 1 && distance_multiplier != 0)) {
+        rle_length++;
+      } else {
+        rle_length = 0;
+      }
+      if (rle_length >= 8 && dist_symbols.size() > 9) {
+        skip_lz77 = dist_symbols.size() - 10;
+        rle_length = 0;
+      }
+    }
+    // Walk the chosen steps backwards from the end of the stream. Tokens are
+    // appended in reverse order here.
+    size_t pos = in.size();
+    while (pos > 0) {
+      bool is_lz77_length = prefix_costs[pos].dist_symbol != 0;
+      if (is_lz77_length) {
+        size_t dist_symbol = prefix_costs[pos].dist_symbol - 1;
+        out.emplace_back(lz77.nonserialized_distance_context, dist_symbol);
+      }
+      size_t val = is_lz77_length ?
prefix_costs[pos].len - min_length + : in[pos - 1].value; + out.emplace_back(prefix_costs[pos].ctx, val); + out.back().is_lz77_length = is_lz77_length; + pos -= prefix_costs[pos].len; + } + std::reverse(out.begin(), out.end()); + } +} + +void ApplyLZ77(const HistogramParams& params, size_t num_contexts, + const std::vector>& tokens, LZ77Params& lz77, + std::vector>& tokens_lz77) { + lz77.enabled = false; + if (params.force_huffman) { + lz77.min_symbol = std::min(PREFIX_MAX_ALPHABET_SIZE - 32, 512); + } else { + lz77.min_symbol = 224; + } + if (params.lz77_method == HistogramParams::LZ77Method::kNone) { + return; + } else if (params.lz77_method == HistogramParams::LZ77Method::kRLE) { + ApplyLZ77_RLE(params, num_contexts, tokens, lz77, tokens_lz77); + } else if (params.lz77_method == HistogramParams::LZ77Method::kLZ77) { + ApplyLZ77_LZ77(params, num_contexts, tokens, lz77, tokens_lz77); + } else if (params.lz77_method == HistogramParams::LZ77Method::kOptimal) { + ApplyLZ77_Optimal(params, num_contexts, tokens, lz77, tokens_lz77); + } else { + JXL_UNREACHABLE("Not implemented"); + } +} +} // namespace + +size_t BuildAndEncodeHistograms(const HistogramParams& params, + size_t num_contexts, + std::vector>& tokens, + EntropyEncodingData* codes, + std::vector* context_map, + BitWriter* writer, size_t layer, + AuxOut* aux_out) { + size_t total_bits = 0; + codes->lz77.nonserialized_distance_context = num_contexts; + std::vector> tokens_lz77; + ApplyLZ77(params, num_contexts, tokens, codes->lz77, tokens_lz77); + if (ans_fuzzer_friendly_) { + codes->lz77.length_uint_config = HybridUintConfig(10, 0, 0); + codes->lz77.min_symbol = 2048; + } + + const size_t max_contexts = std::min(num_contexts, kClustersLimit); + BitWriter::Allotment allotment(writer, + 128 + num_contexts * 40 + max_contexts * 96); + if (writer) { + JXL_CHECK(Bundle::Write(codes->lz77, writer, layer, aux_out)); + } else { + size_t ebits, bits; + JXL_CHECK(Bundle::CanEncode(codes->lz77, &ebits, &bits)); + 
total_bits += bits; + } + if (codes->lz77.enabled) { + if (writer) { + size_t b = writer->BitsWritten(); + EncodeUintConfig(codes->lz77.length_uint_config, writer, + /*log_alpha_size=*/8); + total_bits += writer->BitsWritten() - b; + } else { + SizeWriter size_writer; + EncodeUintConfig(codes->lz77.length_uint_config, &size_writer, + /*log_alpha_size=*/8); + total_bits += size_writer.size; + } + num_contexts += 1; + tokens = std::move(tokens_lz77); + } + size_t total_tokens = 0; + // Build histograms. + HistogramBuilder builder(num_contexts); + HybridUintConfig uint_config; // Default config for clustering. + // Unless we are using the kContextMap histogram option. + if (params.uint_method == HistogramParams::HybridUintMethod::kContextMap) { + uint_config = HybridUintConfig(2, 0, 1); + } + if (params.uint_method == HistogramParams::HybridUintMethod::k000) { + uint_config = HybridUintConfig(0, 0, 0); + } + if (ans_fuzzer_friendly_) { + uint_config = HybridUintConfig(10, 0, 0); + } + for (size_t i = 0; i < tokens.size(); ++i) { + if (codes->lz77.enabled) { + for (size_t j = 0; j < tokens[i].size(); ++j) { + const Token& token = tokens[i][j]; + total_tokens++; + uint32_t tok, nbits, bits; + (token.is_lz77_length ? codes->lz77.length_uint_config : uint_config) + .Encode(token.value, &tok, &nbits, &bits); + tok += token.is_lz77_length ? 
codes->lz77.min_symbol : 0; + builder.VisitSymbol(tok, token.context); + } + } else if (num_contexts == 1) { + for (size_t j = 0; j < tokens[i].size(); ++j) { + const Token& token = tokens[i][j]; + total_tokens++; + uint32_t tok, nbits, bits; + uint_config.Encode(token.value, &tok, &nbits, &bits); + builder.VisitSymbol(tok, /*token.context=*/0); + } + } else { + for (size_t j = 0; j < tokens[i].size(); ++j) { + const Token& token = tokens[i][j]; + total_tokens++; + uint32_t tok, nbits, bits; + uint_config.Encode(token.value, &tok, &nbits, &bits); + builder.VisitSymbol(tok, token.context); + } + } + } + + bool use_prefix_code = + params.force_huffman || total_tokens < 100 || + params.clustering == HistogramParams::ClusteringType::kFastest || + ans_fuzzer_friendly_; + if (!use_prefix_code) { + bool all_singleton = true; + for (size_t i = 0; i < num_contexts; i++) { + if (builder.Histo(i).ShannonEntropy() >= 1e-5) { + all_singleton = false; + } + } + if (all_singleton) { + use_prefix_code = true; + } + } + + // Encode histograms. + total_bits += builder.BuildAndStoreEntropyCodes(params, tokens, codes, + context_map, use_prefix_code, + writer, layer, aux_out); + allotment.FinishedHistogram(writer); + allotment.ReclaimAndCharge(writer, layer, aux_out); + + if (aux_out != nullptr) { + aux_out->layers[layer].num_clustered_histograms += + codes->encoding_info.size(); + } + return total_bits; +} + +size_t WriteTokens(const std::vector& tokens, + const EntropyEncodingData& codes, + const std::vector& context_map, BitWriter* writer) { + size_t num_extra_bits = 0; + if (codes.use_prefix_code) { + for (size_t i = 0; i < tokens.size(); i++) { + uint32_t tok, nbits, bits; + const Token& token = tokens[i]; + size_t histo = context_map[token.context]; + (token.is_lz77_length ? codes.lz77.length_uint_config + : codes.uint_config[histo]) + .Encode(token.value, &tok, &nbits, &bits); + tok += token.is_lz77_length ? codes.lz77.min_symbol : 0; + // Combine two calls to the BitWriter. 
Equivalent to: + // writer->Write(codes.encoding_info[histo][tok].depth, + // codes.encoding_info[histo][tok].bits); + // writer->Write(nbits, bits); + uint64_t data = codes.encoding_info[histo][tok].bits; + data |= bits << codes.encoding_info[histo][tok].depth; + writer->Write(codes.encoding_info[histo][tok].depth + nbits, data); + num_extra_bits += nbits; + } + return num_extra_bits; + } + std::vector out; + std::vector out_nbits; + out.reserve(tokens.size()); + out_nbits.reserve(tokens.size()); + uint64_t allbits = 0; + size_t numallbits = 0; + // Writes in *reversed* order. + auto addbits = [&](size_t bits, size_t nbits) { + if (JXL_UNLIKELY(nbits)) { + JXL_DASSERT(bits >> nbits == 0); + if (JXL_UNLIKELY(numallbits + nbits > BitWriter::kMaxBitsPerCall)) { + out.push_back(allbits); + out_nbits.push_back(numallbits); + numallbits = allbits = 0; + } + allbits <<= nbits; + allbits |= bits; + numallbits += nbits; + } + }; + const int end = tokens.size(); + ANSCoder ans; + if (codes.lz77.enabled || context_map.size() > 1) { + for (int i = end - 1; i >= 0; --i) { + const Token token = tokens[i]; + const uint8_t histo = context_map[token.context]; + uint32_t tok, nbits, bits; + (token.is_lz77_length ? codes.lz77.length_uint_config + : codes.uint_config[histo]) + .Encode(tokens[i].value, &tok, &nbits, &bits); + tok += token.is_lz77_length ? codes.lz77.min_symbol : 0; + const ANSEncSymbolInfo& info = codes.encoding_info[histo][tok]; + // Extra bits first as this is reversed. + addbits(bits, nbits); + num_extra_bits += nbits; + uint8_t ans_nbits = 0; + uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits); + addbits(ans_bits, ans_nbits); + } + } else { + for (int i = end - 1; i >= 0; --i) { + uint32_t tok, nbits, bits; + codes.uint_config[0].Encode(tokens[i].value, &tok, &nbits, &bits); + const ANSEncSymbolInfo& info = codes.encoding_info[0][tok]; + // Extra bits first as this is reversed. 
+ addbits(bits, nbits); + num_extra_bits += nbits; + uint8_t ans_nbits = 0; + uint32_t ans_bits = ans.PutSymbol(info, &ans_nbits); + addbits(ans_bits, ans_nbits); + } + } + const uint32_t state = ans.GetState(); + writer->Write(32, state); + writer->Write(numallbits, allbits); + for (int i = out.size(); i > 0; --i) { + writer->Write(out_nbits[i - 1], out[i - 1]); + } + return num_extra_bits; +} + +void WriteTokens(const std::vector& tokens, + const EntropyEncodingData& codes, + const std::vector& context_map, BitWriter* writer, + size_t layer, AuxOut* aux_out) { + BitWriter::Allotment allotment(writer, 32 * tokens.size() + 32 * 1024 * 4); + size_t num_extra_bits = WriteTokens(tokens, codes, context_map, writer); + allotment.ReclaimAndCharge(writer, layer, aux_out); + if (aux_out != nullptr) { + aux_out->layers[layer].extra_bits += num_extra_bits; + } +} + +void SetANSFuzzerFriendly(bool ans_fuzzer_friendly) { +#if JXL_IS_DEBUG_BUILD // Guard against accidental / malicious changes. + ans_fuzzer_friendly_ = ans_fuzzer_friendly; +#endif +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ans.h b/third-party/libjxl/libjxl/lib/jxl/enc_ans.h new file mode 100644 index 0000000000..a4afb19b4e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_ans.h @@ -0,0 +1,143 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_ANS_H_ +#define LIB_JXL_ENC_ANS_H_ + +// Library to encode the ANS population counts to the bit-stream and encode +// symbols based on the respective distributions. 
+ +#include +#include +#include +#include +#include + +#include +#include + +#include "lib/jxl/ans_common.h" +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/enc_ans_params.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/huffman_table.h" + +namespace jxl { + +struct AuxOut; + +#define USE_MULT_BY_RECIPROCAL + +// precision must be equal to: #bits(state_) + #bits(freq) +#define RECIPROCAL_PRECISION (32 + ANS_LOG_TAB_SIZE) + +// Data structure representing one element of the encoding table built +// from a distribution. +// TODO(veluca): split this up, or use an union. +struct ANSEncSymbolInfo { + // ANS + uint16_t freq_; + std::vector reverse_map_; +#ifdef USE_MULT_BY_RECIPROCAL + uint64_t ifreq_; +#endif + // Prefix coding. + uint8_t depth; + uint16_t bits; +}; + +class ANSCoder { + public: + ANSCoder() : state_(ANS_SIGNATURE << 16) {} + + uint32_t PutSymbol(const ANSEncSymbolInfo& t, uint8_t* nbits) { + uint32_t bits = 0; + *nbits = 0; + if ((state_ >> (32 - ANS_LOG_TAB_SIZE)) >= t.freq_) { + bits = state_ & 0xffff; + state_ >>= 16; + *nbits = 16; + } +#ifdef USE_MULT_BY_RECIPROCAL + // We use mult-by-reciprocal trick, but that requires 64b calc. + const uint32_t v = (state_ * t.ifreq_) >> RECIPROCAL_PRECISION; + const uint32_t offset = t.reverse_map_[state_ - v * t.freq_]; + state_ = (v << ANS_LOG_TAB_SIZE) + offset; +#else + state_ = ((state_ / t.freq_) << ANS_LOG_TAB_SIZE) + + t.reverse_map_[state_ % t.freq_]; +#endif + return bits; + } + + uint32_t GetState() const { return state_; } + + private: + uint32_t state_; +}; + +// RebalanceHistogram requires a signed type. +using ANSHistBin = int32_t; + +struct EntropyEncodingData { + std::vector> encoding_info; + bool use_prefix_code; + std::vector uint_config; + LZ77Params lz77; +}; + +// Integer to be encoded by an entropy coder, either ANS or Huffman. 
+struct Token { + Token() {} + Token(uint32_t c, uint32_t value) + : is_lz77_length(false), context(c), value(value) {} + uint32_t is_lz77_length : 1; + uint32_t context : 31; + uint32_t value; +}; + +// Returns an estimate of the number of bits required to encode the given +// histogram (header bits plus data bits). +float ANSPopulationCost(const ANSHistBin* data, size_t alphabet_size); + +// Apply context clustering, compute histograms and encode them. Returns an +// estimate of the total bits used for encoding the stream. If `writer` == +// nullptr, the bit estimate will not take into account the context map (which +// does not get written if `num_contexts` == 1). +size_t BuildAndEncodeHistograms(const HistogramParams& params, + size_t num_contexts, + std::vector>& tokens, + EntropyEncodingData* codes, + std::vector* context_map, + BitWriter* writer, size_t layer, + AuxOut* aux_out); + +// Write the tokens to a string. +void WriteTokens(const std::vector& tokens, + const EntropyEncodingData& codes, + const std::vector& context_map, BitWriter* writer, + size_t layer, AuxOut* aux_out); + +// Same as above, but assumes allotment created by caller. +size_t WriteTokens(const std::vector& tokens, + const EntropyEncodingData& codes, + const std::vector& context_map, BitWriter* writer); + +// Exposed for tests; to be used with Writer=BitWriter only. +template +void EncodeUintConfigs(const std::vector& uint_config, + Writer* writer, size_t log_alpha_size); +extern template void EncodeUintConfigs(const std::vector&, + BitWriter*, size_t); + +// Globally set the option to create fuzzer-friendly ANS streams. Negatively +// impacts compression. Not thread-safe. 
+void SetANSFuzzerFriendly(bool ans_fuzzer_friendly); +} // namespace jxl + +#endif // LIB_JXL_ENC_ANS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ans_params.h b/third-party/libjxl/libjxl/lib/jxl/enc_ans_params.h new file mode 100644 index 0000000000..50ca31dc03 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_ans_params.h @@ -0,0 +1,76 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_ANS_PARAMS_H_ +#define LIB_JXL_ENC_ANS_PARAMS_H_ + +// Encoder-only parameter needed for ANS entropy encoding methods. + +#include +#include + +#include "lib/jxl/enc_params.h" + +namespace jxl { + +struct HistogramParams { + enum class ClusteringType { + kFastest, // Only 4 clusters. + kFast, + kBest, + }; + + enum class HybridUintMethod { + kNone, // just use kHybridUint420Config. + k000, // force the fastest option. + kFast, // just try a couple of options. + kContextMap, // fast choice for ctx map. + kBest, + }; + + enum class LZ77Method { + kNone, // do not try lz77. + kRLE, // only try doing RLE. + kLZ77, // try lz77 with backward references. + kOptimal, // optimal-matching LZ77 parsing. + }; + + enum class ANSHistogramStrategy { + kFast, // Only try some methods, early exit. + kApproximate, // Only try some methods. + kPrecise, // Try all methods. 
+ }; + + HistogramParams() = default; + + HistogramParams(SpeedTier tier, size_t num_ctx) { + if (tier > SpeedTier::kFalcon) { + clustering = ClusteringType::kFastest; + lz77_method = LZ77Method::kNone; + } else if (tier > SpeedTier::kTortoise) { + clustering = ClusteringType::kFast; + } else { + clustering = ClusteringType::kBest; + } + if (tier > SpeedTier::kTortoise) { + uint_method = HybridUintMethod::kNone; + } + if (tier >= SpeedTier::kSquirrel) { + ans_histogram_strategy = ANSHistogramStrategy::kApproximate; + } + } + + ClusteringType clustering = ClusteringType::kBest; + HybridUintMethod uint_method = HybridUintMethod::kBest; + LZ77Method lz77_method = LZ77Method::kRLE; + ANSHistogramStrategy ans_histogram_strategy = ANSHistogramStrategy::kPrecise; + std::vector image_widths; + size_t max_histograms = ~0; + bool force_huffman = false; +}; + +} // namespace jxl + +#endif // LIB_JXL_ENC_ANS_PARAMS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.cc b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.cc new file mode 100644 index 0000000000..9030430e2b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.cc @@ -0,0 +1,325 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_ar_control_field.h" + +#include +#include + +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_ar_control_field.cc" +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/common.h" +#include "lib/jxl/enc_adaptive_quantization.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/quant_weights.h" +#include "lib/jxl/quantizer.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Sqrt; + +void ProcessTile(const Image3F& opsin, PassesEncoderState* enc_state, + const Rect& rect, + ArControlFieldHeuristics::TempImages* temp_image) { + constexpr size_t N = kBlockDim; + ImageB* JXL_RESTRICT epf_sharpness = &enc_state->shared.epf_sharpness; + ImageF* JXL_RESTRICT quant = &enc_state->initial_quant_field; + JXL_ASSERT( + epf_sharpness->xsize() == enc_state->shared.frame_dim.xsize_blocks && + epf_sharpness->ysize() == enc_state->shared.frame_dim.ysize_blocks); + + if (enc_state->cparams.butteraugli_distance < kMinButteraugliForDynamicAR || + enc_state->cparams.speed_tier > SpeedTier::kWombat || + enc_state->shared.frame_header.loop_filter.epf_iters == 0) { + FillPlane(static_cast(4), epf_sharpness, rect); + return; + } + + // Likely better to have a higher X weight, like: + // const float kChannelWeights[3] = {47.0f, 4.35f, 0.287f}; + const float kChannelWeights[3] = {4.35f, 4.35f, 0.287f}; + const float kChannelWeightsLapNeg[3] = {-0.125f * kChannelWeights[0], + -0.125f * kChannelWeights[1], + -0.125f * 
kChannelWeights[2]}; + const size_t sharpness_stride = + static_cast(epf_sharpness->PixelsPerRow()); + + size_t by0 = rect.y0(); + size_t by1 = rect.y0() + rect.ysize(); + size_t bx0 = rect.x0(); + size_t bx1 = rect.x0() + rect.xsize(); + temp_image->InitOnce(); + ImageF& laplacian_sqrsum = temp_image->laplacian_sqrsum; + // Calculate the L2 of the 3x3 Laplacian in an integral transform + // (for example 32x32 dct). This relates to transforms ability + // to propagate artefacts. + size_t y0 = by0 == 0 ? 2 : 0; + size_t y1 = by1 * N + 4 <= opsin.ysize() + 2 ? (by1 - by0) * N + 4 + : opsin.ysize() + 2 - by0 * N; + size_t x0 = bx0 == 0 ? 2 : 0; + size_t x1 = bx1 * N + 4 <= opsin.xsize() + 2 ? (bx1 - bx0) * N + 4 + : opsin.xsize() + 2 - bx0 * N; + HWY_FULL(float) df; + for (size_t y = y0; y < y1; y++) { + float* JXL_RESTRICT laplacian_sqrsum_row = laplacian_sqrsum.Row(y); + size_t cy = y + by0 * N - 2; + const float* JXL_RESTRICT in_row_t[3]; + const float* JXL_RESTRICT in_row[3]; + const float* JXL_RESTRICT in_row_b[3]; + for (size_t c = 0; c < 3; c++) { + in_row_t[c] = opsin.PlaneRow(c, cy > 0 ? cy - 1 : cy); + in_row[c] = opsin.PlaneRow(c, cy); + in_row_b[c] = opsin.PlaneRow(c, cy + 1 < opsin.ysize() ? cy + 1 : cy); + } + auto compute_laplacian_scalar = [&](size_t x) { + size_t cx = x + bx0 * N - 2; + const size_t prevX = cx >= 1 ? cx - 1 : cx; + const size_t nextX = cx + 1 < opsin.xsize() ? cx + 1 : cx; + float sumsqr = 0; + for (size_t c = 0; c < 3; c++) { + float laplacian = + kChannelWeights[c] * in_row[c][cx] + + kChannelWeightsLapNeg[c] * + (in_row[c][prevX] + in_row[c][nextX] + in_row_b[c][prevX] + + in_row_b[c][cx] + in_row_b[c][nextX] + in_row_t[c][prevX] + + in_row_t[c][cx] + in_row_t[c][nextX]); + sumsqr += laplacian * laplacian; + } + laplacian_sqrsum_row[x] = sumsqr; + }; + size_t x = x0; + for (; x + bx0 * N < 3; x++) { + compute_laplacian_scalar(x); + } + // Interior. One extra pixel of border as the last pixel is special. 
+ for (; x + Lanes(df) <= x1 && x + Lanes(df) + bx0 * N - 1 <= opsin.xsize(); + x += Lanes(df)) { + size_t cx = x + bx0 * N - 2; + auto sumsqr = Zero(df); + for (size_t c = 0; c < 3; c++) { + auto laplacian = + Mul(LoadU(df, in_row[c] + cx), Set(df, kChannelWeights[c])); + auto sum_oth0 = LoadU(df, in_row[c] + cx - 1); + auto sum_oth1 = LoadU(df, in_row[c] + cx + 1); + auto sum_oth2 = LoadU(df, in_row_t[c] + cx - 1); + auto sum_oth3 = LoadU(df, in_row_t[c] + cx); + sum_oth0 = Add(sum_oth0, LoadU(df, in_row_t[c] + cx + 1)); + sum_oth1 = Add(sum_oth1, LoadU(df, in_row_b[c] + cx - 1)); + sum_oth2 = Add(sum_oth2, LoadU(df, in_row_b[c] + cx)); + sum_oth3 = Add(sum_oth3, LoadU(df, in_row_b[c] + cx + 1)); + sum_oth0 = Add(sum_oth0, sum_oth1); + sum_oth2 = Add(sum_oth2, sum_oth3); + sum_oth0 = Add(sum_oth0, sum_oth2); + laplacian = + MulAdd(Set(df, kChannelWeightsLapNeg[c]), sum_oth0, laplacian); + sumsqr = MulAdd(laplacian, laplacian, sumsqr); + } + StoreU(sumsqr, df, laplacian_sqrsum_row + x); + } + for (; x < x1; x++) { + compute_laplacian_scalar(x); + } + } + HWY_CAPPED(float, 4) df4; + // Calculate the L2 of the 3x3 Laplacian in 4x4 blocks within the area + // of the integral transform. Sample them within the integral transform + // with two offsets (0,0) and (-2, -2) pixels (sqrsum_00 and sqrsum_22, + // respectively). 
+ ImageF& sqrsum_00 = temp_image->sqrsum_00; + size_t sqrsum_00_stride = sqrsum_00.PixelsPerRow(); + float* JXL_RESTRICT sqrsum_00_row = sqrsum_00.Row(0); + for (size_t y = 0; y < (by1 - by0) * 2; y++) { + const float* JXL_RESTRICT rows_in[4]; + for (size_t iy = 0; iy < 4; iy++) { + rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy + 2); + } + float* JXL_RESTRICT row_out = sqrsum_00_row + y * sqrsum_00_stride; + for (size_t x = 0; x < (bx1 - bx0) * 2; x++) { + auto sum = Zero(df4); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix += Lanes(df4)) { + sum = Add(sum, LoadU(df4, rows_in[iy] + x * 4 + ix + 2)); + } + } + row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f); + } + } + // Indexing iy and ix is a bit tricky as we include a 2 pixel border + // around the block for evenness calculations. This is similar to what + // we did in guetzli for the observability of artefacts, except there + // the element is a sliding 5x5, not sparsely sampled 4x4 box like here. + ImageF& sqrsum_22 = temp_image->sqrsum_22; + size_t sqrsum_22_stride = sqrsum_22.PixelsPerRow(); + float* JXL_RESTRICT sqrsum_22_row = sqrsum_22.Row(0); + for (size_t y = 0; y < (by1 - by0) * 2 + 1; y++) { + const float* JXL_RESTRICT rows_in[4]; + for (size_t iy = 0; iy < 4; iy++) { + rows_in[iy] = laplacian_sqrsum.ConstRow(y * 4 + iy); + } + float* JXL_RESTRICT row_out = sqrsum_22_row + y * sqrsum_22_stride; + // ignore pixels outside the image. + // Y coordinates are relative to by0*8+y*4. + size_t sy = y * 4 + by0 * 8 > 0 ? 0 : 2; + size_t ey = y * 4 + by0 * 8 + 4 <= opsin.ysize() + 2 + ? 4 + : opsin.ysize() - y * 4 - by0 * 8 + 2; + for (size_t x = 0; x < (bx1 - bx0) * 2 + 1; x++) { + // ignore pixels outside the image. + // X coordinates are relative to bx0*8. + size_t sx = x * 4 + bx0 * 8 > 0 ? x * 4 : x * 4 + 2; + size_t ex = x * 4 + bx0 * 8 + 4 <= opsin.xsize() + 2 + ? 
x * 4 + 4 + : opsin.xsize() - bx0 * 8 + 2; + if (ex - sx == 4 && ey - sy == 4) { + auto sum = Zero(df4); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix += Lanes(df4)) { + sum = Add(sum, Load(df4, rows_in[iy] + sx + ix)); + } + } + row_out[x] = GetLane(Sqrt(SumOfLanes(df4, sum))) * (1.0f / 4.0f); + } else { + float sum = 0; + for (size_t iy = sy; iy < ey; iy++) { + for (size_t ix = sx; ix < ex; ix++) { + sum += rows_in[iy][ix]; + } + } + row_out[x] = std::sqrt(sum / ((ex - sx) * (ey - sy))); + } + } + } + for (size_t by = by0; by < by1; by++) { + AcStrategyRow acs_row = enc_state->shared.ac_strategy.ConstRow(by); + uint8_t* JXL_RESTRICT out_row = epf_sharpness->Row(by); + float* JXL_RESTRICT quant_row = quant->Row(by); + for (size_t bx = bx0; bx < bx1; bx++) { + AcStrategy acs = acs_row[bx]; + if (!acs.IsFirstBlock()) continue; + // The errors are going to be linear to the quantization value in this + // locality. We only have access to the initial quant field here. + float quant_val = 1.0f / quant_row[bx]; + + const auto sq00 = [&](size_t y, size_t x) { + return sqrsum_00_row[((by - by0) * 2 + y) * sqrsum_00_stride + + (bx - bx0) * 2 + x]; + }; + const auto sq22 = [&](size_t y, size_t x) { + return sqrsum_22_row[((by - by0) * 2 + y) * sqrsum_22_stride + + (bx - bx0) * 2 + x]; + }; + float sqrsum_integral_transform = 0; + for (size_t iy = 0; iy < acs.covered_blocks_y() * 2; iy++) { + for (size_t ix = 0; ix < acs.covered_blocks_x() * 2; ix++) { + sqrsum_integral_transform += sq00(iy, ix) * sq00(iy, ix); + } + } + sqrsum_integral_transform /= + 4 * acs.covered_blocks_x() * acs.covered_blocks_y(); + sqrsum_integral_transform = std::sqrt(sqrsum_integral_transform); + // If masking is high or amplitude of the artefacts is low, then no + // smoothing is needed. 
+ for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { + // Five 4x4 blocks for masking estimation, all within the + // 8x8 area. + float minval_1 = std::min(sq00(2 * iy + 0, 2 * ix + 0), + sq00(2 * iy + 0, 2 * ix + 1)); + float minval_2 = std::min(sq00(2 * iy + 1, 2 * ix + 0), + sq00(2 * iy + 1, 2 * ix + 1)); + float minval = std::min(minval_1, minval_2); + minval = std::min(minval, sq22(2 * iy + 1, 2 * ix + 1)); + // Nine more 4x4 blocks for masking estimation, includes + // the 2 pixel area around the 8x8 block being controlled. + float minval2_1 = std::min(sq22(2 * iy + 0, 2 * ix + 0), + sq22(2 * iy + 0, 2 * ix + 1)); + float minval2_2 = std::min(sq22(2 * iy + 0, 2 * ix + 2), + sq22(2 * iy + 1, 2 * ix + 0)); + float minval2_3 = std::min(sq22(2 * iy + 1, 2 * ix + 1), + sq22(2 * iy + 1, 2 * ix + 2)); + float minval2_4 = std::min(sq22(2 * iy + 2, 2 * ix + 0), + sq22(2 * iy + 2, 2 * ix + 1)); + float minval2_5 = std::min(minval2_1, minval2_2); + float minval2_6 = std::min(minval2_3, minval2_4); + float minval2 = std::min(minval2_5, minval2_6); + minval2 = std::min(minval2, sq22(2 * iy + 2, 2 * ix + 2)); + float minval3 = std::min(minval, minval2); + minval *= 0.125f; + minval += 0.625f * minval3; + minval += + 0.125f * std::min(1.5f * minval3, sq22(2 * iy + 1, 2 * ix + 1)); + minval += 0.125f * minval2; + // Larger kBias, less smoothing for low intensity changes. + float kDeltaLimit = 3.2; + float bias = 0.0625f * quant_val; + float delta = + (sqrsum_integral_transform + (kDeltaLimit + 0.05) * bias) / + (minval + bias); + int out = 4; + if (delta > kDeltaLimit) { + out = 4; // smooth + } else { + out = 0; + } + // 'threshold' is separate from 'bias' for easier tuning of these + // heuristics. 
+ float threshold = 0.0625f * quant_val; + const float kSmoothLimit = 0.085f; + float smooth = 0.20f * (sq00(2 * iy + 0, 2 * ix + 0) + + sq00(2 * iy + 0, 2 * ix + 1) + + sq00(2 * iy + 1, 2 * ix + 0) + + sq00(2 * iy + 1, 2 * ix + 1) + minval); + if (smooth < kSmoothLimit * threshold) { + out = 4; + } + out_row[bx + sharpness_stride * iy + ix] = out; + } + } + } + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(ProcessTile); + +void ArControlFieldHeuristics::RunRect(const Rect& block_rect, + const Image3F& opsin, + PassesEncoderState* enc_state, + size_t thread) { + HWY_DYNAMIC_DISPATCH(ProcessTile) + (opsin, enc_state, block_rect, &temp_images[thread]); +} + +} // namespace jxl + +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.h b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.h new file mode 100644 index 0000000000..aabe71f46f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_ar_control_field.h @@ -0,0 +1,49 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_AR_CONTROL_FIELD_H_ +#define LIB_JXL_ENC_AR_CONTROL_FIELD_H_ + +#include + +#include + +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image.h" + +namespace jxl { + +struct PassesEncoderState; + +struct ArControlFieldHeuristics { + struct TempImages { + void InitOnce() { + if (laplacian_sqrsum.xsize() != 0) return; + laplacian_sqrsum = ImageF(kEncTileDim + 4, kEncTileDim + 4); + sqrsum_00 = ImageF(kEncTileDim / 4, kEncTileDim / 4); + sqrsum_22 = ImageF(kEncTileDim / 4 + 1, kEncTileDim / 4 + 1); + } + + ImageF laplacian_sqrsum; + ImageF sqrsum_00; + ImageF sqrsum_22; + }; + + void PrepareForThreads(size_t num_threads) { + temp_images.resize(num_threads); + } + + void RunRect(const Rect& block_rect, const Image3F& opsin, + PassesEncoderState* enc_state, size_t thread); + + std::vector temp_images; + ImageB* epf_sharpness; + ImageF* quant; + bool all_default; +}; + +} // namespace jxl + +#endif // LIB_JXL_AR_ENC_CONTROL_FIELD_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.cc b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.cc new file mode 100644 index 0000000000..5d784c43f6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.cc @@ -0,0 +1,127 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_aux_out.h" + +#include +#include +#include +#include + +#include +#include // accumulate +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +const char* LayerName(size_t layer) { + switch (layer) { + case kLayerHeader: + return "Headers"; + case kLayerTOC: + return "TOC"; + case kLayerDictionary: + return "Patches"; + case kLayerSplines: + return "Splines"; + case kLayerNoise: + return "Noise"; + case kLayerQuant: + return "Quantizer"; + case kLayerModularTree: + return "ModularTree"; + case kLayerModularGlobal: + return "ModularGlobal"; + case kLayerDC: + return "DC"; + case kLayerModularDcGroup: + return "ModularDcGroup"; + case kLayerControlFields: + return "ControlFields"; + case kLayerOrder: + return "CoeffOrder"; + case kLayerAC: + return "ACHistograms"; + case kLayerACTokens: + return "ACTokens"; + case kLayerModularAcGroup: + return "ModularAcGroup"; + default: + JXL_UNREACHABLE("Invalid layer %d\n", static_cast(layer)); + } +} + +void AuxOut::LayerTotals::Print(size_t num_inputs) const { + if (JXL_DEBUG_V_LEVEL > 0) { + printf("%10" PRId64, static_cast(total_bits)); + if (histogram_bits != 0) { + printf(" [c/i:%6.2f | hst:%8" PRId64 " | ex:%8" PRId64 + " | h+c+e:%12.3f", + num_clustered_histograms * 1.0 / num_inputs, + static_cast(histogram_bits >> 3), + static_cast(extra_bits >> 3), + (histogram_bits + clustered_entropy + extra_bits) / 8.0); + printf("]"); + } + printf("\n"); + } +} + +void AuxOut::Assimilate(const AuxOut& victim) { + for (size_t i = 0; i < layers.size(); ++i) { + layers[i].Assimilate(victim.layers[i]); + } + num_blocks += victim.num_blocks; + num_small_blocks += victim.num_small_blocks; + num_dct4x8_blocks += victim.num_dct4x8_blocks; + num_afv_blocks += victim.num_afv_blocks; + num_dct8_blocks += victim.num_dct8_blocks; + num_dct8x16_blocks += victim.num_dct8x16_blocks; + num_dct8x32_blocks += victim.num_dct8x32_blocks; + num_dct16_blocks += 
victim.num_dct16_blocks; + num_dct16x32_blocks += victim.num_dct16x32_blocks; + num_dct32_blocks += victim.num_dct32_blocks; + num_dct32x64_blocks += victim.num_dct32x64_blocks; + num_dct64_blocks += victim.num_dct64_blocks; + num_butteraugli_iters += victim.num_butteraugli_iters; +} + +void AuxOut::Print(size_t num_inputs) const { + if (JXL_DEBUG_V_LEVEL > 0) { + if (num_inputs == 0) return; + + LayerTotals all_layers; + for (size_t i = 0; i < layers.size(); ++i) { + all_layers.Assimilate(layers[i]); + } + + printf("Average butteraugli iters: %10.2f\n", + num_butteraugli_iters * 1.0 / num_inputs); + + for (size_t i = 0; i < layers.size(); ++i) { + if (layers[i].total_bits != 0) { + printf("Total layer bits %-10s\t", LayerName(i)); + printf("%10f%%", 100.0 * layers[i].total_bits / all_layers.total_bits); + layers[i].Print(num_inputs); + } + } + printf("Total image size "); + all_layers.Print(num_inputs); + + size_t total_blocks = 0; + size_t total_positions = 0; + if (total_blocks != 0 && total_positions != 0) { + printf("\n\t\t Blocks\t\tPositions\t\t\tBlocks/Position\n"); + printf(" Total:\t\t %7" PRIuS "\t\t %7" PRIuS " \t\t\t%10f%%\n\n", + total_blocks, total_positions, + 100.0 * total_blocks / total_positions); + } + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.h b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.h new file mode 100644 index 0000000000..545711af83 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_aux_out.h @@ -0,0 +1,102 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_AUX_OUT_H_ +#define LIB_JXL_AUX_OUT_H_ + +// Optional output information for debugging and analyzing size usage. + +#include + +#include +#include +#include + +namespace jxl { + +struct ColorEncoding; + +// For LayerName and AuxOut::layers[] index. Order does not matter. 
// Layer identifiers: valid arguments for LayerName() and indices into
// AuxOut::layers. Their relative order carries no meaning; kNumImageLayers is
// the number of layers.
enum {
  kLayerHeader = 0,
  kLayerTOC,
  kLayerDictionary,
  kLayerSplines,
  kLayerNoise,
  kLayerQuant,
  kLayerModularTree,
  kLayerModularGlobal,
  kLayerDC,
  kLayerModularDcGroup,
  kLayerControlFields,
  kLayerOrder,
  kLayerAC,
  kLayerACTokens,
  kLayerModularAcGroup,
  kNumImageLayers
};

const char* LayerName(size_t layer);

// Size and block statistics collected while encoding or decoding.
struct AuxOut {
 private:
  // Bit accounting for a single layer.
  struct LayerTotals {
    // Accumulates all counters of `victim` into this object.
    void Assimilate(const LayerTotals& victim) {
      total_bits += victim.total_bits;
      histogram_bits += victim.histogram_bits;
      extra_bits += victim.extra_bits;
      num_clustered_histograms += victim.num_clustered_histograms;
      clustered_entropy += victim.clustered_entropy;
    }
    void Print(size_t num_inputs) const;

    size_t num_clustered_histograms = 0;
    size_t extra_bits = 0;

    // Updated when a BitWriter allotment is charged to the layer.
    size_t histogram_bits = 0;
    size_t total_bits = 0;

    double clustered_entropy = 0.0;
  };

 public:
  AuxOut() = default;
  AuxOut(const AuxOut&) = default;

  // Adds the statistics of `victim` to this object.
  void Assimilate(const AuxOut& victim);

  void Print(size_t num_inputs) const;

  // Sum of total_bits over all layers.
  size_t TotalBits() const {
    size_t sum = 0;
    for (size_t i = 0; i < layers.size(); ++i) {
      sum += layers[i].total_bits;
    }
    return sum;
  }

  // One entry per kLayer* value.
  std::array<LayerTotals, kNumImageLayers> layers;
  size_t num_blocks = 0;

  // Block counts per transform kind (set by ac_strategy).
  size_t num_small_blocks = 0;
  size_t num_dct4x8_blocks = 0;
  size_t num_afv_blocks = 0;
  size_t num_dct8_blocks = 0;
  size_t num_dct8x16_blocks = 0;
  size_t num_dct8x32_blocks = 0;
  size_t num_dct16_blocks = 0;
  size_t num_dct16x32_blocks = 0;
  size_t num_dct32_blocks = 0;
  size_t num_dct32x64_blocks = 0;
  size_t num_dct64_blocks = 0;

  int num_butteraugli_iters = 0;
};
+ JXL_UNREACHABLE("Did not call Allotment::ReclaimUnused"); + } +} + +void BitWriter::Allotment::FinishedHistogram(BitWriter* JXL_RESTRICT writer) { + if (writer == nullptr) return; + JXL_ASSERT(!called_); // Call before ReclaimUnused + JXL_ASSERT(histogram_bits_ == 0); // Do not call twice + JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_); + histogram_bits_ = writer->BitsWritten() - prev_bits_written_; +} + +void BitWriter::Allotment::ReclaimAndCharge(BitWriter* JXL_RESTRICT writer, + size_t layer, + AuxOut* JXL_RESTRICT aux_out) { + size_t used_bits, unused_bits; + PrivateReclaim(writer, &used_bits, &unused_bits); + +#if 0 + printf("Layer %s bits: max %" PRIuS " used %" PRIuS " unused %" PRIuS "\n", + LayerName(layer), MaxBits(), used_bits, unused_bits); +#endif + + // This may be a nested call with aux_out == null. Whenever we know that + // aux_out is null, we can call ReclaimUnused directly. + if (aux_out != nullptr) { + aux_out->layers[layer].total_bits += used_bits; + aux_out->layers[layer].histogram_bits += HistogramBits(); + } +} + +void BitWriter::Allotment::PrivateReclaim(BitWriter* JXL_RESTRICT writer, + size_t* JXL_RESTRICT used_bits, + size_t* JXL_RESTRICT unused_bits) { + JXL_ASSERT(!called_); // Do not call twice + called_ = true; + if (writer == nullptr) return; + + JXL_ASSERT(writer->BitsWritten() >= prev_bits_written_); + *used_bits = writer->BitsWritten() - prev_bits_written_; + JXL_ASSERT(*used_bits <= max_bits_); + *unused_bits = max_bits_ - *used_bits; + + // Reclaim unused bytes whole bytes from writer's allotment. + const size_t unused_bytes = *unused_bits / kBitsPerByte; // truncate + JXL_ASSERT(writer->storage_.size() >= unused_bytes); + writer->storage_.resize(writer->storage_.size() - unused_bytes); + writer->current_allotment_ = parent_; + // Ensure we don't also charge the parent for these bits. 
+ auto parent = parent_; + while (parent != nullptr) { + parent->prev_bits_written_ += *used_bits; + parent = parent->parent_; + } +} + +void BitWriter::AppendByteAligned(const Span& span) { + if (span.empty()) return; + storage_.resize(storage_.size() + span.size() + 1); // extra zero padding + + // Concatenate by copying bytes because both source and destination are bytes. + JXL_ASSERT(BitsWritten() % kBitsPerByte == 0); + size_t pos = BitsWritten() / kBitsPerByte; + memcpy(storage_.data() + pos, span.data(), span.size()); + pos += span.size(); + storage_[pos++] = 0; // for next Write + JXL_ASSERT(pos <= storage_.size()); + bits_written_ += span.size() * kBitsPerByte; +} + +void BitWriter::AppendByteAligned(const BitWriter& other) { + JXL_ASSERT(other.BitsWritten() % kBitsPerByte == 0); + JXL_ASSERT(other.BitsWritten() / kBitsPerByte != 0); + + AppendByteAligned(other.GetSpan()); +} + +void BitWriter::AppendByteAligned(const std::vector& others) { + // Total size to add so we can preallocate + size_t other_bytes = 0; + for (const BitWriter& writer : others) { + JXL_ASSERT(writer.BitsWritten() % kBitsPerByte == 0); + other_bytes += writer.BitsWritten() / kBitsPerByte; + } + if (other_bytes == 0) { + // No bytes to append: this happens for example when creating per-group + // storage for groups, but not writing anything in them for e.g. lossless + // images with no alpha. Do nothing. + return; + } + storage_.resize(storage_.size() + other_bytes + 1); // extra zero padding + + // Concatenate by copying bytes because both source and destination are bytes. 
+ JXL_ASSERT(BitsWritten() % kBitsPerByte == 0); + size_t pos = BitsWritten() / kBitsPerByte; + for (const BitWriter& writer : others) { + const Span span = writer.GetSpan(); + if (!span.empty()) { + memcpy(storage_.data() + pos, span.data(), span.size()); + pos += span.size(); + } + } + storage_[pos++] = 0; // for next Write + JXL_ASSERT(pos <= storage_.size()); + bits_written_ += other_bytes * kBitsPerByte; +} + +// TODO(lode): avoid code duplication +void BitWriter::AppendByteAligned( + const std::vector>& others) { + // Total size to add so we can preallocate + size_t other_bytes = 0; + for (const auto& writer : others) { + JXL_ASSERT(writer->BitsWritten() % kBitsPerByte == 0); + other_bytes += writer->BitsWritten() / kBitsPerByte; + } + if (other_bytes == 0) { + // No bytes to append: this happens for example when creating per-group + // storage for groups, but not writing anything in them for e.g. lossless + // images with no alpha. Do nothing. + return; + } + storage_.resize(storage_.size() + other_bytes + 1); // extra zero padding + + // Concatenate by copying bytes because both source and destination are bytes. + JXL_ASSERT(BitsWritten() % kBitsPerByte == 0); + size_t pos = BitsWritten() / kBitsPerByte; + for (const auto& writer : others) { + const Span span = writer->GetSpan(); + memcpy(storage_.data() + pos, span.data(), span.size()); + pos += span.size(); + } + storage_[pos++] = 0; // for next Write + JXL_ASSERT(pos <= storage_.size()); + bits_written_ += other_bytes * kBitsPerByte; +} + +// Example: let's assume that 3 bits (Rs below) have been written already: +// BYTE+0 BYTE+1 BYTE+2 +// 0000 0RRR ???? ???? ???? ???? +// +// Now, we could write up to 5 bits by just shifting them left by 3 bits and +// OR'ing to BYTE-0. +// +// For n > 5 bits, we write the lowest 5 bits as above, then write the next +// lowest bits into BYTE+1 starting from its lower bits and so on. 
+void BitWriter::Write(size_t n_bits, uint64_t bits) { + JXL_DASSERT((bits >> n_bits) == 0); + JXL_DASSERT(n_bits <= kMaxBitsPerCall); + uint8_t* p = &storage_[bits_written_ / kBitsPerByte]; + const size_t bits_in_first_byte = bits_written_ % kBitsPerByte; + bits <<= bits_in_first_byte; +#if JXL_BYTE_ORDER_LITTLE + uint64_t v = *p; + // Last (partial) or next byte to write must be zero-initialized! + // PaddedBytes initializes the first, and Write/Append maintain this. + JXL_DASSERT(v >> bits_in_first_byte == 0); + v |= bits; + memcpy(p, &v, sizeof(v)); // Write bytes: possibly more than n_bits/8 +#else + *p++ |= static_cast(bits & 0xFF); + for (size_t bits_left_to_write = n_bits + bits_in_first_byte; + bits_left_to_write >= 9; bits_left_to_write -= 8) { + bits >>= 8; + *p++ = static_cast(bits & 0xFF); + } + *p = 0; +#endif + bits_written_ += n_bits; +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.h b/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.h new file mode 100644 index 0000000000..d3fac15a68 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_bit_writer.h @@ -0,0 +1,129 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_BIT_WRITER_H_ +#define LIB_JXL_ENC_BIT_WRITER_H_ + +// BitWriter class: unbuffered writes using unaligned 64-bit stores. + +#include +#include + +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" + +namespace jxl { + +struct AuxOut; + +struct BitWriter { + // Upper bound on `n_bits` in each call to Write. We shift a 64-bit word by + // 7 bits (max already valid bits in the last byte) and at least 1 bit is + // needed to zero-initialize the bit-stream ahead (i.e. 
if 7 bits are valid + // and we write 57 bits, then the next write will access a byte that was not + // yet zero-initialized). + static constexpr size_t kMaxBitsPerCall = 56; + + BitWriter() : bits_written_(0) {} + + // Disallow copying - may lead to bugs. + BitWriter(const BitWriter&) = delete; + BitWriter& operator=(const BitWriter&) = delete; + BitWriter(BitWriter&&) = default; + BitWriter& operator=(BitWriter&&) = default; + + size_t BitsWritten() const { return bits_written_; } + + Span GetSpan() const { + // Callers must ensure byte alignment to avoid uninitialized bits. + JXL_ASSERT(bits_written_ % kBitsPerByte == 0); + return Span(storage_.data(), bits_written_ / kBitsPerByte); + } + + // Example usage: bytes = std::move(writer).TakeBytes(); Useful for the + // top-level encoder which returns PaddedBytes, not a BitWriter. + // *this must be an rvalue reference and is invalid afterwards. + PaddedBytes&& TakeBytes() && { + // Callers must ensure byte alignment to avoid uninitialized bits. + JXL_ASSERT(bits_written_ % kBitsPerByte == 0); + storage_.resize(bits_written_ / kBitsPerByte); + return std::move(storage_); + } + + private: + // Must be byte-aligned before calling. + void AppendByteAligned(const Span& span); + + public: + // NOTE: no allotment needed, the other BitWriters have already been charged. + void AppendByteAligned(const BitWriter& other); + void AppendByteAligned(const std::vector>& others); + void AppendByteAligned(const std::vector& others); + + class Allotment { + public: + // Expands a BitWriter's storage. Must happen before calling Write or + // ZeroPadToByte. Must call ReclaimUnused after writing to reclaim the + // unused storage so that BitWriter memory use remains tightly bounded. + Allotment(BitWriter* JXL_RESTRICT writer, size_t max_bits); + ~Allotment(); + + size_t MaxBits() const { return max_bits_; } + + // Call after writing a histogram, but before ReclaimUnused. 
+ void FinishedHistogram(BitWriter* JXL_RESTRICT writer); + + size_t HistogramBits() const { + JXL_ASSERT(called_); + return histogram_bits_; + } + + void ReclaimAndCharge(BitWriter* JXL_RESTRICT writer, size_t layer, + AuxOut* JXL_RESTRICT aux_out); + + private: + void PrivateReclaim(BitWriter* JXL_RESTRICT writer, + size_t* JXL_RESTRICT used_bits, + size_t* JXL_RESTRICT unused_bits); + + size_t prev_bits_written_; + const size_t max_bits_; + size_t histogram_bits_ = 0; + bool called_ = false; + Allotment* parent_; + }; + + // Writes bits into bytes in increasing addresses, and within a byte + // least-significant-bit first. + // + // The function can write up to 56 bits in one go. + void Write(size_t n_bits, uint64_t bits); + + // This should only rarely be used - e.g. when the current location will be + // referenced via byte offset (TOCs point to groups), or byte-aligned reading + // is required for speed. + void ZeroPadToByte() { + const size_t remainder_bits = + RoundUpBitsToByteMultiple(bits_written_) - bits_written_; + if (remainder_bits == 0) return; + Write(remainder_bits, 0); + JXL_ASSERT(bits_written_ % kBitsPerByte == 0); + } + + private: + size_t bits_written_; + PaddedBytes storage_; + Allotment* current_allotment_ = nullptr; +}; + +} // namespace jxl + +#endif // LIB_JXL_ENC_BIT_WRITER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.cc b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.cc new file mode 100644 index 0000000000..d378fd2e23 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.cc @@ -0,0 +1,100 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_butteraugli_comparator.h" + +#include +#include + +#include "lib/jxl/color_management.h" +#include "lib/jxl/enc_image_bundle.h" + +namespace jxl { + +JxlButteraugliComparator::JxlButteraugliComparator( + const ButteraugliParams& params, const JxlCmsInterface& cms) + : params_(params), cms_(cms) {} + +Status JxlButteraugliComparator::SetReferenceImage(const ImageBundle& ref) { + const ImageBundle* ref_linear_srgb; + ImageMetadata metadata = *ref.metadata(); + ImageBundle store(&metadata); + if (!TransformIfNeeded(ref, ColorEncoding::LinearSRGB(ref.IsGray()), cms_, + /*pool=*/nullptr, &store, &ref_linear_srgb)) { + return false; + } + + comparator_.reset( + new ButteraugliComparator(ref_linear_srgb->color(), params_)); + xsize_ = ref.xsize(); + ysize_ = ref.ysize(); + return true; +} + +Status JxlButteraugliComparator::CompareWith(const ImageBundle& actual, + ImageF* diffmap, float* score) { + if (!comparator_) { + return JXL_FAILURE("Must set reference image first"); + } + if (xsize_ != actual.xsize() || ysize_ != actual.ysize()) { + return JXL_FAILURE("Images must have same size"); + } + + const ImageBundle* actual_linear_srgb; + ImageMetadata metadata = *actual.metadata(); + ImageBundle store(&metadata); + if (!TransformIfNeeded(actual, ColorEncoding::LinearSRGB(actual.IsGray()), + cms_, + /*pool=*/nullptr, &store, &actual_linear_srgb)) { + return false; + } + + ImageF temp_diffmap(xsize_, ysize_); + comparator_->Diffmap(actual_linear_srgb->color(), temp_diffmap); + + if (score != nullptr) { + *score = ButteraugliScoreFromDiffmap(temp_diffmap, ¶ms_); + } + if (diffmap != nullptr) { + diffmap->Swap(temp_diffmap); + } + + return true; +} + +float JxlButteraugliComparator::GoodQualityScore() const { + return ButteraugliFuzzyInverse(1.5); +} + +float JxlButteraugliComparator::BadQualityScore() const { + return ButteraugliFuzzyInverse(0.5); +} + +float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1, + const 
ButteraugliParams& params, + const JxlCmsInterface& cms, ImageF* distmap, + ThreadPool* pool, bool ignore_alpha) { + JxlButteraugliComparator comparator(params, cms); + return ComputeScore(rgb0, rgb1, &comparator, cms, distmap, pool, + ignore_alpha); +} + +float ButteraugliDistance(const std::vector& frames0, + const std::vector& frames1, + const ButteraugliParams& params, + const JxlCmsInterface& cms, ImageF* distmap, + ThreadPool* pool) { + JxlButteraugliComparator comparator(params, cms); + JXL_ASSERT(frames0.size() == frames1.size()); + float max_dist = 0.0f; + for (size_t i = 0; i < frames0.size(); ++i) { + max_dist = std::max( + max_dist, + ComputeScore(frames0[i], frames1[i], &comparator, cms, distmap, pool)); + } + return max_dist; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.h b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.h new file mode 100644 index 0000000000..28d9faa2b7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_butteraugli_comparator.h @@ -0,0 +1,60 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_ +#define LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_ + +#include +#include + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/butteraugli/butteraugli.h" +#include "lib/jxl/enc_comparator.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +class JxlButteraugliComparator : public Comparator { + public: + explicit JxlButteraugliComparator(const ButteraugliParams& params, + const JxlCmsInterface& cms); + + Status SetReferenceImage(const ImageBundle& ref) override; + + Status CompareWith(const ImageBundle& actual, ImageF* diffmap, + float* score) override; + + float GoodQualityScore() const override; + float BadQualityScore() const override; + + private: + ButteraugliParams params_; + JxlCmsInterface cms_; + std::unique_ptr comparator_; + size_t xsize_ = 0; + size_t ysize_ = 0; +}; + +// Returns the butteraugli distance between rgb0 and rgb1. +// If distmap is not null, it must be the same size as rgb0 and rgb1. +float ButteraugliDistance(const ImageBundle& rgb0, const ImageBundle& rgb1, + const ButteraugliParams& params, + const JxlCmsInterface& cms, ImageF* distmap = nullptr, + ThreadPool* pool = nullptr, + bool ignore_alpha = false); + +float ButteraugliDistance(const std::vector& frames0, + const std::vector& frames1, + const ButteraugliParams& params, + const JxlCmsInterface& cms, ImageF* distmap = nullptr, + ThreadPool* pool = nullptr); + +} // namespace jxl + +#endif // LIB_JXL_ENC_BUTTERAUGLI_COMPARATOR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cache.cc b/third-party/libjxl/libjxl/lib/jxl/enc_cache.cc new file mode 100644 index 0000000000..97d88e0fca --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_cache.cc @@ -0,0 +1,208 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_cache.h" + +#include +#include + +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/common.h" +#include "lib/jxl/compressed_dc.h" +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/dec_frame.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_frame.h" +#include "lib/jxl/enc_group.h" +#include "lib/jxl/enc_modular.h" +#include "lib/jxl/enc_quant_weights.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/passes_state.h" +#include "lib/jxl/quantizer.h" + +namespace jxl { + +Status InitializePassesEncoder(const Image3F& opsin, const JxlCmsInterface& cms, + ThreadPool* pool, PassesEncoderState* enc_state, + ModularFrameEncoder* modular_frame_encoder, + AuxOut* aux_out) { + PassesSharedState& JXL_RESTRICT shared = enc_state->shared; + + enc_state->histogram_idx.resize(shared.frame_dim.num_groups); + + enc_state->x_qm_multiplier = + std::pow(1.25f, shared.frame_header.x_qm_scale - 2.0f); + enc_state->b_qm_multiplier = + std::pow(1.25f, shared.frame_header.b_qm_scale - 2.0f); + + if (enc_state->coeffs.size() < shared.frame_header.passes.num_passes) { + enc_state->coeffs.reserve(shared.frame_header.passes.num_passes); + for (size_t i = enc_state->coeffs.size(); + i < shared.frame_header.passes.num_passes; i++) { + // Allocate enough coefficients for each group on every row. 
+ enc_state->coeffs.emplace_back(make_unique>( + kGroupDim * kGroupDim, shared.frame_dim.num_groups)); + } + } + while (enc_state->coeffs.size() > shared.frame_header.passes.num_passes) { + enc_state->coeffs.pop_back(); + } + + float scale = + shared.quantizer.ScaleGlobalScale(enc_state->cparams.quant_ac_rescale); + DequantMatricesScaleDC(&shared.matrices, scale); + shared.quantizer.RecomputeFromGlobalScale(); + + Image3F dc(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, shared.frame_dim.num_groups, ThreadPool::NoInit, + [&](size_t group_idx, size_t _) { + ComputeCoefficients(group_idx, enc_state, opsin, &dc); + }, + "Compute coeffs")); + + if (shared.frame_header.flags & FrameHeader::kUseDcFrame) { + CompressParams cparams = enc_state->cparams; + cparams.dots = Override::kOff; + cparams.noise = Override::kOff; + cparams.patches = Override::kOff; + cparams.gaborish = Override::kOff; + cparams.epf = 0; + cparams.resampling = 1; + cparams.ec_resampling = 1; + // The DC frame will have alpha=0. Don't erase its contents. + cparams.keep_invisible = Override::kOn; + JXL_ASSERT(cparams.progressive_dc > 0); + cparams.progressive_dc--; + // Use kVarDCT in max_error_mode for intermediate progressive DC, + // and kModular for the smallest DC (first in the bitstream) + if (cparams.progressive_dc == 0) { + cparams.modular_mode = true; + cparams.speed_tier = + SpeedTier(std::max(static_cast(SpeedTier::kTortoise), + static_cast(cparams.speed_tier) - 1)); + cparams.butteraugli_distance = + std::max(kMinButteraugliDistance, + enc_state->cparams.butteraugli_distance * 0.02f); + } else { + cparams.max_error_mode = true; + for (size_t c = 0; c < 3; c++) { + cparams.max_error[c] = shared.quantizer.MulDC()[c]; + } + // Guess a distance that produces good initial results. 
+ cparams.butteraugli_distance = + std::max(kMinButteraugliDistance, + enc_state->cparams.butteraugli_distance * 0.1f); + } + ImageBundle ib(&shared.metadata->m); + // This is a lie - dc is in XYB + // (but EncodeFrame will skip RGB->XYB conversion anyway) + ib.SetFromImage( + std::move(dc), + ColorEncoding::LinearSRGB(shared.metadata->m.color_encoding.IsGray())); + if (!ib.metadata()->extra_channel_info.empty()) { + // Add dummy extra channels to the patch image: dc_level frames do not yet + // support extra channels, but the codec expects that the amount of extra + // channels in frames matches that in the metadata of the codestream. + std::vector extra_channels; + extra_channels.reserve(ib.metadata()->extra_channel_info.size()); + for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) { + extra_channels.emplace_back(ib.xsize(), ib.ysize()); + // Must initialize the image with data to not affect blending with + // uninitialized memory. + // TODO(lode): dc_level must copy and use the real extra channels + // instead. + ZeroFillImage(&extra_channels.back()); + } + ib.SetExtraChannels(std::move(extra_channels)); + } + std::unique_ptr state = + jxl::make_unique(); + + auto special_frame = std::unique_ptr(new BitWriter()); + FrameInfo dc_frame_info; + dc_frame_info.frame_type = FrameType::kDCFrame; + dc_frame_info.dc_level = shared.frame_header.dc_level + 1; + dc_frame_info.ib_needs_color_transform = false; + dc_frame_info.save_before_color_transform = true; // Implicitly true + AuxOut dc_aux_out; + JXL_CHECK(EncodeFrame(cparams, dc_frame_info, shared.metadata, ib, + state.get(), cms, pool, special_frame.get(), + aux_out ? 
&dc_aux_out : nullptr)); + if (aux_out) { + for (const auto& l : dc_aux_out.layers) { + aux_out->layers[kLayerDC].Assimilate(l); + } + } + const Span encoded = special_frame->GetSpan(); + enc_state->special_frames.emplace_back(std::move(special_frame)); + + ImageBundle decoded(&shared.metadata->m); + std::unique_ptr dec_state = + jxl::make_unique(); + JXL_CHECK( + dec_state->output_encoding_info.SetFromMetadata(*shared.metadata)); + const uint8_t* frame_start = encoded.data(); + size_t encoded_size = encoded.size(); + for (int i = 0; i <= cparams.progressive_dc; ++i) { + JXL_CHECK(DecodeFrame(dec_state.get(), pool, frame_start, encoded_size, + &decoded, *shared.metadata)); + frame_start += decoded.decoded_bytes(); + encoded_size -= decoded.decoded_bytes(); + } + // TODO(lode): shared.frame_header.dc_level should be equal to + // dec_state.shared->frame_header.dc_level - 1 here, since above we set + // dc_frame_info.dc_level = shared.frame_header.dc_level + 1, and + // dc_frame_info.dc_level is used by EncodeFrame. However, if EncodeFrame + // outputs multiple frames, this assumption could be wrong. + const Image3F& dc_frame = + dec_state->shared->dc_frames[shared.frame_header.dc_level]; + shared.dc_storage = Image3F(dc_frame.xsize(), dc_frame.ysize()); + CopyImageTo(dc_frame, &shared.dc_storage); + ZeroFillImage(&shared.quant_dc); + shared.dc = &shared.dc_storage; + JXL_CHECK(encoded_size == 0); + } else { + auto compute_dc_coeffs = [&](int group_index, int /* thread */) { + modular_frame_encoder->AddVarDCTDC( + dc, group_index, enc_state->cparams.speed_tier < SpeedTier::kFalcon, + enc_state, /*jpeg_transcode=*/false); + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, shared.frame_dim.num_dc_groups, + ThreadPool::NoInit, compute_dc_coeffs, + "Compute DC coeffs")); + // TODO(veluca): this is only useful in tests and if inspection is enabled. 
+ if (!(shared.frame_header.flags & FrameHeader::kSkipAdaptiveDCSmoothing)) { + AdaptiveDCSmoothing(shared.quantizer.MulDC(), &shared.dc_storage, pool); + } + } + auto compute_ac_meta = [&](int group_index, int /* thread */) { + modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/false, + enc_state); + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, shared.frame_dim.num_dc_groups, + ThreadPool::NoInit, compute_ac_meta, + "Compute AC Metadata")); + + return true; +} + +void EncCache::InitOnce() { + if (num_nzeroes.xsize() == 0) { + num_nzeroes = Image3I(kGroupDimInBlocks, kGroupDimInBlocks); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cache.h b/third-party/libjxl/libjxl/lib/jxl/enc_cache.h new file mode 100644 index 0000000000..6c7870ba00 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_cache.h @@ -0,0 +1,93 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_CACHE_H_ +#define LIB_JXL_ENC_CACHE_H_ + +#include +#include + +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_heuristics.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_progressive_split.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/passes_state.h" +#include "lib/jxl/quant_weights.h" +#include "lib/jxl/quantizer.h" + +namespace jxl { + +struct AuxOut; + +// Contains encoder state. +struct PassesEncoderState { + PassesSharedState shared; + + ImageF initial_quant_field; // Invalid in Falcon mode. + ImageF initial_quant_masking; // Invalid in Falcon mode. + + // Per-pass DCT coefficients for the image. 
One row per group. + std::vector> coeffs; + + // Raw data for special (reference+DC) frames. + std::vector> special_frames; + + // For splitting into passes. + ProgressiveSplitter progressive_splitter; + + CompressParams cparams; + + struct PassData { + std::vector> ac_tokens; + std::vector context_map; + EntropyEncodingData codes; + }; + + std::vector passes; + std::vector histogram_idx; + + // Coefficient orders that are non-default. + std::vector used_orders; + + // Multiplier to be applied to the quant matrices of the x channel. + float x_qm_multiplier = 1.0f; + float b_qm_multiplier = 1.0f; + + // Heuristics to be used by the encoder. + std::unique_ptr heuristics = + make_unique(); +}; + +// Initialize per-frame information. +class ModularFrameEncoder; +Status InitializePassesEncoder(const Image3F& opsin, const JxlCmsInterface& cms, + ThreadPool* pool, + PassesEncoderState* passes_enc_state, + ModularFrameEncoder* modular_frame_encoder, + AuxOut* aux_out); + +// Working area for ComputeCoefficients (per-group!) +struct EncCache { + // Allocates memory when first called, shrinks images to current group size. + void InitOnce(); + + // TokenizeCoefficients + Image3I num_nzeroes; +}; + +} // namespace jxl + +#endif // LIB_JXL_ENC_CACHE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.cc b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.cc new file mode 100644 index 0000000000..4ed85be536 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.cc @@ -0,0 +1,408 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_chroma_from_luma.h" + +#include +#include + +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_chroma_from_luma.cc" +#include +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_transforms-inl.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_transforms-inl.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/quantizer.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Ge; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::Lt; + +static HWY_FULL(float) df; + +struct CFLFunction { + static constexpr float kCoeff = 1.f / 3; + static constexpr float kThres = 100.0f; + static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor; + CFLFunction(const float* values_m, const float* values_s, size_t num, + float base, float distance_mul) + : values_m(values_m), + values_s(values_s), + num(num), + base(base), + distance_mul(distance_mul) {} + + // Returns f'(x), where f is 1/3 * sum ((|color residual| + 1)^2-1) + + // distance_mul * x^2 * num. 
+ float Compute(float x, float eps, float* fpeps, float* fmeps) const { + float first_derivative = 2 * distance_mul * num * x; + float first_derivative_peps = 2 * distance_mul * num * (x + eps); + float first_derivative_meps = 2 * distance_mul * num * (x - eps); + + const auto inv_color_factor = Set(df, kInvColorFactor); + const auto thres = Set(df, kThres); + const auto coeffx2 = Set(df, kCoeff * 2.0f); + const auto one = Set(df, 1.0f); + const auto zero = Set(df, 0.0f); + const auto base_v = Set(df, base); + const auto x_v = Set(df, x); + const auto xpe_v = Set(df, x + eps); + const auto xme_v = Set(df, x - eps); + auto fd_v = Zero(df); + auto fdpe_v = Zero(df); + auto fdme_v = Zero(df); + JXL_ASSERT(num % Lanes(df) == 0); + + for (size_t i = 0; i < num; i += Lanes(df)) { + // color residual = ax + b + const auto a = Mul(inv_color_factor, Load(df, values_m + i)); + const auto b = + Sub(Mul(base_v, Load(df, values_m + i)), Load(df, values_s + i)); + const auto v = MulAdd(a, x_v, b); + const auto vpe = MulAdd(a, xpe_v, b); + const auto vme = MulAdd(a, xme_v, b); + const auto av = Abs(v); + const auto avpe = Abs(vpe); + const auto avme = Abs(vme); + const auto acoeffx2 = Mul(coeffx2, a); + auto d = Mul(acoeffx2, Add(av, one)); + auto dpe = Mul(acoeffx2, Add(avpe, one)); + auto dme = Mul(acoeffx2, Add(avme, one)); + d = IfThenElse(Lt(v, zero), Sub(zero, d), d); + dpe = IfThenElse(Lt(vpe, zero), Sub(zero, dpe), dpe); + dme = IfThenElse(Lt(vme, zero), Sub(zero, dme), dme); + const auto above = Ge(av, thres); + // TODO(eustas): use IfThenElseZero + fd_v = Add(fd_v, IfThenElse(above, zero, d)); + fdpe_v = Add(fdpe_v, IfThenElse(above, zero, dpe)); + fdme_v = Add(fdme_v, IfThenElse(above, zero, dme)); + } + + *fpeps = first_derivative_peps + GetLane(SumOfLanes(df, fdpe_v)); + *fmeps = first_derivative_meps + GetLane(SumOfLanes(df, fdme_v)); + return first_derivative + GetLane(SumOfLanes(df, fd_v)); + } + + const float* JXL_RESTRICT values_m; + const float* JXL_RESTRICT 
values_s; + size_t num; + float base; + float distance_mul; +}; + +// Chroma-from-luma search, values_m will have luma -- and values_s chroma. +int32_t FindBestMultiplier(const float* values_m, const float* values_s, + size_t num, float base, float distance_mul, + bool fast) { + if (num == 0) { + return 0; + } + float x; + if (fast) { + static constexpr float kInvColorFactor = 1.0f / kDefaultColorFactor; + auto ca = Zero(df); + auto cb = Zero(df); + const auto inv_color_factor = Set(df, kInvColorFactor); + const auto base_v = Set(df, base); + for (size_t i = 0; i < num; i += Lanes(df)) { + // color residual = ax + b + const auto a = Mul(inv_color_factor, Load(df, values_m + i)); + const auto b = + Sub(Mul(base_v, Load(df, values_m + i)), Load(df, values_s + i)); + ca = MulAdd(a, a, ca); + cb = MulAdd(a, b, cb); + } + // + distance_mul * x^2 * num + x = -GetLane(SumOfLanes(df, cb)) / + (GetLane(SumOfLanes(df, ca)) + num * distance_mul * 0.5f); + } else { + constexpr float eps = 100; + constexpr float kClamp = 20.0f; + CFLFunction fn(values_m, values_s, num, base, distance_mul); + x = 0; + // Up to 20 Newton iterations, with approximate derivatives. + // Derivatives are approximate due to the high amount of noise in the exact + // derivatives. + for (size_t i = 0; i < 20; i++) { + float dfpeps, dfmeps; + float df = fn.Compute(x, eps, &dfpeps, &dfmeps); + float ddf = (dfpeps - dfmeps) / (2 * eps); + float kExperimentalInsignificantStabilizer = 0.85; + float step = df / (ddf + kExperimentalInsignificantStabilizer); + x -= std::min(kClamp, std::max(-kClamp, step)); + if (std::abs(step) < 3e-3) break; + } + } + // CFL seems to be tricky for larger transforms for HF components + // close to zero. This heuristic brings the solutions closer to zero + // and reduces red-green oscillations. 
+ float towards_zero = 2.6; + if (x >= towards_zero) { + x -= towards_zero; + } else if (x <= -towards_zero) { + x += towards_zero; + } else { + x = 0; + } + return std::max(-128.0f, std::min(127.0f, roundf(x))); +} + +void InitDCStorage(size_t num_blocks, ImageF* dc_values) { + // First row: Y channel + // Second row: X channel + // Third row: Y channel + // Fourth row: B channel + *dc_values = ImageF(RoundUpTo(num_blocks, Lanes(df)), 4); + + JXL_ASSERT(dc_values->xsize() != 0); + // Zero-fill the last lanes + for (size_t y = 0; y < 4; y++) { + for (size_t x = dc_values->xsize() - Lanes(df); x < dc_values->xsize(); + x++) { + dc_values->Row(y)[x] = 0; + } + } +} + +void ComputeDC(const ImageF& dc_values, bool fast, int32_t* dc_x, + int32_t* dc_b) { + constexpr float kDistanceMultiplierDC = 1e-5f; + const float* JXL_RESTRICT dc_values_yx = dc_values.Row(0); + const float* JXL_RESTRICT dc_values_x = dc_values.Row(1); + const float* JXL_RESTRICT dc_values_yb = dc_values.Row(2); + const float* JXL_RESTRICT dc_values_b = dc_values.Row(3); + *dc_x = FindBestMultiplier(dc_values_yx, dc_values_x, dc_values.xsize(), 0.0f, + kDistanceMultiplierDC, fast); + *dc_b = FindBestMultiplier(dc_values_yb, dc_values_b, dc_values.xsize(), + kYToBRatio, kDistanceMultiplierDC, fast); +} + +void ComputeTile(const Image3F& opsin, const DequantMatrices& dequant, + const AcStrategyImage* ac_strategy, + const ImageI* raw_quant_field, const Quantizer* quantizer, + const Rect& r, bool fast, bool use_dct8, ImageSB* map_x, + ImageSB* map_b, ImageF* dc_values, float* mem) { + static_assert(kEncTileDimInBlocks == kColorTileDimInBlocks, + "Invalid color tile dim"); + size_t xsize_blocks = opsin.xsize() / kBlockDim; + constexpr float kDistanceMultiplierAC = 1e-9f; + + const size_t y0 = r.y0(); + const size_t x0 = r.x0(); + const size_t x1 = r.x0() + r.xsize(); + const size_t y1 = r.y0() + r.ysize(); + + int ty = y0 / kColorTileDimInBlocks; + int tx = x0 / kColorTileDimInBlocks; + + int8_t* 
JXL_RESTRICT row_out_x = map_x->Row(ty); + int8_t* JXL_RESTRICT row_out_b = map_b->Row(ty); + + float* JXL_RESTRICT dc_values_yx = dc_values->Row(0); + float* JXL_RESTRICT dc_values_x = dc_values->Row(1); + float* JXL_RESTRICT dc_values_yb = dc_values->Row(2); + float* JXL_RESTRICT dc_values_b = dc_values->Row(3); + + // All are aligned. + float* HWY_RESTRICT block_y = mem; + float* HWY_RESTRICT block_x = block_y + AcStrategy::kMaxCoeffArea; + float* HWY_RESTRICT block_b = block_x + AcStrategy::kMaxCoeffArea; + float* HWY_RESTRICT coeffs_yx = block_b + AcStrategy::kMaxCoeffArea; + float* HWY_RESTRICT coeffs_x = coeffs_yx + kColorTileDim * kColorTileDim; + float* HWY_RESTRICT coeffs_yb = coeffs_x + kColorTileDim * kColorTileDim; + float* HWY_RESTRICT coeffs_b = coeffs_yb + kColorTileDim * kColorTileDim; + float* HWY_RESTRICT scratch_space = coeffs_b + kColorTileDim * kColorTileDim; + JXL_DASSERT(scratch_space + 2 * AcStrategy::kMaxCoeffArea == + block_y + CfLHeuristics::kItemsPerThread); + + // Small (~256 bytes each) + HWY_ALIGN_MAX float + dc_y[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {}; + HWY_ALIGN_MAX float + dc_x[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {}; + HWY_ALIGN_MAX float + dc_b[AcStrategy::kMaxCoeffBlocks * AcStrategy::kMaxCoeffBlocks] = {}; + size_t num_ac = 0; + + for (size_t y = y0; y < y1; ++y) { + const float* JXL_RESTRICT row_y = opsin.ConstPlaneRow(1, y * kBlockDim); + const float* JXL_RESTRICT row_x = opsin.ConstPlaneRow(0, y * kBlockDim); + const float* JXL_RESTRICT row_b = opsin.ConstPlaneRow(2, y * kBlockDim); + size_t stride = opsin.PixelsPerRow(); + + for (size_t x = x0; x < x1; x++) { + AcStrategy acs = use_dct8 + ? 
AcStrategy::FromRawStrategy(AcStrategy::Type::DCT) + : ac_strategy->ConstRow(y)[x]; + if (!acs.IsFirstBlock()) continue; + size_t xs = acs.covered_blocks_x(); + TransformFromPixels(acs.Strategy(), row_y + x * kBlockDim, stride, + block_y, scratch_space); + DCFromLowestFrequencies(acs.Strategy(), block_y, dc_y, xs); + TransformFromPixels(acs.Strategy(), row_x + x * kBlockDim, stride, + block_x, scratch_space); + DCFromLowestFrequencies(acs.Strategy(), block_x, dc_x, xs); + TransformFromPixels(acs.Strategy(), row_b + x * kBlockDim, stride, + block_b, scratch_space); + DCFromLowestFrequencies(acs.Strategy(), block_b, dc_b, xs); + const float* const JXL_RESTRICT qm_x = + dequant.InvMatrix(acs.Strategy(), 0); + const float* const JXL_RESTRICT qm_b = + dequant.InvMatrix(acs.Strategy(), 2); + float q_dc_x = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(0); + float q_dc_b = use_dct8 ? 1 : 1.0f / quantizer->GetInvDcStep(2); + + // Copy DCs in dc_values. + for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + for (size_t ix = 0; ix < xs; ix++) { + dc_values_yx[(iy + y) * xsize_blocks + ix + x] = + dc_y[iy * xs + ix] * q_dc_x; + dc_values_x[(iy + y) * xsize_blocks + ix + x] = + dc_x[iy * xs + ix] * q_dc_x; + dc_values_yb[(iy + y) * xsize_blocks + ix + x] = + dc_y[iy * xs + ix] * q_dc_b; + dc_values_b[(iy + y) * xsize_blocks + ix + x] = + dc_b[iy * xs + ix] * q_dc_b; + } + } + + // Do not use this block for computing AC CfL. + if (acs.covered_blocks_x() + x0 > x1 || + acs.covered_blocks_y() + y0 > y1) { + continue; + } + + // Copy AC coefficients in the local block. The order in which + // coefficients get stored does not matter. + size_t cx = acs.covered_blocks_x(); + size_t cy = acs.covered_blocks_y(); + CoefficientLayout(&cy, &cx); + // Zero out LFs. This introduces terms in the optimization loop that + // don't affect the result, as they are all 0, but allow for simpler + // SIMDfication. 
+ for (size_t iy = 0; iy < cy; iy++) { + for (size_t ix = 0; ix < cx; ix++) { + block_y[cx * kBlockDim * iy + ix] = 0; + block_x[cx * kBlockDim * iy + ix] = 0; + block_b[cx * kBlockDim * iy + ix] = 0; + } + } + // Unclear why this is like it is. (This works slightly better + // than the previous approach which was also a hack.) + const float qq = + (raw_quant_field == nullptr) ? 1.0f : raw_quant_field->Row(y)[x]; + // Experimentally values 128-130 seem best -- I don't know why we + // need this multiplier. + const float kStrangeMultiplier = 128; + float q = use_dct8 ? 1 : quantizer->Scale() * kStrangeMultiplier * qq; + const auto qv = Set(df, q); + for (size_t i = 0; i < cx * cy * 64; i += Lanes(df)) { + const auto b_y = Load(df, block_y + i); + const auto b_x = Load(df, block_x + i); + const auto b_b = Load(df, block_b + i); + const auto qqm_x = Mul(qv, Load(df, qm_x + i)); + const auto qqm_b = Mul(qv, Load(df, qm_b + i)); + Store(Mul(b_y, qqm_x), df, coeffs_yx + num_ac); + Store(Mul(b_x, qqm_x), df, coeffs_x + num_ac); + Store(Mul(b_y, qqm_b), df, coeffs_yb + num_ac); + Store(Mul(b_b, qqm_b), df, coeffs_b + num_ac); + num_ac += Lanes(df); + } + } + } + JXL_CHECK(num_ac % Lanes(df) == 0); + row_out_x[tx] = FindBestMultiplier(coeffs_yx, coeffs_x, num_ac, 0.0f, + kDistanceMultiplierAC, fast); + row_out_b[tx] = FindBestMultiplier(coeffs_yb, coeffs_b, num_ac, kYToBRatio, + kDistanceMultiplierAC, fast); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(InitDCStorage); +HWY_EXPORT(ComputeDC); +HWY_EXPORT(ComputeTile); + +void CfLHeuristics::Init(const Image3F& opsin) { + size_t xsize_blocks = opsin.xsize() / kBlockDim; + size_t ysize_blocks = opsin.ysize() / kBlockDim; + HWY_DYNAMIC_DISPATCH(InitDCStorage) + (xsize_blocks * ysize_blocks, &dc_values); +} + +void CfLHeuristics::ComputeTile(const Rect& r, const Image3F& opsin, + const 
DequantMatrices& dequant, + const AcStrategyImage* ac_strategy, + const ImageI* raw_quant_field, + const Quantizer* quantizer, bool fast, + size_t thread, ColorCorrelationMap* cmap) { + bool use_dct8 = ac_strategy == nullptr; + HWY_DYNAMIC_DISPATCH(ComputeTile) + (opsin, dequant, ac_strategy, raw_quant_field, quantizer, r, fast, use_dct8, + &cmap->ytox_map, &cmap->ytob_map, &dc_values, + mem.get() + thread * kItemsPerThread); +} + +void CfLHeuristics::ComputeDC(bool fast, ColorCorrelationMap* cmap) { + int32_t ytob_dc = 0; + int32_t ytox_dc = 0; + HWY_DYNAMIC_DISPATCH(ComputeDC)(dc_values, fast, &ytox_dc, &ytob_dc); + cmap->SetYToBDC(ytob_dc); + cmap->SetYToXDC(ytox_dc); +} + +void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer, + size_t layer, AuxOut* aux_out) { + float color_factor = map->GetColorFactor(); + float base_correlation_x = map->GetBaseCorrelationX(); + float base_correlation_b = map->GetBaseCorrelationB(); + int32_t ytox_dc = map->GetYToXDC(); + int32_t ytob_dc = map->GetYToBDC(); + + BitWriter::Allotment allotment(writer, 1 + 2 * kBitsPerByte + 12 + 32); + if (ytox_dc == 0 && ytob_dc == 0 && color_factor == kDefaultColorFactor && + base_correlation_x == 0.0f && base_correlation_b == kYToBRatio) { + writer->Write(1, 1); + allotment.ReclaimAndCharge(writer, layer, aux_out); + return; + } + writer->Write(1, 0); + JXL_CHECK(U32Coder::Write(kColorFactorDist, color_factor, writer)); + JXL_CHECK(F16Coder::Write(base_correlation_x, writer)); + JXL_CHECK(F16Coder::Write(base_correlation_b, writer)); + writer->Write(kBitsPerByte, ytox_dc - std::numeric_limits::min()); + writer->Write(kBitsPerByte, ytob_dc - std::numeric_limits::min()); + allotment.ReclaimAndCharge(writer, layer, aux_out); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.h b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.h new file mode 100644 index 0000000000..899b91b041 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/enc_chroma_from_luma.h @@ -0,0 +1,68 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_CHROMA_FROM_LUMA_H_ +#define LIB_JXL_ENC_CHROMA_FROM_LUMA_H_ + +// Chroma-from-luma, computed using heuristics to determine the best linear +// model for the X and B channels from the Y channel. + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/quant_weights.h" + +namespace jxl { + +struct AuxOut; +class Quantizer; + +void ColorCorrelationMapEncodeDC(ColorCorrelationMap* map, BitWriter* writer, + size_t layer, AuxOut* aux_out); + +struct CfLHeuristics { + void Init(const Image3F& opsin); + + void PrepareForThreads(size_t num_threads) { + mem = hwy::AllocateAligned(num_threads * kItemsPerThread); + } + + void ComputeTile(const Rect& r, const Image3F& opsin, + const DequantMatrices& dequant, + const AcStrategyImage* ac_strategy, + const ImageI* raw_quant_field, const Quantizer* quantizer, + bool fast, size_t thread, ColorCorrelationMap* cmap); + + void ComputeDC(bool fast, ColorCorrelationMap* cmap); + + ImageF dc_values; + hwy::AlignedFreeUniquePtr mem; + + // Working set is too large for stack; allocate dynamically. 
+ constexpr static size_t kItemsPerThread = + AcStrategy::kMaxCoeffArea * 3 // Blocks + + kColorTileDim * kColorTileDim * 4 // AC coeff storage + + AcStrategy::kMaxCoeffArea * 2; // Scratch space +}; + +} // namespace jxl + +#endif // LIB_JXL_ENC_CHROMA_FROM_LUMA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cluster.cc b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.cc new file mode 100644 index 0000000000..e8acb240c9 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.cc @@ -0,0 +1,293 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_cluster.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_cluster.cc" +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/fast_math-inl.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Eq; +using hwy::HWY_NAMESPACE::IfThenZeroElse; + +template +V Entropy(V count, V inv_total, V total) { + const HWY_CAPPED(float, Histogram::kRounding) d; + const auto zero = Set(d, 0.0f); + // TODO(eustas): why (0 - x) instead of Neg(x)? 
+ return IfThenZeroElse( + Eq(count, total), + Sub(zero, Mul(count, FastLog2f(d, Mul(inv_total, count))))); +} + +void HistogramEntropy(const Histogram& a) { + a.entropy_ = 0.0f; + if (a.total_count_ == 0) return; + + const HWY_CAPPED(float, Histogram::kRounding) df; + const HWY_CAPPED(int32_t, Histogram::kRounding) di; + + const auto inv_tot = Set(df, 1.0f / a.total_count_); + auto entropy_lanes = Zero(df); + auto total = Set(df, a.total_count_); + + for (size_t i = 0; i < a.data_.size(); i += Lanes(di)) { + const auto counts = LoadU(di, &a.data_[i]); + entropy_lanes = + Add(entropy_lanes, Entropy(ConvertTo(df, counts), inv_tot, total)); + } + a.entropy_ += GetLane(SumOfLanes(df, entropy_lanes)); +} + +float HistogramDistance(const Histogram& a, const Histogram& b) { + if (a.total_count_ == 0 || b.total_count_ == 0) return 0; + + const HWY_CAPPED(float, Histogram::kRounding) df; + const HWY_CAPPED(int32_t, Histogram::kRounding) di; + + const auto inv_tot = Set(df, 1.0f / (a.total_count_ + b.total_count_)); + auto distance_lanes = Zero(df); + auto total = Set(df, a.total_count_ + b.total_count_); + + for (size_t i = 0; i < std::max(a.data_.size(), b.data_.size()); + i += Lanes(di)) { + const auto a_counts = + a.data_.size() > i ? LoadU(di, &a.data_[i]) : Zero(di); + const auto b_counts = + b.data_.size() > i ? LoadU(di, &b.data_[i]) : Zero(di); + const auto counts = ConvertTo(df, Add(a_counts, b_counts)); + distance_lanes = Add(distance_lanes, Entropy(counts, inv_tot, total)); + } + const float total_distance = GetLane(SumOfLanes(df, distance_lanes)); + return total_distance - a.entropy_ - b.entropy_; +} + +// First step of a k-means clustering with a fancy distance metric. 
+void FastClusterHistograms(const std::vector& in, + size_t max_histograms, std::vector* out, + std::vector* histogram_symbols) { + out->clear(); + out->reserve(max_histograms); + histogram_symbols->clear(); + histogram_symbols->resize(in.size(), max_histograms); + + std::vector dists(in.size(), std::numeric_limits::max()); + size_t largest_idx = 0; + for (size_t i = 0; i < in.size(); i++) { + if (in[i].total_count_ == 0) { + (*histogram_symbols)[i] = 0; + dists[i] = 0.0f; + continue; + } + HistogramEntropy(in[i]); + if (in[i].total_count_ > in[largest_idx].total_count_) { + largest_idx = i; + } + } + + constexpr float kMinDistanceForDistinct = 48.0f; + while (out->size() < max_histograms) { + (*histogram_symbols)[largest_idx] = out->size(); + out->push_back(in[largest_idx]); + dists[largest_idx] = 0.0f; + largest_idx = 0; + for (size_t i = 0; i < in.size(); i++) { + if (dists[i] == 0.0f) continue; + dists[i] = std::min(HistogramDistance(in[i], out->back()), dists[i]); + if (dists[i] > dists[largest_idx]) largest_idx = i; + } + if (dists[largest_idx] < kMinDistanceForDistinct) break; + } + + for (size_t i = 0; i < in.size(); i++) { + if ((*histogram_symbols)[i] != max_histograms) continue; + size_t best = 0; + float best_dist = HistogramDistance(in[i], (*out)[best]); + for (size_t j = 1; j < out->size(); j++) { + float dist = HistogramDistance(in[i], (*out)[j]); + if (dist < best_dist) { + best = j; + best_dist = dist; + } + } + (*out)[best].AddHistogram(in[i]); + HistogramEntropy((*out)[best]); + (*histogram_symbols)[i] = best; + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(FastClusterHistograms); // Local function +HWY_EXPORT(HistogramEntropy); // Local function + +float Histogram::ShannonEntropy() const { + HWY_DYNAMIC_DISPATCH(HistogramEntropy)(*this); + return entropy_; +} + +namespace { +// 
----------------------------------------------------------------------------- +// Histogram refinement + +// Reorder histograms in *out so that the new symbols in *symbols come in +// increasing order. +void HistogramReindex(std::vector* out, + std::vector* symbols) { + std::vector tmp(*out); + std::map new_index; + int next_index = 0; + for (uint32_t symbol : *symbols) { + if (new_index.find(symbol) == new_index.end()) { + new_index[symbol] = next_index; + (*out)[next_index] = tmp[symbol]; + ++next_index; + } + } + out->resize(next_index); + for (uint32_t& symbol : *symbols) { + symbol = new_index[symbol]; + } +} + +} // namespace + +// Clusters similar histograms in 'in' together, the selected histograms are +// placed in 'out', and for each index in 'in', *histogram_symbols will +// indicate which of the 'out' histograms is the best approximation. +void ClusterHistograms(const HistogramParams params, + const std::vector& in, size_t max_histograms, + std::vector* out, + std::vector* histogram_symbols) { + max_histograms = std::min(max_histograms, params.max_histograms); + max_histograms = std::min(max_histograms, in.size()); + if (params.clustering == HistogramParams::ClusteringType::kFastest) { + max_histograms = std::min(max_histograms, static_cast(4)); + } + + HWY_DYNAMIC_DISPATCH(FastClusterHistograms) + (in, max_histograms, out, histogram_symbols); + + if (params.clustering == HistogramParams::ClusteringType::kBest) { + for (size_t i = 0; i < out->size(); i++) { + (*out)[i].entropy_ = + ANSPopulationCost((*out)[i].data_.data(), (*out)[i].data_.size()); + } + uint32_t next_version = 2; + std::vector version(out->size(), 1); + std::vector renumbering(out->size()); + std::iota(renumbering.begin(), renumbering.end(), 0); + + // Try to pair up clusters if doing so reduces the total cost. 
+ + struct HistogramPair { + // validity of a pair: p.version == max(version[i], version[j]) + float cost; + uint32_t first; + uint32_t second; + uint32_t version; + // We use > because priority queues sort in *decreasing* order, but we + // want lower cost elements to appear first. + bool operator<(const HistogramPair& other) const { + return std::make_tuple(cost, first, second, version) > + std::make_tuple(other.cost, other.first, other.second, + other.version); + } + }; + + // Create list of all pairs by increasing merging cost. + std::priority_queue pairs_to_merge; + for (uint32_t i = 0; i < out->size(); i++) { + for (uint32_t j = i + 1; j < out->size(); j++) { + Histogram histo; + histo.AddHistogram((*out)[i]); + histo.AddHistogram((*out)[j]); + float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) - + (*out)[i].entropy_ - (*out)[j].entropy_; + // Avoid enqueueing pairs that are not advantageous to merge. + if (cost >= 0) continue; + pairs_to_merge.push( + HistogramPair{cost, i, j, std::max(version[i], version[j])}); + } + } + + // Merge the best pair to merge, add new pairs that get formed as a + // consequence. 
+ while (!pairs_to_merge.empty()) { + uint32_t first = pairs_to_merge.top().first; + uint32_t second = pairs_to_merge.top().second; + uint32_t ver = pairs_to_merge.top().version; + pairs_to_merge.pop(); + if (ver != std::max(version[first], version[second]) || + version[first] == 0 || version[second] == 0) { + continue; + } + (*out)[first].AddHistogram((*out)[second]); + (*out)[first].entropy_ = ANSPopulationCost((*out)[first].data_.data(), + (*out)[first].data_.size()); + for (size_t i = 0; i < renumbering.size(); i++) { + if (renumbering[i] == second) { + renumbering[i] = first; + } + } + version[second] = 0; + version[first] = next_version++; + for (uint32_t j = 0; j < out->size(); j++) { + if (j == first) continue; + if (version[j] == 0) continue; + Histogram histo; + histo.AddHistogram((*out)[first]); + histo.AddHistogram((*out)[j]); + float cost = ANSPopulationCost(histo.data_.data(), histo.data_.size()) - + (*out)[first].entropy_ - (*out)[j].entropy_; + // Avoid enqueueing pairs that are not advantageous to merge. + if (cost >= 0) continue; + pairs_to_merge.push( + HistogramPair{cost, std::min(first, j), std::max(first, j), + std::max(version[first], version[j])}); + } + } + std::vector reverse_renumbering(out->size(), -1); + size_t num_alive = 0; + for (size_t i = 0; i < out->size(); i++) { + if (version[i] == 0) continue; + (*out)[num_alive++] = (*out)[i]; + reverse_renumbering[i] = num_alive - 1; + } + out->resize(num_alive); + for (size_t i = 0; i < histogram_symbols->size(); i++) { + (*histogram_symbols)[i] = + reverse_renumbering[renumbering[(*histogram_symbols)[i]]]; + } + } + + // Convert the context map to a canonical form. 
+ HistogramReindex(out, histogram_symbols); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_cluster.h b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.h new file mode 100644 index 0000000000..4b062e820c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_cluster.h @@ -0,0 +1,63 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Functions for clustering similar histograms together. + +#ifndef LIB_JXL_ENC_CLUSTER_H_ +#define LIB_JXL_ENC_CLUSTER_H_ + +#include +#include +#include + +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/enc_ans.h" + +namespace jxl { + +struct Histogram { + Histogram() { + total_count_ = 0; + entropy_ = 0.0; + } + void Clear() { + data_.clear(); + total_count_ = 0; + } + void Add(size_t symbol) { + if (data_.size() <= symbol) { + data_.resize(DivCeil(symbol + 1, kRounding) * kRounding); + } + ++data_[symbol]; + ++total_count_; + } + void AddHistogram(const Histogram& other) { + if (other.data_.size() > data_.size()) { + data_.resize(other.data_.size()); + } + for (size_t i = 0; i < other.data_.size(); ++i) { + data_[i] += other.data_[i]; + } + total_count_ += other.total_count_; + } + float PopulationCost() const { + return ANSPopulationCost(data_.data(), data_.size()); + } + float ShannonEntropy() const; + + std::vector data_; + size_t total_count_; + mutable float entropy_; // WARNING: not kept up-to-date. 
+ static constexpr size_t kRounding = 8; +}; + +void ClusterHistograms(HistogramParams params, const std::vector& in, + size_t max_histograms, std::vector* out, + std::vector* histogram_symbols); +} // namespace jxl + +#endif // LIB_JXL_ENC_CLUSTER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.cc b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.cc new file mode 100644 index 0000000000..e994952731 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.cc @@ -0,0 +1,290 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#include +#include +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/lehmer_code.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +struct AuxOut; + +std::pair ComputeUsedOrders( + const SpeedTier speed, const AcStrategyImage& ac_strategy, + const Rect& rect) { + // Only uses DCT8 = 0, so bitfield = 1. + if (speed >= SpeedTier::kFalcon) return {1, 1}; + + uint32_t ret = 0; + uint32_t ret_customize = 0; + size_t xsize_blocks = rect.xsize(); + size_t ysize_blocks = rect.ysize(); + // TODO(veluca): precompute when doing DCT. + for (size_t by = 0; by < ysize_blocks; ++by) { + AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by); + for (size_t bx = 0; bx < xsize_blocks; ++bx) { + int ord = kStrategyOrder[acs_row[bx].RawStrategy()]; + // Do not customize coefficient orders for blocks bigger than 32x32. 
+ ret |= 1u << ord; + if (ord > 6) { + continue; + } + ret_customize |= 1u << ord; + } + } + // Use default orders for small images. + if (ac_strategy.xsize() < 5 && ac_strategy.ysize() < 5) return {ret, 0}; + return {ret, ret_customize}; +} + +void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs, + const AcStrategyImage& ac_strategy, + const FrameDimensions& frame_dim, uint32_t& used_orders, + uint16_t used_acs, coeff_order_t* JXL_RESTRICT order) { + std::vector num_zeros(kCoeffOrderMaxSize); + // If compressing at high speed and only using 8x8 DCTs, only consider a + // subset of blocks. + double block_fraction = 1.0f; + // TODO(veluca): figure out why sampling blocks if non-8x8s are used makes + // encoding significantly less dense. + if (speed >= SpeedTier::kSquirrel && used_orders == 1) { + block_fraction = 0.5f; + } + // No need to compute number of zero coefficients if all orders are the + // default. + if (used_orders != 0) { + uint64_t threshold = + (std::numeric_limits::max() >> 32) * block_fraction; + uint64_t s[2] = {static_cast(0x94D049BB133111EBull), + static_cast(0xBF58476D1CE4E5B9ull)}; + // Xorshift128+ adapted from xorshift128+-inl.h + auto use_sample = [&]() { + auto s1 = s[0]; + const auto s0 = s[1]; + const auto bits = s1 + s0; // b, c + s[0] = s0; + s1 ^= s1 << 23; + s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); + s[1] = s1; + return (bits >> 32) <= threshold; + }; + + // Count number of zero coefficients, separately for each DCT band. + // TODO(veluca): precompute when doing DCT. 
+ for (size_t group_index = 0; group_index < frame_dim.num_groups; + group_index++) { + const size_t gx = group_index % frame_dim.xsize_groups; + const size_t gy = group_index / frame_dim.xsize_groups; + const Rect rect(gx * kGroupDimInBlocks, gy * kGroupDimInBlocks, + kGroupDimInBlocks, kGroupDimInBlocks, + frame_dim.xsize_blocks, frame_dim.ysize_blocks); + ConstACPtr rows[3]; + ACType type = acs.Type(); + for (size_t c = 0; c < 3; c++) { + rows[c] = acs.PlaneRow(c, group_index, 0); + } + size_t ac_offset = 0; + + // TODO(veluca): SIMDfy. + for (size_t by = 0; by < rect.ysize(); ++by) { + AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by); + for (size_t bx = 0; bx < rect.xsize(); ++bx) { + AcStrategy acs = acs_row[bx]; + if (!acs.IsFirstBlock()) continue; + if (!use_sample()) continue; + size_t size = kDCTBlockSize << acs.log2_covered_blocks(); + for (size_t c = 0; c < 3; ++c) { + const size_t order_offset = + CoeffOrderOffset(kStrategyOrder[acs.RawStrategy()], c); + if (type == ACType::k16) { + for (size_t k = 0; k < size; k++) { + bool is_zero = rows[c].ptr16[ac_offset + k] == 0; + num_zeros[order_offset + k] += is_zero ? 1 : 0; + } + } else { + for (size_t k = 0; k < size; k++) { + bool is_zero = rows[c].ptr32[ac_offset + k] == 0; + num_zeros[order_offset + k] += is_zero ? 1 : 0; + } + } + // Ensure LLFs are first in the order. 
+ size_t cx = acs.covered_blocks_x(); + size_t cy = acs.covered_blocks_y(); + CoefficientLayout(&cy, &cx); + for (size_t iy = 0; iy < cy; iy++) { + for (size_t ix = 0; ix < cx; ix++) { + num_zeros[order_offset + iy * kBlockDim * cx + ix] = -1; + } + } + } + ac_offset += size; + } + } + } + } + struct PosAndCount { + uint32_t pos; + uint32_t count; + }; + auto mem = hwy::AllocateAligned(AcStrategy::kMaxCoeffArea); + + std::vector natural_order_buffer; + + uint16_t computed = 0; + for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { + uint8_t ord = kStrategyOrder[o]; + if (computed & (1 << ord)) continue; + computed |= 1 << ord; + AcStrategy acs = AcStrategy::FromRawStrategy(o); + size_t sz = kDCTBlockSize * acs.covered_blocks_x() * acs.covered_blocks_y(); + + // Do nothing for transforms that don't appear. + if ((1 << ord) & ~used_acs) continue; + + if (natural_order_buffer.size() < sz) natural_order_buffer.resize(sz); + acs.ComputeNaturalCoeffOrder(natural_order_buffer.data()); + + // Ensure natural coefficient order is not permuted if the order is + // not transmitted. + if ((1 << ord) & ~used_orders) { + for (size_t c = 0; c < 3; c++) { + size_t offset = CoeffOrderOffset(ord, c); + JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz); + memcpy(&order[offset], natural_order_buffer.data(), + sz * sizeof(*order)); + } + continue; + } + + bool is_nondefault = false; + for (uint8_t c = 0; c < 3; c++) { + // Apply zig-zag order. + PosAndCount* pos_and_val = mem.get(); + size_t offset = CoeffOrderOffset(ord, c); + JXL_DASSERT(CoeffOrderOffset(ord, c + 1) - offset == sz); + float inv_sqrt_sz = 1.0f / std::sqrt(sz); + for (size_t i = 0; i < sz; ++i) { + size_t pos = natural_order_buffer[i]; + pos_and_val[i].pos = pos; + // We don't care for the exact number -> quantize number of zeros, + // to get less permuted order. 
+ pos_and_val[i].count = num_zeros[offset + pos] * inv_sqrt_sz + 0.1f; + } + + // Stable-sort -> elements with same number of zeros will preserve their + // order. + auto comparator = [](const PosAndCount& a, const PosAndCount& b) -> bool { + return a.count < b.count; + }; + std::stable_sort(pos_and_val, pos_and_val + sz, comparator); + + // Grab indices. + for (size_t i = 0; i < sz; ++i) { + order[offset + i] = pos_and_val[i].pos; + is_nondefault |= natural_order_buffer[i] != pos_and_val[i].pos; + } + } + if (!is_nondefault) { + used_orders &= ~(1 << ord); + } + } +} + +namespace { + +void TokenizePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip, + size_t size, std::vector* tokens) { + std::vector lehmer(size); + std::vector temp(size + 1); + ComputeLehmerCode(order, temp.data(), size, lehmer.data()); + size_t end = size; + while (end > skip && lehmer[end - 1] == 0) { + --end; + } + tokens->emplace_back(CoeffOrderContext(size), end - skip); + uint32_t last = 0; + for (size_t i = skip; i < end; ++i) { + tokens->emplace_back(CoeffOrderContext(last), lehmer[i]); + last = lehmer[i]; + } +} + +} // namespace + +void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip, + size_t size, BitWriter* writer, int layer, + AuxOut* aux_out) { + std::vector> tokens(1); + TokenizePermutation(order, skip, size, &tokens[0]); + std::vector context_map; + EntropyEncodingData codes; + BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens, + &codes, &context_map, writer, layer, aux_out); + WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out); +} + +namespace { +void EncodeCoeffOrder(const coeff_order_t* JXL_RESTRICT order, AcStrategy acs, + std::vector* tokens, coeff_order_t* order_zigzag, + std::vector& natural_order_lut) { + const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y(); + const size_t size = kDCTBlockSize * llf; + for (size_t i = 0; i < size; ++i) { + order_zigzag[i] = 
natural_order_lut[order[i]]; + } + TokenizePermutation(order_zigzag, llf, size, tokens); +} +} // namespace + +void EncodeCoeffOrders(uint16_t used_orders, + const coeff_order_t* JXL_RESTRICT order, + BitWriter* writer, size_t layer, + AuxOut* JXL_RESTRICT aux_out) { + auto mem = hwy::AllocateAligned(AcStrategy::kMaxCoeffArea); + uint16_t computed = 0; + std::vector> tokens(1); + std::vector natural_order_lut; + for (uint8_t o = 0; o < AcStrategy::kNumValidStrategies; ++o) { + uint8_t ord = kStrategyOrder[o]; + if (computed & (1 << ord)) continue; + computed |= 1 << ord; + if ((used_orders & (1 << ord)) == 0) continue; + AcStrategy acs = AcStrategy::FromRawStrategy(o); + const size_t llf = acs.covered_blocks_x() * acs.covered_blocks_y(); + const size_t size = kDCTBlockSize * llf; + if (natural_order_lut.size() < size) natural_order_lut.resize(size); + acs.ComputeNaturalCoeffOrderLut(natural_order_lut.data()); + for (size_t c = 0; c < 3; c++) { + EncodeCoeffOrder(&order[CoeffOrderOffset(ord, c)], acs, &tokens[0], + mem.get(), natural_order_lut); + } + } + // Do not write anything if no order is used. + if (used_orders != 0) { + std::vector context_map; + EntropyEncodingData codes; + BuildAndEncodeHistograms(HistogramParams(), kPermutationContexts, tokens, + &codes, &context_map, writer, layer, aux_out); + WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.h b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.h new file mode 100644 index 0000000000..3a43f4f986 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_coeff_order.h @@ -0,0 +1,54 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_COEFF_ORDER_H_ +#define LIB_JXL_ENC_COEFF_ORDER_H_ + +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_params.h" + +namespace jxl { + +struct AuxOut; + +// Orders that are actually used in part of image. `rect` is in block units. +// Returns {orders that are used, orders that might be made non-default}. +std::pair ComputeUsedOrders( + SpeedTier speed, const AcStrategyImage& ac_strategy, const Rect& rect); + +// Modify zig-zag order, so that DCT bands with more zeros go later. +// Order of DCT bands with same number of zeros is untouched, so +// permutation will be cheaper to encode. +void ComputeCoeffOrder(SpeedTier speed, const ACImage& acs, + const AcStrategyImage& ac_strategy, + const FrameDimensions& frame_dim, uint32_t& used_orders, + uint16_t used_acs, coeff_order_t* JXL_RESTRICT order); + +void EncodeCoeffOrders(uint16_t used_orders, + const coeff_order_t* JXL_RESTRICT order, + BitWriter* writer, size_t layer, + AuxOut* JXL_RESTRICT aux_out); + +// Encoding/decoding of a single permutation. `size`: number of elements in the +// permutation. `skip`: number of elements to skip from the *beginning* of the +// permutation. 
+void EncodePermutation(const coeff_order_t* JXL_RESTRICT order, size_t skip, + size_t size, BitWriter* writer, int layer, + AuxOut* aux_out); + +} // namespace jxl + +#endif // LIB_JXL_ENC_COEFF_ORDER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_color_management.cc b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.cc new file mode 100644 index 0000000000..0cb9188bc6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.cc @@ -0,0 +1,1297 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_color_management.h" + +#ifndef JPEGXL_ENABLE_SKCMS +#define JPEGXL_ENABLE_SKCMS 0 +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_color_management.cc" +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/matrix_ops.h" +#include "lib/jxl/transfer_functions-inl.h" +#if JPEGXL_ENABLE_SKCMS +#include "lib/jxl/enc_jxl_skcms.h" +#else // JPEGXL_ENABLE_SKCMS +#include "lcms2.h" +#include "lcms2_plugin.h" +#endif // JPEGXL_ENABLE_SKCMS + +#define JXL_CMS_VERBOSE 0 + +// Define these only once. We can't use HWY_ONCE here because it is defined as +// 1 only on the last pass. +#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_CC_ +#define LIB_JXL_ENC_COLOR_MANAGEMENT_CC_ + +namespace jxl { +namespace { +struct JxlCms { +#if JPEGXL_ENABLE_SKCMS + PaddedBytes icc_src, icc_dst; + skcms_ICCProfile profile_src, profile_dst; +#else + void* lcms_transform; +#endif + + // These fields are used when the HLG OOTF or inverse OOTF must be applied. 
+ bool apply_hlg_ootf; + size_t hlg_ootf_num_channels; + // Y component of the primaries. + std::array hlg_ootf_luminances; + + size_t channels_src; + size_t channels_dst; + ImageF buf_src; + ImageF buf_dst; + float intensity_target; + bool skip_lcms = false; + ExtraTF preprocess = ExtraTF::kNone; + ExtraTF postprocess = ExtraTF::kNone; +}; + +Status ApplyHlgOotf(JxlCms* t, float* JXL_RESTRICT buf, size_t xsize, + bool forward); +} // namespace +} // namespace jxl + +#endif // LIB_JXL_ENC_COLOR_MANAGEMENT_CC_ + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +#if JXL_CMS_VERBOSE >= 2 +const size_t kX = 0; // pixel index, multiplied by 3 for RGB +#endif + +// xform_src = UndoGammaCompression(buf_src). +Status BeforeTransform(JxlCms* t, const float* buf_src, float* xform_src, + size_t buf_size) { + switch (t->preprocess) { + case ExtraTF::kNone: + JXL_DASSERT(false); // unreachable + break; + + case ExtraTF::kPQ: { + // By default, PQ content has an intensity target of 10000, stored + // exactly. + HWY_FULL(float) df; + const auto multiplier = Set(df, t->intensity_target == 10000.f + ? 
1.0f + : 10000.f / t->intensity_target); + for (size_t i = 0; i < buf_size; i += Lanes(df)) { + const auto val = Load(df, buf_src + i); + const auto result = + Mul(multiplier, TF_PQ().DisplayFromEncoded(df, val)); + Store(result, df, xform_src + i); + } +#if JXL_CMS_VERBOSE >= 2 + printf("pre in %.4f %.4f %.4f undoPQ %.4f %.4f %.4f\n", buf_src[3 * kX], + buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX], + xform_src[3 * kX + 1], xform_src[3 * kX + 2]); +#endif + break; + } + + case ExtraTF::kHLG: + for (size_t i = 0; i < buf_size; ++i) { + xform_src[i] = static_cast( + TF_HLG().DisplayFromEncoded(static_cast(buf_src[i]))); + } + if (t->apply_hlg_ootf) { + JXL_RETURN_IF_ERROR( + ApplyHlgOotf(t, xform_src, buf_size, /*forward=*/true)); + } +#if JXL_CMS_VERBOSE >= 2 + printf("pre in %.4f %.4f %.4f undoHLG %.4f %.4f %.4f\n", buf_src[3 * kX], + buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX], + xform_src[3 * kX + 1], xform_src[3 * kX + 2]); +#endif + break; + + case ExtraTF::kSRGB: + HWY_FULL(float) df; + for (size_t i = 0; i < buf_size; i += Lanes(df)) { + const auto val = Load(df, buf_src + i); + const auto result = TF_SRGB().DisplayFromEncoded(val); + Store(result, df, xform_src + i); + } +#if JXL_CMS_VERBOSE >= 2 + printf("pre in %.4f %.4f %.4f undoSRGB %.4f %.4f %.4f\n", buf_src[3 * kX], + buf_src[3 * kX + 1], buf_src[3 * kX + 2], xform_src[3 * kX], + xform_src[3 * kX + 1], xform_src[3 * kX + 2]); +#endif + break; + } + return true; +} + +// Applies gamma compression in-place. +Status AfterTransform(JxlCms* t, float* JXL_RESTRICT buf_dst, size_t buf_size) { + switch (t->postprocess) { + case ExtraTF::kNone: + JXL_DASSERT(false); // unreachable + break; + case ExtraTF::kPQ: { + HWY_FULL(float) df; + const auto multiplier = + Set(df, t->intensity_target == 10000.f ? 
1.0f + : t->intensity_target * 1e-4f); + for (size_t i = 0; i < buf_size; i += Lanes(df)) { + const auto val = Load(df, buf_dst + i); + const auto result = + TF_PQ().EncodedFromDisplay(df, Mul(multiplier, val)); + Store(result, df, buf_dst + i); + } +#if JXL_CMS_VERBOSE >= 2 + printf("after PQ enc %.4f %.4f %.4f\n", buf_dst[3 * kX], + buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]); +#endif + break; + } + case ExtraTF::kHLG: + if (t->apply_hlg_ootf) { + JXL_RETURN_IF_ERROR( + ApplyHlgOotf(t, buf_dst, buf_size, /*forward=*/false)); + } + for (size_t i = 0; i < buf_size; ++i) { + buf_dst[i] = static_cast( + TF_HLG().EncodedFromDisplay(static_cast(buf_dst[i]))); + } +#if JXL_CMS_VERBOSE >= 2 + printf("after HLG enc %.4f %.4f %.4f\n", buf_dst[3 * kX], + buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]); +#endif + break; + case ExtraTF::kSRGB: + HWY_FULL(float) df; + for (size_t i = 0; i < buf_size; i += Lanes(df)) { + const auto val = Load(df, buf_dst + i); + const auto result = + TF_SRGB().EncodedFromDisplay(HWY_FULL(float)(), val); + Store(result, df, buf_dst + i); + } +#if JXL_CMS_VERBOSE >= 2 + printf("after SRGB enc %.4f %.4f %.4f\n", buf_dst[3 * kX], + buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]); +#endif + break; + } + return true; +} + +Status DoColorSpaceTransform(void* cms_data, const size_t thread, + const float* buf_src, float* buf_dst, + size_t xsize) { + // No lock needed. + JxlCms* t = reinterpret_cast(cms_data); + + const float* xform_src = buf_src; // Read-only. + if (t->preprocess != ExtraTF::kNone) { + float* mutable_xform_src = t->buf_src.Row(thread); // Writable buffer. + JXL_RETURN_IF_ERROR(BeforeTransform(t, buf_src, mutable_xform_src, + xsize * t->channels_src)); + xform_src = mutable_xform_src; + } + +#if JPEGXL_ENABLE_SKCMS + if (t->channels_src == 1 && !t->skip_lcms) { + // Expand from 1 to 3 channels, starting from the end in case + // xform_src == t->buf_src.Row(thread). 
+ float* mutable_xform_src = t->buf_src.Row(thread); + for (size_t i = 0; i < xsize; ++i) { + const size_t x = xsize - i - 1; + mutable_xform_src[x * 3] = mutable_xform_src[x * 3 + 1] = + mutable_xform_src[x * 3 + 2] = xform_src[x]; + } + xform_src = mutable_xform_src; + } +#else + if (t->channels_src == 4 && !t->skip_lcms) { + // LCMS does CMYK in a weird way: 0 = white, 100 = max ink + // Read from xform_src, not from the t->buf_src row: when no preprocessing + // ran, xform_src is still the caller's buf_src, which need not be that row. + // Element x is read before it is written, so aliasing stays safe. + float* mutable_xform_src = t->buf_src.Row(thread); + for (size_t x = 0; x < xsize * 4; ++x) { + mutable_xform_src[x] = 100.f - 100.f * xform_src[x]; + } + xform_src = mutable_xform_src; + } +#endif + +#if JXL_CMS_VERBOSE >= 2 + // Save inputs for printing before in-place transforms overwrite them. + const float in0 = xform_src[3 * kX + 0]; + const float in1 = xform_src[3 * kX + 1]; + const float in2 = xform_src[3 * kX + 2]; +#endif + + if (t->skip_lcms) { + if (buf_dst != xform_src) { + memcpy(buf_dst, xform_src, xsize * t->channels_src * sizeof(*buf_dst)); + } // else: in-place, no need to copy + } else { +#if JPEGXL_ENABLE_SKCMS + JXL_CHECK( + skcms_Transform(xform_src, + (t->channels_src == 4 ? skcms_PixelFormat_RGBA_ffff + : skcms_PixelFormat_RGB_fff), + skcms_AlphaFormat_Opaque, &t->profile_src, buf_dst, + skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque, + &t->profile_dst, xsize)); +#else // JPEGXL_ENABLE_SKCMS + cmsDoTransform(t->lcms_transform, xform_src, buf_dst, + static_cast(xsize)); +#endif // JPEGXL_ENABLE_SKCMS + } +#if JXL_CMS_VERBOSE >= 2 + printf("xform skip%d: %.4f %.4f %.4f (%p) -> (%p) %.4f %.4f %.4f\n", + t->skip_lcms, in0, in1, in2, xform_src, buf_dst, buf_dst[3 * kX], + buf_dst[3 * kX + 1], buf_dst[3 * kX + 2]); +#endif + +#if JPEGXL_ENABLE_SKCMS + if (t->channels_dst == 1 && !t->skip_lcms) { + // Contract back from 3 to 1 channel, this time forward.
+ float* grayscale_buf_dst = t->buf_dst.Row(thread); + for (size_t x = 0; x < xsize; ++x) { + grayscale_buf_dst[x] = buf_dst[x * 3]; + } + buf_dst = grayscale_buf_dst; + } +#endif + + if (t->postprocess != ExtraTF::kNone) { + JXL_RETURN_IF_ERROR(AfterTransform(t, buf_dst, xsize * t->channels_dst)); + } + return true; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +namespace { + +HWY_EXPORT(DoColorSpaceTransform); +int DoColorSpaceTransform(void* t, size_t thread, const float* buf_src, + float* buf_dst, size_t xsize) { + return HWY_DYNAMIC_DISPATCH(DoColorSpaceTransform)(t, thread, buf_src, + buf_dst, xsize); +} + +// Define to 1 on OS X as a workaround for older LCMS lacking MD5. +#define JXL_CMS_OLD_VERSION 0 + +#if JPEGXL_ENABLE_SKCMS + +JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const float XYZ[3]) { + const float factor = 1.f / (XYZ[0] + XYZ[1] + XYZ[2]); + CIExy xy; + xy.x = XYZ[0] * factor; + xy.y = XYZ[1] * factor; + return xy; +} + +#else // JPEGXL_ENABLE_SKCMS +// (LCMS interface requires xyY but we omit the Y for white points/primaries.) + +JXL_MUST_USE_RESULT CIExy CIExyFromxyY(const cmsCIExyY& xyY) { + CIExy xy; + xy.x = xyY.x; + xy.y = xyY.y; + return xy; +} + +JXL_MUST_USE_RESULT CIExy CIExyFromXYZ(const cmsCIEXYZ& XYZ) { + cmsCIExyY xyY; + cmsXYZ2xyY(/*Dest=*/&xyY, /*Source=*/&XYZ); + return CIExyFromxyY(xyY); +} + +JXL_MUST_USE_RESULT cmsCIEXYZ D50_XYZ() { + // Quantized D50 as stored in ICC profiles. 
+ return {0.96420288, 1.0, 0.82490540}; +} + +// RAII + +struct ProfileDeleter { + void operator()(void* p) { cmsCloseProfile(p); } +}; +using Profile = std::unique_ptr; + +struct TransformDeleter { + void operator()(void* p) { cmsDeleteTransform(p); } +}; +using Transform = std::unique_ptr; + +struct CurveDeleter { + void operator()(cmsToneCurve* p) { cmsFreeToneCurve(p); } +}; +using Curve = std::unique_ptr; + +Status CreateProfileXYZ(const cmsContext context, + Profile* JXL_RESTRICT profile) { + profile->reset(cmsCreateXYZProfileTHR(context)); + if (profile->get() == nullptr) return JXL_FAILURE("Failed to create XYZ"); + return true; +} + +#endif // !JPEGXL_ENABLE_SKCMS + +#if JPEGXL_ENABLE_SKCMS +// IMPORTANT: icc must outlive profile. +Status DecodeProfile(const uint8_t* icc, size_t size, + skcms_ICCProfile* const profile) { + if (!skcms_Parse(icc, size, profile)) { + return JXL_FAILURE("Failed to parse ICC profile with %" PRIuS " bytes", + size); + } + return true; +} +#else // JPEGXL_ENABLE_SKCMS +Status DecodeProfile(const cmsContext context, Span icc, + Profile* profile) { + profile->reset(cmsOpenProfileFromMemTHR(context, icc.data(), icc.size())); + if (profile->get() == nullptr) { + return JXL_FAILURE("Failed to decode profile"); + } + + // WARNING: due to the LCMS MD5 issue mentioned above, many existing + // profiles have incorrect MD5, so do not even bother checking them nor + // generating warning clutter. 
+ + return true; +} +#endif // JPEGXL_ENABLE_SKCMS + +#if JPEGXL_ENABLE_SKCMS + +ColorSpace ColorSpaceFromProfile(const skcms_ICCProfile& profile) { + switch (profile.data_color_space) { + case skcms_Signature_RGB: + case skcms_Signature_CMYK: + // spec says CMYK is encoded as RGB (the kBlack extra channel signals that + // it is actually CMYK) + return ColorSpace::kRGB; + case skcms_Signature_Gray: + return ColorSpace::kGray; + default: + return ColorSpace::kUnknown; + } +} + +// vector_out := matmul(matrix, vector_in) +void MatrixProduct(const skcms_Matrix3x3& matrix, const float vector_in[3], + float vector_out[3]) { + for (int i = 0; i < 3; ++i) { + vector_out[i] = 0; + for (int j = 0; j < 3; ++j) { + vector_out[i] += matrix.vals[i][j] * vector_in[j]; + } + } +} + +// Returns white point that was specified when creating the profile. +JXL_MUST_USE_RESULT Status UnadaptedWhitePoint(const skcms_ICCProfile& profile, + CIExy* out) { + float media_white_point_XYZ[3]; + if (!skcms_GetWTPT(&profile, media_white_point_XYZ)) { + return JXL_FAILURE("ICC profile does not contain WhitePoint tag"); + } + skcms_Matrix3x3 CHAD; + if (!skcms_GetCHAD(&profile, &CHAD)) { + // If there is no chromatic adaptation matrix, it means that the white point + // is already unadapted. + *out = CIExyFromXYZ(media_white_point_XYZ); + return true; + } + // Otherwise, it has been adapted to the PCS white point using said matrix, + // and the adaptation needs to be undone. 
+ skcms_Matrix3x3 inverse_CHAD; + if (!skcms_Matrix3x3_invert(&CHAD, &inverse_CHAD)) { + return JXL_FAILURE("Non-invertible ChromaticAdaptation matrix"); + } + float unadapted_white_point_XYZ[3]; + MatrixProduct(inverse_CHAD, media_white_point_XYZ, unadapted_white_point_XYZ); + *out = CIExyFromXYZ(unadapted_white_point_XYZ); + return true; +} + +Status IdentifyPrimaries(const skcms_ICCProfile& profile, + const CIExy& wp_unadapted, ColorEncoding* c) { + if (!c->HasPrimaries()) return true; + + skcms_Matrix3x3 CHAD, inverse_CHAD; + if (skcms_GetCHAD(&profile, &CHAD)) { + JXL_RETURN_IF_ERROR(skcms_Matrix3x3_invert(&CHAD, &inverse_CHAD)); + } else { + static constexpr skcms_Matrix3x3 kLMSFromXYZ = { + {{0.8951, 0.2664, -0.1614}, + {-0.7502, 1.7135, 0.0367}, + {0.0389, -0.0685, 1.0296}}}; + static constexpr skcms_Matrix3x3 kXYZFromLMS = { + {{0.9869929, -0.1470543, 0.1599627}, + {0.4323053, 0.5183603, 0.0492912}, + {-0.0085287, 0.0400428, 0.9684867}}}; + static constexpr float kWpD50XYZ[3] = {0.96420288, 1.0, 0.82490540}; + float wp_unadapted_XYZ[3]; + JXL_RETURN_IF_ERROR(CIEXYZFromWhiteCIExy(wp_unadapted, wp_unadapted_XYZ)); + float wp_D50_LMS[3], wp_unadapted_LMS[3]; + MatrixProduct(kLMSFromXYZ, kWpD50XYZ, wp_D50_LMS); + MatrixProduct(kLMSFromXYZ, wp_unadapted_XYZ, wp_unadapted_LMS); + inverse_CHAD = {{{wp_unadapted_LMS[0] / wp_D50_LMS[0], 0, 0}, + {0, wp_unadapted_LMS[1] / wp_D50_LMS[1], 0}, + {0, 0, wp_unadapted_LMS[2] / wp_D50_LMS[2]}}}; + inverse_CHAD = skcms_Matrix3x3_concat(&kXYZFromLMS, &inverse_CHAD); + inverse_CHAD = skcms_Matrix3x3_concat(&inverse_CHAD, &kLMSFromXYZ); + } + + float XYZ[3]; + PrimariesCIExy primaries; + CIExy* const chromaticities[] = {&primaries.r, &primaries.g, &primaries.b}; + for (int i = 0; i < 3; ++i) { + float RGB[3] = {}; + RGB[i] = 1; + skcms_Transform(RGB, skcms_PixelFormat_RGB_fff, skcms_AlphaFormat_Opaque, + &profile, XYZ, skcms_PixelFormat_RGB_fff, + skcms_AlphaFormat_Opaque, skcms_XYZD50_profile(), 1); + float unadapted_XYZ[3]; 
+ MatrixProduct(inverse_CHAD, XYZ, unadapted_XYZ); + *chromaticities[i] = CIExyFromXYZ(unadapted_XYZ); + } + return c->SetPrimaries(primaries); +} + +void DetectTransferFunction(const skcms_ICCProfile& profile, + ColorEncoding* JXL_RESTRICT c) { + if (c->tf.SetImplicit()) return; + + float gamma[3] = {}; + if (profile.has_trc) { + const auto IsGamma = [](const skcms_TransferFunction& tf) { + return tf.a == 1 && tf.b == 0 && + /* if b and d are zero, it is fine for c not to be */ tf.d == 0 && + tf.e == 0 && tf.f == 0; + }; + for (int i = 0; i < 3; ++i) { + if (profile.trc[i].table_entries == 0 && + IsGamma(profile.trc->parametric)) { + gamma[i] = 1.f / profile.trc->parametric.g; + } else { + skcms_TransferFunction approximate_tf; + float max_error; + if (skcms_ApproximateCurve(&profile.trc[i], &approximate_tf, + &max_error)) { + if (IsGamma(approximate_tf)) { + gamma[i] = 1.f / approximate_tf.g; + } + } + } + } + } + if (gamma[0] != 0 && std::abs(gamma[0] - gamma[1]) < 1e-4f && + std::abs(gamma[1] - gamma[2]) < 1e-4f) { + if (c->tf.SetGamma(gamma[0])) { + skcms_ICCProfile profile_test; + PaddedBytes bytes; + if (MaybeCreateProfile(*c, &bytes) && + DecodeProfile(bytes.data(), bytes.size(), &profile_test) && + skcms_ApproximatelyEqualProfiles(&profile, &profile_test)) { + return; + } + } + } + + for (TransferFunction tf : Values()) { + // Can only create profile from known transfer function. 
+ if (tf == TransferFunction::kUnknown) continue; + + c->tf.SetTransferFunction(tf); + + skcms_ICCProfile profile_test; + PaddedBytes bytes; + if (MaybeCreateProfile(*c, &bytes) && + DecodeProfile(bytes.data(), bytes.size(), &profile_test) && + skcms_ApproximatelyEqualProfiles(&profile, &profile_test)) { + return; + } + } + + c->tf.SetTransferFunction(TransferFunction::kUnknown); +} + +#else // JPEGXL_ENABLE_SKCMS + +uint32_t Type32(const ColorEncoding& c, bool cmyk) { + if (cmyk) return TYPE_CMYK_FLT; + if (c.IsGray()) return TYPE_GRAY_FLT; + return TYPE_RGB_FLT; +} + +uint32_t Type64(const ColorEncoding& c) { + if (c.IsGray()) return TYPE_GRAY_DBL; + return TYPE_RGB_DBL; +} + +ColorSpace ColorSpaceFromProfile(const Profile& profile) { + switch (cmsGetColorSpace(profile.get())) { + case cmsSigRgbData: + case cmsSigCmykData: + return ColorSpace::kRGB; + case cmsSigGrayData: + return ColorSpace::kGray; + default: + return ColorSpace::kUnknown; + } +} + +// "profile1" is pre-decoded to save time in DetectTransferFunction. 
+Status ProfileEquivalentToICC(const cmsContext context, const Profile& profile1, + const PaddedBytes& icc, const ColorEncoding& c) { + const uint32_t type_src = Type64(c); + + Profile profile2; + JXL_RETURN_IF_ERROR( + DecodeProfile(context, Span(icc), &profile2)); + + Profile profile_xyz; + JXL_RETURN_IF_ERROR(CreateProfileXYZ(context, &profile_xyz)); + + const uint32_t intent = INTENT_RELATIVE_COLORIMETRIC; + const uint32_t flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_BLACKPOINTCOMPENSATION | + cmsFLAGS_HIGHRESPRECALC; + Transform xform1(cmsCreateTransformTHR(context, profile1.get(), type_src, + profile_xyz.get(), TYPE_XYZ_DBL, + intent, flags)); + Transform xform2(cmsCreateTransformTHR(context, profile2.get(), type_src, + profile_xyz.get(), TYPE_XYZ_DBL, + intent, flags)); + if (xform1 == nullptr || xform2 == nullptr) { + return JXL_FAILURE("Failed to create transform"); + } + + double in[3]; + double out1[3]; + double out2[3]; + + // Uniformly spaced samples from very dark to almost fully bright. + // Each sample is pushed through both transforms to XYZ; the profiles count + // as equivalent only if every compared output agrees within 2E-4. + const double init = 1E-3; + const double step = 0.2; + + if (c.IsGray()) { + // Finer sampling (step / 8). Only in[0] is written and only the first + // output component is compared; in[1] and in[2] stay unset. + for (in[0] = init; in[0] < 1.0; in[0] += step / 8) { + cmsDoTransform(xform1.get(), in, out1, 1); + cmsDoTransform(xform2.get(), in, out2, 1); + if (!ApproxEq(out1[0], out2[0], 2E-4)) { + return false; + } + } + } else { + // Walk the full 3-D grid of samples and compare all three outputs. + for (in[0] = init; in[0] < 1.0; in[0] += step) { + for (in[1] = init; in[1] < 1.0; in[1] += step) { + for (in[2] = init; in[2] < 1.0; in[2] += step) { + cmsDoTransform(xform1.get(), in, out1, 1); + cmsDoTransform(xform2.get(), in, out2, 1); + for (size_t i = 0; i < 3; ++i) { + if (!ApproxEq(out1[i], out2[i], 2E-4)) { + return false; + } + } + } + } + } + } + + return true; +} + +// Returns white point that was specified when creating the profile. +// NOTE: we can't just use cmsSigMediaWhitePointTag because its interpretation +// differs between ICC versions.
+JXL_MUST_USE_RESULT cmsCIEXYZ UnadaptedWhitePoint(const cmsContext context, + const Profile& profile, + const ColorEncoding& c) { + const cmsCIEXYZ* white_point = static_cast( + cmsReadTag(profile.get(), cmsSigMediaWhitePointTag)); + if (white_point != nullptr && + cmsReadTag(profile.get(), cmsSigChromaticAdaptationTag) == nullptr) { + // No chromatic adaptation matrix: the white point is already unadapted. + return *white_point; + } + + cmsCIEXYZ XYZ = {1.0, 1.0, 1.0}; + Profile profile_xyz; + if (!CreateProfileXYZ(context, &profile_xyz)) return XYZ; + // Array arguments are one per profile. + cmsHPROFILE profiles[2] = {profile.get(), profile_xyz.get()}; + // Leave white point unchanged - that is what we're trying to extract. + cmsUInt32Number intents[2] = {INTENT_ABSOLUTE_COLORIMETRIC, + INTENT_ABSOLUTE_COLORIMETRIC}; + cmsBool black_compensation[2] = {0, 0}; + cmsFloat64Number adaption[2] = {0.0, 0.0}; + // Only transforming a single pixel, so skip expensive optimizations. + cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC; + Transform xform(cmsCreateExtendedTransform( + context, 2, profiles, black_compensation, intents, adaption, nullptr, 0, + Type64(c), TYPE_XYZ_DBL, flags)); + if (!xform) return XYZ; // TODO(lode): return error + + // xy are relative, so magnitude does not matter if we ignore output Y. + const cmsFloat64Number in[3] = {1.0, 1.0, 1.0}; + cmsDoTransform(xform.get(), in, &XYZ.X, 1); + return XYZ; +} + +Status IdentifyPrimaries(const cmsContext context, const Profile& profile, + const cmsCIEXYZ& wp_unadapted, ColorEncoding* c) { + if (!c->HasPrimaries()) return true; + if (ColorSpaceFromProfile(profile) == ColorSpace::kUnknown) return true; + + // These were adapted to the profile illuminant before storing in the profile. 
+ const cmsCIEXYZ* adapted_r = static_cast( + cmsReadTag(profile.get(), cmsSigRedColorantTag)); + const cmsCIEXYZ* adapted_g = static_cast( + cmsReadTag(profile.get(), cmsSigGreenColorantTag)); + const cmsCIEXYZ* adapted_b = static_cast( + cmsReadTag(profile.get(), cmsSigBlueColorantTag)); + + cmsCIEXYZ converted_rgb[3]; + if (adapted_r == nullptr || adapted_g == nullptr || adapted_b == nullptr) { + // No colorant tag, determine the XYZ coordinates of the primaries by + // converting from the colorspace. + Profile profile_xyz; + if (!CreateProfileXYZ(context, &profile_xyz)) { + return JXL_FAILURE("Failed to retrieve colorants"); + } + // Array arguments are one per profile. + cmsHPROFILE profiles[2] = {profile.get(), profile_xyz.get()}; + cmsUInt32Number intents[2] = {INTENT_RELATIVE_COLORIMETRIC, + INTENT_RELATIVE_COLORIMETRIC}; + cmsBool black_compensation[2] = {0, 0}; + cmsFloat64Number adaption[2] = {0.0, 0.0}; + // Only transforming three pixels, so skip expensive optimizations. + cmsUInt32Number flags = cmsFLAGS_NOOPTIMIZE | cmsFLAGS_HIGHRESPRECALC; + Transform xform(cmsCreateExtendedTransform( + context, 2, profiles, black_compensation, intents, adaption, nullptr, 0, + Type64(*c), TYPE_XYZ_DBL, flags)); + if (!xform) return JXL_FAILURE("Failed to retrieve colorants"); + + const cmsFloat64Number in[9] = {1.0, 0.0, 0.0, 0.0, 1.0, + 0.0, 0.0, 0.0, 1.0}; + cmsDoTransform(xform.get(), in, &converted_rgb->X, 3); + adapted_r = &converted_rgb[0]; + adapted_g = &converted_rgb[1]; + adapted_b = &converted_rgb[2]; + } + + // TODO(janwas): no longer assume Bradford and D50. + // Undo the chromatic adaptation. 
+ const cmsCIEXYZ d50 = D50_XYZ(); + + cmsCIEXYZ r, g, b; + cmsAdaptToIlluminant(&r, &d50, &wp_unadapted, adapted_r); + cmsAdaptToIlluminant(&g, &d50, &wp_unadapted, adapted_g); + cmsAdaptToIlluminant(&b, &d50, &wp_unadapted, adapted_b); + + const PrimariesCIExy rgb = {CIExyFromXYZ(r), CIExyFromXYZ(g), + CIExyFromXYZ(b)}; + return c->SetPrimaries(rgb); +} + +void DetectTransferFunction(const cmsContext context, const Profile& profile, + ColorEncoding* JXL_RESTRICT c) { + if (c->tf.SetImplicit()) return; + + float gamma = 0; + if (const auto* gray_trc = reinterpret_cast( + cmsReadTag(profile.get(), cmsSigGrayTRCTag))) { + const double estimated_gamma = + cmsEstimateGamma(gray_trc, /*precision=*/1e-4); + if (estimated_gamma > 0) { + gamma = 1. / estimated_gamma; + } + } else { + float rgb_gamma[3] = {}; + int i = 0; + for (const auto tag : + {cmsSigRedTRCTag, cmsSigGreenTRCTag, cmsSigBlueTRCTag}) { + if (const auto* trc = reinterpret_cast( + cmsReadTag(profile.get(), tag))) { + const double estimated_gamma = + cmsEstimateGamma(trc, /*precision=*/1e-4); + if (estimated_gamma > 0) { + rgb_gamma[i] = 1. / estimated_gamma; + } + } + ++i; + } + if (rgb_gamma[0] != 0 && std::abs(rgb_gamma[0] - rgb_gamma[1]) < 1e-4f && + std::abs(rgb_gamma[1] - rgb_gamma[2]) < 1e-4f) { + gamma = rgb_gamma[0]; + } + } + + if (gamma != 0 && c->tf.SetGamma(gamma)) { + PaddedBytes icc_test; + if (MaybeCreateProfile(*c, &icc_test) && + ProfileEquivalentToICC(context, profile, icc_test, *c)) { + return; + } + } + + for (TransferFunction tf : Values()) { + // Can only create profile from known transfer function. 
+ if (tf == TransferFunction::kUnknown) continue; + + c->tf.SetTransferFunction(tf); + + PaddedBytes icc_test; + if (MaybeCreateProfile(*c, &icc_test) && + ProfileEquivalentToICC(context, profile, icc_test, *c)) { + return; + } + } + + c->tf.SetTransferFunction(TransferFunction::kUnknown); +} + +void ErrorHandler(cmsContext context, cmsUInt32Number code, const char* text) { + JXL_WARNING("LCMS error %u: %s", code, text); +} + +// Returns a context for the current thread, creating it if necessary. +cmsContext GetContext() { + static thread_local void* context_; + if (context_ == nullptr) { + context_ = cmsCreateContext(nullptr, nullptr); + JXL_ASSERT(context_ != nullptr); + + cmsSetLogErrorHandlerTHR(static_cast(context_), &ErrorHandler); + } + return static_cast(context_); +} + +#endif // JPEGXL_ENABLE_SKCMS + +Status GetPrimariesLuminances(const ColorEncoding& encoding, + float luminances[3]) { + // Explanation: + // We know that the three primaries must sum to white: + // + // [Xr, Xg, Xb; [1; [Xw; + // Yr, Yg, Yb; × 1; = Yw; + // Zr, Zg, Zb] 1] Zw] + // + // By noting that X = x·(X+Y+Z), Y = y·(X+Y+Z) and Z = z·(X+Y+Z) (note the + // lower case indicating chromaticity), and factoring the totals (X+Y+Z) out + // of the left matrix and into the all-ones vector, we get: + // + // [xr, xg, xb; [Xr + Yr + Zr; [Xw; + // yr, yg, yb; × Xg + Yg + Zg; = Yw; + // zr, zg, zb] Xb + Yb + Zb] Zw] + // + // Which makes it apparent that we can compute those totals as: + // + // [Xr + Yr + Zr; inv([xr, xg, xb; [Xw; + // Xg + Yg + Zg; = yr, yg, yb; × Yw; + // Xb + Yb + Zb] zr, zg, zb]) Zw] + // + // From there, by multiplying each total by its corresponding y, we get Y for + // that primary. 
+ + float white_XYZ[3]; + JXL_RETURN_IF_ERROR( + CIEXYZFromWhiteCIExy(encoding.GetWhitePoint(), white_XYZ)); + + const PrimariesCIExy primaries = encoding.GetPrimaries(); + double chromaticities[3][3] = { + {primaries.r.x, primaries.g.x, primaries.b.x}, + {primaries.r.y, primaries.g.y, primaries.b.y}, + {1 - primaries.r.x - primaries.r.y, 1 - primaries.g.x - primaries.g.y, + 1 - primaries.b.x - primaries.b.y}}; + JXL_RETURN_IF_ERROR(Inv3x3Matrix(&chromaticities[0][0])); + const double ys[3] = {primaries.r.y, primaries.g.y, primaries.b.y}; + for (size_t i = 0; i < 3; ++i) { + luminances[i] = ys[i] * (chromaticities[i][0] * white_XYZ[0] + + chromaticities[i][1] * white_XYZ[1] + + chromaticities[i][2] * white_XYZ[2]); + } + return true; +} + +Status ApplyHlgOotf(JxlCms* t, float* JXL_RESTRICT buf, size_t xsize, + bool forward) { + if (295 <= t->intensity_target && t->intensity_target <= 305) { + // The gamma is approximately 1 so this can essentially be skipped. + return true; + } + float gamma = 1.2f * std::pow(1.111f, std::log2(t->intensity_target * 1e-3f)); + if (!forward) gamma = 1.f / gamma; + + switch (t->hlg_ootf_num_channels) { + case 1: + for (size_t x = 0; x < xsize; ++x) { + buf[x] = std::pow(buf[x], gamma); + } + break; + + case 3: + for (size_t x = 0; x < xsize; x += 3) { + const float luminance = buf[x] * t->hlg_ootf_luminances[0] + + buf[x + 1] * t->hlg_ootf_luminances[1] + + buf[x + 2] * t->hlg_ootf_luminances[2]; + const float ratio = std::pow(luminance, gamma - 1); + if (std::isfinite(ratio)) { + buf[x] *= ratio; + buf[x + 1] *= ratio; + buf[x + 2] *= ratio; + if (forward && gamma < 1) { + // If gamma < 1, the ratio above will be > 1 which can push bright + // saturated highlights out of gamut. There are several possible + // ways to bring them back in-gamut; this one preserves hue and + // saturation at the slight expense of luminance. 
If !forward, the + // previously-applied forward OOTF with gamma > 1 already pushed + // those highlights down and we are simply putting them back where + // they were so this is not necessary. + const float maximum = + std::max(buf[x], std::max(buf[x + 1], buf[x + 2])); + if (maximum > 1) { + const float normalizer = 1.f / maximum; + buf[x] *= normalizer; + buf[x + 1] *= normalizer; + buf[x + 2] *= normalizer; + } + } + } + } + break; + + default: + return JXL_FAILURE("HLG OOTF not implemented for %" PRIuS " channels", + t->hlg_ootf_num_channels); + } + return true; +} + +bool ApplyCICP(const uint8_t color_primaries, + const uint8_t transfer_characteristics, + const uint8_t matrix_coefficients, const uint8_t full_range, + ColorEncoding* JXL_RESTRICT c) { + if (matrix_coefficients != 0) return false; + if (full_range != 1) return false; + + const auto primaries = static_cast(color_primaries); + const auto tf = static_cast(transfer_characteristics); + if (tf == TransferFunction::kUnknown || !EnumValid(tf)) return false; + if (primaries == Primaries::kCustom || + !(color_primaries == 12 || EnumValid(primaries))) { + return false; + } + c->SetColorSpace(ColorSpace::kRGB); + c->tf.SetTransferFunction(tf); + if (primaries == Primaries::kP3) { + c->white_point = WhitePoint::kDCI; + c->primaries = Primaries::kP3; + } else if (color_primaries == 12) { + c->white_point = WhitePoint::kD65; + c->primaries = Primaries::kP3; + } else { + c->white_point = WhitePoint::kD65; + c->primaries = primaries; + } + return true; +} + +JXL_BOOL JxlCmsSetFieldsFromICC(void* user_data, const uint8_t* icc_data, + size_t icc_size, JxlColorEncoding* c, + JXL_BOOL* cmyk) { + if (c == nullptr) return JXL_FALSE; + if (cmyk == nullptr) return JXL_FALSE; + + *cmyk = JXL_FALSE; + + // In case parsing fails, mark the ColorEncoding as invalid. 
+ c->color_space = JXL_COLOR_SPACE_UNKNOWN; + c->transfer_function = JXL_TRANSFER_FUNCTION_UNKNOWN; + + if (icc_size == 0) return JXL_FAILURE("Empty ICC profile"); + + ColorEncoding c_enc; + +#if JPEGXL_ENABLE_SKCMS + if (icc_size < 128) { + return JXL_FAILURE("ICC file too small"); + } + + skcms_ICCProfile profile; + JXL_RETURN_IF_ERROR(skcms_Parse(icc_data, icc_size, &profile)); + + // skcms does not return the rendering intent, so get it from the file. It + // is encoded as big-endian 32-bit integer in bytes 60..63. + uint32_t rendering_intent32 = icc_data[67]; + if (rendering_intent32 > 3 || icc_data[64] != 0 || icc_data[65] != 0 || + icc_data[66] != 0) { + return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32); + } + // ICC and RenderingIntent have the same values (0..3). + c_enc.rendering_intent = static_cast(rendering_intent32); + + if (profile.has_CICP && + ApplyCICP(profile.CICP.color_primaries, + profile.CICP.transfer_characteristics, + profile.CICP.matrix_coefficients, + profile.CICP.video_full_range_flag, &c_enc)) { + ConvertInternalToExternalColorEncoding(c_enc, c); + return true; + } + + c_enc.SetColorSpace(ColorSpaceFromProfile(profile)); + *cmyk = (profile.data_color_space == skcms_Signature_CMYK); + + CIExy wp_unadapted; + JXL_RETURN_IF_ERROR(UnadaptedWhitePoint(profile, &wp_unadapted)); + JXL_RETURN_IF_ERROR(c_enc.SetWhitePoint(wp_unadapted)); + + // Relies on color_space. + JXL_RETURN_IF_ERROR(IdentifyPrimaries(profile, wp_unadapted, &c_enc)); + + // Relies on color_space/white point/primaries being set already. 
+ DetectTransferFunction(profile, &c_enc); +#else // JPEGXL_ENABLE_SKCMS + + const cmsContext context = GetContext(); + + Profile profile; + JXL_RETURN_IF_ERROR(DecodeProfile( + context, Span(icc_data, icc_size), &profile)); + + const cmsUInt32Number rendering_intent32 = + cmsGetHeaderRenderingIntent(profile.get()); + if (rendering_intent32 > 3) { + return JXL_FAILURE("Invalid rendering intent %u\n", rendering_intent32); + } + // ICC and RenderingIntent have the same values (0..3). + c_enc.rendering_intent = static_cast(rendering_intent32); + + static constexpr size_t kCICPSize = 12; + static constexpr auto kCICPSignature = + static_cast(0x63696370); + uint8_t cicp_buffer[kCICPSize]; + if (cmsReadRawTag(profile.get(), kCICPSignature, cicp_buffer, kCICPSize) == + kCICPSize && + ApplyCICP(cicp_buffer[8], cicp_buffer[9], cicp_buffer[10], + cicp_buffer[11], &c_enc)) { + ConvertInternalToExternalColorEncoding(c_enc, c); + return true; + } + + c_enc.SetColorSpace(ColorSpaceFromProfile(profile)); + if (cmsGetColorSpace(profile.get()) == cmsSigCmykData) { + *cmyk = JXL_TRUE; + ConvertInternalToExternalColorEncoding(c_enc, c); + return true; + } + + const cmsCIEXYZ wp_unadapted = UnadaptedWhitePoint(context, profile, c_enc); + JXL_RETURN_IF_ERROR(c_enc.SetWhitePoint(CIExyFromXYZ(wp_unadapted))); + + // Relies on color_space. + JXL_RETURN_IF_ERROR( + IdentifyPrimaries(context, profile, wp_unadapted, &c_enc)); + + // Relies on color_space/white point/primaries being set already. 
+ DetectTransferFunction(context, profile, &c_enc); + +#endif // JPEGXL_ENABLE_SKCMS + + ConvertInternalToExternalColorEncoding(c_enc, c); + return true; +} + +} // namespace + +namespace { + +void JxlCmsDestroy(void* cms_data) { + if (cms_data == nullptr) return; + JxlCms* t = reinterpret_cast(cms_data); +#if !JPEGXL_ENABLE_SKCMS + TransformDeleter()(t->lcms_transform); +#endif + delete t; +} + +void* JxlCmsInit(void* init_data, size_t num_threads, size_t xsize, + const JxlColorProfile* input, const JxlColorProfile* output, + float intensity_target) { + auto cms = static_cast(init_data); + auto t = jxl::make_unique(); + PaddedBytes icc_src, icc_dst; + icc_src.assign(input->icc.data, input->icc.data + input->icc.size); + ColorEncoding c_src; + if (!c_src.SetICC(std::move(icc_src), cms)) { + JXL_NOTIFY_ERROR("JxlCmsInit: failed to parse input ICC"); + return nullptr; + } + icc_dst.assign(output->icc.data, output->icc.data + output->icc.size); + ColorEncoding c_dst; + if (!c_dst.SetICC(std::move(icc_dst), cms)) { + JXL_NOTIFY_ERROR("JxlCmsInit: failed to parse output ICC"); + return nullptr; + } +#if JXL_CMS_VERBOSE + printf("%s -> %s\n", Description(c_src).c_str(), Description(c_dst).c_str()); +#endif + +#if JPEGXL_ENABLE_SKCMS + if (!DecodeProfile(input->icc.data, input->icc.size, &t->profile_src)) { + JXL_NOTIFY_ERROR("JxlCmsInit: skcms failed to parse input ICC"); + return nullptr; + } + if (!DecodeProfile(output->icc.data, output->icc.size, &t->profile_dst)) { + JXL_NOTIFY_ERROR("JxlCmsInit: skcms failed to parse output ICC"); + return nullptr; + } +#else // JPEGXL_ENABLE_SKCMS + const cmsContext context = GetContext(); + Profile profile_src, profile_dst; + if (!DecodeProfile(context, Span(c_src.ICC()), &profile_src)) { + JXL_NOTIFY_ERROR("JxlCmsInit: lcms failed to parse input ICC"); + return nullptr; + } + if (!DecodeProfile(context, Span(c_dst.ICC()), &profile_dst)) { + JXL_NOTIFY_ERROR("JxlCmsInit: lcms failed to parse output ICC"); + return nullptr; + } 
+#endif // JPEGXL_ENABLE_SKCMS + + t->skip_lcms = false; + if (c_src.SameColorEncoding(c_dst)) { + t->skip_lcms = true; +#if JXL_CMS_VERBOSE + printf("Skip CMS\n"); +#endif + } + + t->apply_hlg_ootf = c_src.tf.IsHLG() != c_dst.tf.IsHLG(); + if (t->apply_hlg_ootf) { + const ColorEncoding* c_hlg = c_src.tf.IsHLG() ? &c_src : &c_dst; + t->hlg_ootf_num_channels = c_hlg->Channels(); + if (t->hlg_ootf_num_channels == 3 && + !GetPrimariesLuminances(*c_hlg, t->hlg_ootf_luminances.data())) { + JXL_NOTIFY_ERROR( + "JxlCmsInit: failed to compute the luminances of primaries"); + return nullptr; + } + } + + // Special-case SRGB <=> linear if the primaries / white point are the same, + // or any conversion where PQ or HLG is involved: + bool src_linear = c_src.tf.IsLinear(); + const bool dst_linear = c_dst.tf.IsLinear(); + + if (c_src.tf.IsPQ() || c_src.tf.IsHLG() || + (c_src.tf.IsSRGB() && dst_linear && c_src.SameColorSpace(c_dst))) { + // Construct new profile as if the data were already/still linear. + ColorEncoding c_linear_src = c_src; + c_linear_src.tf.SetTransferFunction(TransferFunction::kLinear); +#if JPEGXL_ENABLE_SKCMS + skcms_ICCProfile new_src; +#else // JPEGXL_ENABLE_SKCMS + Profile new_src; +#endif // JPEGXL_ENABLE_SKCMS + // Only enable ExtraTF if profile creation succeeded. + if (MaybeCreateProfile(c_linear_src, &icc_src) && +#if JPEGXL_ENABLE_SKCMS + DecodeProfile(icc_src.data(), icc_src.size(), &new_src)) { +#else // JPEGXL_ENABLE_SKCMS + DecodeProfile(context, Span(icc_src), &new_src)) { +#endif // JPEGXL_ENABLE_SKCMS +#if JXL_CMS_VERBOSE + printf("Special HLG/PQ/sRGB -> linear\n"); +#endif +#if JPEGXL_ENABLE_SKCMS + t->icc_src = std::move(icc_src); + t->profile_src = new_src; +#else // JPEGXL_ENABLE_SKCMS + profile_src.swap(new_src); +#endif // JPEGXL_ENABLE_SKCMS + t->preprocess = c_src.tf.IsSRGB() + ? ExtraTF::kSRGB + : (c_src.tf.IsPQ() ? 
ExtraTF::kPQ : ExtraTF::kHLG); + c_src = c_linear_src; + src_linear = true; + } else { + if (t->apply_hlg_ootf) { + JXL_NOTIFY_ERROR( + "Failed to create extra linear source profile, and HLG OOTF " + "required"); + return nullptr; + } + JXL_WARNING("Failed to create extra linear destination profile"); + } + } + + if (c_dst.tf.IsPQ() || c_dst.tf.IsHLG() || + (c_dst.tf.IsSRGB() && src_linear && c_src.SameColorSpace(c_dst))) { + ColorEncoding c_linear_dst = c_dst; + c_linear_dst.tf.SetTransferFunction(TransferFunction::kLinear); +#if JPEGXL_ENABLE_SKCMS + skcms_ICCProfile new_dst; +#else // JPEGXL_ENABLE_SKCMS + Profile new_dst; +#endif // JPEGXL_ENABLE_SKCMS + // Only enable ExtraTF if profile creation succeeded. + if (MaybeCreateProfile(c_linear_dst, &icc_dst) && +#if JPEGXL_ENABLE_SKCMS + DecodeProfile(icc_dst.data(), icc_dst.size(), &new_dst)) { +#else // JPEGXL_ENABLE_SKCMS + DecodeProfile(context, Span(icc_dst), &new_dst)) { +#endif // JPEGXL_ENABLE_SKCMS +#if JXL_CMS_VERBOSE + printf("Special linear -> HLG/PQ/sRGB\n"); +#endif +#if JPEGXL_ENABLE_SKCMS + t->icc_dst = std::move(icc_dst); + t->profile_dst = new_dst; +#else // JPEGXL_ENABLE_SKCMS + profile_dst.swap(new_dst); +#endif // JPEGXL_ENABLE_SKCMS + t->postprocess = c_dst.tf.IsSRGB() + ? ExtraTF::kSRGB + : (c_dst.tf.IsPQ() ? 
ExtraTF::kPQ : ExtraTF::kHLG); + c_dst = c_linear_dst; + } else { + if (t->apply_hlg_ootf) { + JXL_NOTIFY_ERROR( + "Failed to create extra linear destination profile, and inverse " + "HLG OOTF required"); + return nullptr; + } + JXL_WARNING("Failed to create extra linear destination profile"); + } + } + + if (c_src.SameColorEncoding(c_dst)) { +#if JXL_CMS_VERBOSE + printf("Same intermediary linear profiles, skipping CMS\n"); +#endif + t->skip_lcms = true; + } + +#if JPEGXL_ENABLE_SKCMS + if (!skcms_MakeUsableAsDestination(&t->profile_dst)) { + JXL_NOTIFY_ERROR( + "Failed to make %s usable as a color transform destination", + Description(c_dst).c_str()); + return nullptr; + } +#endif // JPEGXL_ENABLE_SKCMS + + // Not including alpha channel (copied separately). + const size_t channels_src = (c_src.IsCMYK() ? 4 : c_src.Channels()); + const size_t channels_dst = c_dst.Channels(); + JXL_CHECK(channels_src == channels_dst || + (channels_src == 4 && channels_dst == 3)); +#if JXL_CMS_VERBOSE + printf("Channels: %" PRIuS "; Threads: %" PRIuS "\n", channels_src, + num_threads); +#endif + +#if !JPEGXL_ENABLE_SKCMS + // Type includes color space (XYZ vs RGB), so can be different. + const uint32_t type_src = Type32(c_src, channels_src == 4); + const uint32_t type_dst = Type32(c_dst, false); + const uint32_t intent = static_cast(c_dst.rendering_intent); + // Use cmsFLAGS_NOCACHE to disable the 1-pixel cache and make calling + // cmsDoTransform() thread-safe. + const uint32_t flags = cmsFLAGS_NOCACHE | cmsFLAGS_BLACKPOINTCOMPENSATION | + cmsFLAGS_HIGHRESPRECALC; + t->lcms_transform = + cmsCreateTransformTHR(context, profile_src.get(), type_src, + profile_dst.get(), type_dst, intent, flags); + if (t->lcms_transform == nullptr) { + JXL_NOTIFY_ERROR("Failed to create transform"); + return nullptr; + } +#endif // !JPEGXL_ENABLE_SKCMS + + // Ideally LCMS would convert directly from External to Image3. 
However, + // cmsDoTransformLineStride only accepts 32-bit BytesPerPlaneIn, whereas our + // planes can be more than 4 GiB apart. Hence, transform inputs/outputs must + // be interleaved. Calling cmsDoTransform for each pixel is expensive + // (indirect call). We therefore transform rows, which requires per-thread + // buffers. To avoid separate allocations, we use the rows of an image. + // Because LCMS apparently also cannot handle <= 16 bit inputs and 32-bit + // outputs (or vice versa), we use floating point input/output. + t->channels_src = channels_src; + t->channels_dst = channels_dst; +#if JPEGXL_ENABLE_SKCMS + // SkiaCMS doesn't support grayscale float buffers, so we create space for RGB + // float buffers anyway. + t->buf_src = ImageF(xsize * (channels_src == 4 ? 4 : 3), num_threads); + t->buf_dst = ImageF(xsize * 3, num_threads); +#else + t->buf_src = ImageF(xsize * channels_src, num_threads); + t->buf_dst = ImageF(xsize * channels_dst, num_threads); +#endif + t->intensity_target = intensity_target; + return t.release(); +} + +float* JxlCmsGetSrcBuf(void* cms_data, size_t thread) { + JxlCms* t = reinterpret_cast(cms_data); + return t->buf_src.Row(thread); +} + +float* JxlCmsGetDstBuf(void* cms_data, size_t thread) { + JxlCms* t = reinterpret_cast(cms_data); + return t->buf_dst.Row(thread); +} + +} // namespace + +const JxlCmsInterface& GetJxlCms() { + static constexpr JxlCmsInterface kInterface = { + /*set_fields_data=*/nullptr, + /*set_fields_from_icc=*/&JxlCmsSetFieldsFromICC, + /*init_data=*/const_cast(static_cast(&kInterface)), + /*init=*/&JxlCmsInit, + /*get_src_buf=*/&JxlCmsGetSrcBuf, + /*get_dst_buf=*/&JxlCmsGetDstBuf, + /*run=*/&DoColorSpaceTransform, + /*destroy=*/&JxlCmsDestroy}; + return kInterface; +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_color_management.h b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.h new file mode 100644 index 0000000000..6f6e9023a6 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/enc_color_management.h @@ -0,0 +1,90 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_COLOR_MANAGEMENT_H_ +#define LIB_JXL_ENC_COLOR_MANAGEMENT_H_ + +// ICC profiles and color space conversions. + +#include +#include +#include + +#include + +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image.h" + +namespace jxl { + +// Internal C++ wrapper for a JxlCmsInterface. +class ColorSpaceTransform { + public: + explicit ColorSpaceTransform(const JxlCmsInterface& cms) : cms_(cms) {} + ~ColorSpaceTransform() { + if (cms_data_ != nullptr) { + cms_.destroy(cms_data_); + } + } + + // Cannot copy. + ColorSpaceTransform(const ColorSpaceTransform&) = delete; + ColorSpaceTransform& operator=(const ColorSpaceTransform&) = delete; + + Status Init(const ColorEncoding& c_src, const ColorEncoding& c_dst, + float intensity_target, size_t xsize, size_t num_threads) { + xsize_ = xsize; + JxlColorProfile input_profile; + icc_src_ = c_src.ICC(); + input_profile.icc.data = icc_src_.data(); + input_profile.icc.size = icc_src_.size(); + ConvertInternalToExternalColorEncoding(c_src, + &input_profile.color_encoding); + input_profile.num_channels = c_src.IsCMYK() ? 
4 : c_src.Channels(); + JxlColorProfile output_profile; + icc_dst_ = c_dst.ICC(); + output_profile.icc.data = icc_dst_.data(); + output_profile.icc.size = icc_dst_.size(); + ConvertInternalToExternalColorEncoding(c_dst, + &output_profile.color_encoding); + if (c_dst.IsCMYK()) + return JXL_FAILURE("Conversion to CMYK is not supported"); + output_profile.num_channels = c_dst.Channels(); + cms_data_ = cms_.init(cms_.init_data, num_threads, xsize, &input_profile, + &output_profile, intensity_target); + JXL_RETURN_IF_ERROR(cms_data_ != nullptr); + return true; + } + + float* BufSrc(const size_t thread) const { + return cms_.get_src_buf(cms_data_, thread); + } + + float* BufDst(const size_t thread) const { + return cms_.get_dst_buf(cms_data_, thread); + } + + Status Run(const size_t thread, const float* buf_src, float* buf_dst) { + return cms_.run(cms_data_, thread, buf_src, buf_dst, xsize_); + } + + private: + JxlCmsInterface cms_; + void* cms_data_ = nullptr; + // The interface may retain pointers into these. + PaddedBytes icc_src_; + PaddedBytes icc_dst_; + size_t xsize_; +}; + +const JxlCmsInterface& GetJxlCms(); + +} // namespace jxl + +#endif // LIB_JXL_ENC_COLOR_MANAGEMENT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_comparator.cc b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.cc new file mode 100644 index 0000000000..79989e2db4 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.cc @@ -0,0 +1,128 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_comparator.h" + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/enc_gamma_correct.h" +#include "lib/jxl/enc_image_bundle.h" + +namespace jxl { +namespace { + +// color is linear, but blending happens in gamma-compressed space using +// (gamma-compressed) grayscale background color, alpha image represents +// weights of the sRGB colors in the [0 .. (1 << bit_depth) - 1] interval, +// output image is in linear space. +void AlphaBlend(const Image3F& in, const size_t c, float background_linear, + const ImageF& alpha, Image3F* out) { + const float background = LinearToSrgb8Direct(background_linear); + + for (size_t y = 0; y < out->ysize(); ++y) { + const float* JXL_RESTRICT row_a = alpha.ConstRow(y); + const float* JXL_RESTRICT row_i = in.ConstPlaneRow(c, y); + float* JXL_RESTRICT row_o = out->PlaneRow(c, y); + for (size_t x = 0; x < out->xsize(); ++x) { + const float a = row_a[x]; + if (a <= 0.f) { + row_o[x] = background_linear; + } else if (a >= 1.f) { + row_o[x] = row_i[x]; + } else { + const float w_fg = a; + const float w_bg = 1.0f - w_fg; + const float fg = w_fg * LinearToSrgb8Direct(row_i[x]); + const float bg = w_bg * background; + row_o[x] = Srgb8ToLinearDirect(fg + bg); + } + } + } +} + +void AlphaBlend(float background_linear, ImageBundle* io_linear_srgb) { + // No alpha => all opaque. 
+ if (!io_linear_srgb->HasAlpha()) return; + + for (size_t c = 0; c < 3; ++c) { + AlphaBlend(*io_linear_srgb->color(), c, background_linear, + *io_linear_srgb->alpha(), io_linear_srgb->color()); + } +} + +float ComputeScoreImpl(const ImageBundle& rgb0, const ImageBundle& rgb1, + Comparator* comparator, ImageF* distmap) { + JXL_CHECK(comparator->SetReferenceImage(rgb0)); + float score; + JXL_CHECK(comparator->CompareWith(rgb1, distmap, &score)); + return score; +} + +} // namespace + +float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1, + Comparator* comparator, const JxlCmsInterface& cms, + ImageF* diffmap, ThreadPool* pool, bool ignore_alpha) { + // Convert to linear sRGB (unless already in that space) + ImageMetadata metadata0 = *rgb0.metadata(); + ImageBundle store0(&metadata0); + const ImageBundle* linear_srgb0; + JXL_CHECK(TransformIfNeeded(rgb0, ColorEncoding::LinearSRGB(rgb0.IsGray()), + cms, pool, &store0, &linear_srgb0)); + ImageMetadata metadata1 = *rgb1.metadata(); + ImageBundle store1(&metadata1); + const ImageBundle* linear_srgb1; + JXL_CHECK(TransformIfNeeded(rgb1, ColorEncoding::LinearSRGB(rgb1.IsGray()), + cms, pool, &store1, &linear_srgb1)); + + // No alpha: skip blending, only need a single call to Butteraugli. 
+ if (ignore_alpha || (!rgb0.HasAlpha() && !rgb1.HasAlpha())) { + return ComputeScoreImpl(*linear_srgb0, *linear_srgb1, comparator, diffmap); + } + + // Blend on black and white backgrounds + + const float black = 0.0f; + ImageBundle blended_black0 = linear_srgb0->Copy(); + ImageBundle blended_black1 = linear_srgb1->Copy(); + AlphaBlend(black, &blended_black0); + AlphaBlend(black, &blended_black1); + + const float white = 1.0f; + ImageBundle blended_white0 = linear_srgb0->Copy(); + ImageBundle blended_white1 = linear_srgb1->Copy(); + + AlphaBlend(white, &blended_white0); + AlphaBlend(white, &blended_white1); + + ImageF diffmap_black, diffmap_white; + const float dist_black = ComputeScoreImpl(blended_black0, blended_black1, + comparator, &diffmap_black); + const float dist_white = ComputeScoreImpl(blended_white0, blended_white1, + comparator, &diffmap_white); + + // diffmap and return values are the max of diffmap_black/white. + if (diffmap != nullptr) { + const size_t xsize = rgb0.xsize(); + const size_t ysize = rgb0.ysize(); + *diffmap = ImageF(xsize, ysize); + for (size_t y = 0; y < ysize; ++y) { + const float* JXL_RESTRICT row_black = diffmap_black.ConstRow(y); + const float* JXL_RESTRICT row_white = diffmap_white.ConstRow(y); + float* JXL_RESTRICT row_out = diffmap->Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = std::max(row_black[x], row_white[x]); + } + } + } + return std::max(dist_black, dist_white); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_comparator.h b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.h new file mode 100644 index 0000000000..c545ea6111 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_comparator.h @@ -0,0 +1,53 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_COMPARATOR_H_ +#define LIB_JXL_ENC_COMPARATOR_H_ + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +class Comparator { + public: + virtual ~Comparator() = default; + + // Sets the reference image, the first to compare + // Image must be in linear sRGB (gamma expanded) in range 0.0f-1.0f as + // the range from standard black point to standard white point, but values + // outside permitted. + virtual Status SetReferenceImage(const ImageBundle& ref) = 0; + + // Sets the actual image (with loss), the second to compare + // Image must be in linear sRGB (gamma expanded) in range 0.0f-1.0f as + // the range from standard black point to standard white point, but values + // outside permitted. + // In diffmap it outputs the local score per pixel, while in score it outputs + // a single score. Any one may be set to nullptr to not compute it. + virtual Status CompareWith(const ImageBundle& actual, ImageF* diffmap, + float* score) = 0; + + // Quality thresholds for diffmap and score values. + // The good score must represent a value where the images are considered to + // be perceptually indistinguishable (but not identical) + // The bad value must be larger than good to indicate "lower means better" + // and smaller than good to indicate "higher means better" + virtual float GoodQualityScore() const = 0; + virtual float BadQualityScore() const = 0; +}; + +// Computes the score given images in any RGB color model, optionally with +// alpha channel. 
+float ComputeScore(const ImageBundle& rgb0, const ImageBundle& rgb1, + Comparator* comparator, const JxlCmsInterface& cms, + ImageF* diffmap = nullptr, ThreadPool* pool = nullptr, + bool ignore_alpha = false); + +} // namespace jxl + +#endif // LIB_JXL_ENC_COMPARATOR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_context_map.cc b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.cc new file mode 100644 index 0000000000..842dd12423 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.cc @@ -0,0 +1,141 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Library to encode the context map. + +#include "lib/jxl/enc_context_map.h" + +#include + +#include +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/entropy_coder.h" + +namespace jxl { + +namespace { + +size_t IndexOf(const std::vector& v, uint8_t value) { + size_t i = 0; + for (; i < v.size(); ++i) { + if (v[i] == value) return i; + } + return i; +} + +void MoveToFront(std::vector* v, size_t index) { + uint8_t value = (*v)[index]; + for (size_t i = index; i != 0; --i) { + (*v)[i] = (*v)[i - 1]; + } + (*v)[0] = value; +} + +std::vector MoveToFrontTransform(const std::vector& v) { + if (v.empty()) return v; + uint8_t max_value = *std::max_element(v.begin(), v.end()); + std::vector mtf(max_value + 1); + for (size_t i = 0; i <= max_value; ++i) mtf[i] = i; + std::vector result(v.size()); + for (size_t i = 0; i < v.size(); ++i) { + size_t index = IndexOf(mtf, v[i]); + JXL_ASSERT(index < mtf.size()); + result[i] = static_cast(index); + MoveToFront(&mtf, index); + } + return result; +} + +} // namespace + +void EncodeContextMap(const std::vector& context_map, + size_t num_histograms, BitWriter* writer, size_t layer, + AuxOut* aux_out) { + if 
(num_histograms == 1) { + // Simple code + writer->Write(1, 1); + // 0 bits per entry. + writer->Write(2, 0); + return; + } + + std::vector transformed_symbols = MoveToFrontTransform(context_map); + std::vector> tokens(1), mtf_tokens(1); + EntropyEncodingData codes; + std::vector dummy_context_map; + for (size_t i = 0; i < context_map.size(); i++) { + tokens[0].emplace_back(0, context_map[i]); + } + for (size_t i = 0; i < transformed_symbols.size(); i++) { + mtf_tokens[0].emplace_back(0, transformed_symbols[i]); + } + HistogramParams params; + params.uint_method = HistogramParams::HybridUintMethod::kContextMap; + size_t ans_cost = BuildAndEncodeHistograms( + params, 1, tokens, &codes, &dummy_context_map, nullptr, 0, nullptr); + size_t mtf_cost = BuildAndEncodeHistograms( + params, 1, mtf_tokens, &codes, &dummy_context_map, nullptr, 0, nullptr); + bool use_mtf = mtf_cost < ans_cost; + // Rebuild token list. + tokens[0].clear(); + for (size_t i = 0; i < transformed_symbols.size(); i++) { + tokens[0].emplace_back(0, + use_mtf ? transformed_symbols[i] : context_map[i]); + } + size_t entry_bits = CeilLog2Nonzero(num_histograms); + size_t simple_cost = entry_bits * context_map.size(); + if (entry_bits < 4 && simple_cost < ans_cost && simple_cost < mtf_cost) { + writer->Write(1, 1); + writer->Write(2, entry_bits); + for (size_t i = 0; i < context_map.size(); i++) { + writer->Write(entry_bits, context_map[i]); + } + } else { + writer->Write(1, 0); + writer->Write(1, use_mtf); // Use/don't use MTF. 
+ BuildAndEncodeHistograms(params, 1, tokens, &codes, &dummy_context_map, + writer, layer, aux_out); + WriteTokens(tokens[0], codes, dummy_context_map, writer); + } +} + +void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer, + AuxOut* aux_out) { + auto& dct = block_ctx_map.dc_thresholds; + auto& qft = block_ctx_map.qf_thresholds; + auto& ctx_map = block_ctx_map.ctx_map; + BitWriter::Allotment allotment( + writer, + (dct[0].size() + dct[1].size() + dct[2].size() + qft.size()) * 34 + 1 + + 4 + 4 + ctx_map.size() * 10 + 1024); + if (dct[0].empty() && dct[1].empty() && dct[2].empty() && qft.empty() && + ctx_map.size() == 21 && + std::equal(ctx_map.begin(), ctx_map.end(), BlockCtxMap::kDefaultCtxMap)) { + writer->Write(1, 1); // default + allotment.ReclaimAndCharge(writer, kLayerAC, aux_out); + return; + } + writer->Write(1, 0); + for (int j : {0, 1, 2}) { + writer->Write(4, dct[j].size()); + for (int i : dct[j]) { + JXL_CHECK(U32Coder::Write(kDCThresholdDist, PackSigned(i), writer)); + } + } + writer->Write(4, qft.size()); + for (uint32_t i : qft) { + JXL_CHECK(U32Coder::Write(kQFThresholdDist, i - 1, writer)); + } + EncodeContextMap(ctx_map, block_ctx_map.num_ctxs, writer, kLayerAC, aux_out); + allotment.ReclaimAndCharge(writer, kLayerAC, aux_out); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_context_map.h b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.h new file mode 100644 index 0000000000..041e71de7a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_context_map.h @@ -0,0 +1,35 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_CONTEXT_MAP_H_ +#define LIB_JXL_ENC_CONTEXT_MAP_H_ + +#include +#include + +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/enc_bit_writer.h" + +namespace jxl { + +struct AuxOut; + +// Max limit is 255 because encoding assumes numbers < 255 +// More clusters can help compression, but makes encode/decode somewhat slower +static const size_t kClustersLimit = 128; + +// Encodes the given context map to the bit stream. The number of different +// histogram ids is given by num_histograms. +void EncodeContextMap(const std::vector& context_map, + size_t num_histograms, BitWriter* writer, size_t layer, + AuxOut* aux_out); + +void EncodeBlockCtxMap(const BlockCtxMap& block_ctx_map, BitWriter* writer, + AuxOut* aux_out); +} // namespace jxl + +#endif // LIB_JXL_ENC_CONTEXT_MAP_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.cc b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.cc new file mode 100644 index 0000000000..706bcd69ba --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.cc @@ -0,0 +1,95 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_debug_image.h" + +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/dec_external_image.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { + +namespace { +template +void DumpImageT(const CompressParams& cparams, const char* label, + const ColorEncoding& color_encoding, const Image3& image) { + if (!cparams.debug_image) return; + Image3F float_image = ConvertToFloat(image); + JxlColorEncoding color; + ConvertInternalToExternalColorEncoding(color_encoding, &color); + size_t num_pixels = 3 * image.xsize() * image.ysize(); + std::vector pixels(num_pixels); + const ImageF* channels[3]; + for (int c = 0; c < 3; ++c) { + channels[c] = &float_image.Plane(c); + } + JXL_CHECK(ConvertChannelsToExternal( + channels, 3, 16, false, JXL_BIG_ENDIAN, 6 * image.xsize(), nullptr, + &pixels[0], 2 * num_pixels, PixelCallback(), Orientation::kIdentity)); + (*cparams.debug_image)(cparams.debug_image_opaque, label, image.xsize(), + image.ysize(), &color, &pixels[0]); +} + +template +void DumpPlaneNormalizedT(const CompressParams& cparams, const char* label, + const Plane& image) { + T min; + T max; + ImageMinMax(image, &min, &max); + Image3B normalized(image.xsize(), image.ysize()); + for (size_t c = 0; c < 3; ++c) { + float mul = min == max ? 
0 : (255.0f / (max - min)); + for (size_t y = 0; y < image.ysize(); ++y) { + const T* JXL_RESTRICT row_in = image.ConstRow(y); + uint8_t* JXL_RESTRICT row_out = normalized.PlaneRow(c, y); + for (size_t x = 0; x < image.xsize(); ++x) { + row_out[x] = static_cast((row_in[x] - min) * mul); + } + } + } + DumpImageT(cparams, label, ColorEncoding::SRGB(), normalized); +} + +} // namespace + +void DumpImage(const CompressParams& cparams, const char* label, + const Image3& image) { + DumpImageT(cparams, label, ColorEncoding::SRGB(), image); +} + +void DumpImage(const CompressParams& cparams, const char* label, + const Image3& image) { + DumpImageT(cparams, label, ColorEncoding::SRGB(), image); +} + +void DumpXybImage(const CompressParams& cparams, const char* label, + const Image3F& image) { + if (!cparams.debug_image) return; + + Image3F linear(image.xsize(), image.ysize()); + OpsinParams opsin_params; + opsin_params.Init(kDefaultIntensityTarget); + OpsinToLinear(image, Rect(linear), nullptr, &linear, opsin_params); + + DumpImageT(cparams, label, ColorEncoding::LinearSRGB(), linear); +} + +void DumpPlaneNormalized(const CompressParams& cparams, const char* label, + const Plane& image) { + DumpPlaneNormalizedT(cparams, label, image); +} + +void DumpPlaneNormalized(const CompressParams& cparams, const char* label, + const Plane& image) { + DumpPlaneNormalizedT(cparams, label, image); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.h b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.h new file mode 100644 index 0000000000..33799a5f7f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_debug_image.h @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_DEBUG_IMAGE_H_ +#define LIB_JXL_ENC_DEBUG_IMAGE_H_ + +// Optional output images for debugging. 
+ +#include +#include + +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image.h" + +namespace jxl { + +void DumpImage(const CompressParams& cparams, const char* label, + const Image3& image); +void DumpImage(const CompressParams& cparams, const char* label, + const Image3& image); +void DumpXybImage(const CompressParams& cparams, const char* label, + const Image3& image); +void DumpPlaneNormalized(const CompressParams& cparams, const char* label, + const Plane& image); +void DumpPlaneNormalized(const CompressParams& cparams, const char* label, + const Plane& image); + +// Used to skip image creation if they won't be written to debug directory. +static inline bool WantDebugOutput(const CompressParams& cparams) { + return cparams.debug_image != nullptr; +} + +} // namespace jxl + +#endif // LIB_JXL_ENC_DEBUG_IMAGE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.cc b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.cc new file mode 100644 index 0000000000..eaea1db7b8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.cc @@ -0,0 +1,587 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_detect_dots.h" + +#include + +#include +#include +#include +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_detect_dots.cc" +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/enc_linalg.h" +#include "lib/jxl/enc_optimize.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" + +// Set JXL_DEBUG_DOT_DETECT to 1 to enable debugging. 
+#ifndef JXL_DEBUG_DOT_DETECT +#define JXL_DEBUG_DOT_DETECT 0 +#endif + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::Sub; + +ImageF SumOfSquareDifferences(const Image3F& forig, const Image3F& smooth, + ThreadPool* pool) { + const HWY_FULL(float) d; + const auto color_coef0 = Set(d, 0.0f); + const auto color_coef1 = Set(d, 10.0f); + const auto color_coef2 = Set(d, 0.0f); + + ImageF sum_of_squares(forig.xsize(), forig.ysize()); + JXL_CHECK(RunOnPool( + pool, 0, forig.ysize(), ThreadPool::NoInit, + [&](const uint32_t task, size_t thread) { + const size_t y = static_cast(task); + const float* JXL_RESTRICT orig_row0 = forig.Plane(0).ConstRow(y); + const float* JXL_RESTRICT orig_row1 = forig.Plane(1).ConstRow(y); + const float* JXL_RESTRICT orig_row2 = forig.Plane(2).ConstRow(y); + const float* JXL_RESTRICT smooth_row0 = smooth.Plane(0).ConstRow(y); + const float* JXL_RESTRICT smooth_row1 = smooth.Plane(1).ConstRow(y); + const float* JXL_RESTRICT smooth_row2 = smooth.Plane(2).ConstRow(y); + float* JXL_RESTRICT sos_row = sum_of_squares.Row(y); + + for (size_t x = 0; x < forig.xsize(); x += Lanes(d)) { + auto v0 = Sub(Load(d, orig_row0 + x), Load(d, smooth_row0 + x)); + auto v1 = Sub(Load(d, orig_row1 + x), Load(d, smooth_row1 + x)); + auto v2 = Sub(Load(d, orig_row2 + x), Load(d, smooth_row2 + x)); + v0 = Mul(Mul(v0, v0), color_coef0); + v1 = Mul(Mul(v1, v1), color_coef1); + v2 = Mul(Mul(v2, v2), color_coef2); + const auto sos = + Add(v0, Add(v1, v2)); // weighted sum of square diffs + Store(sos, d, sos_row + x); + } + }, + "ComputeEnergyImage")); + return sum_of_squares; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(SumOfSquareDifferences); // Local function + +const int 
kEllipseWindowSize = 5; + +namespace { +struct GaussianEllipse { + double x; // position in x + double y; // position in y + double sigma_x; // scale in x + double sigma_y; // scale in y + double angle; // ellipse rotation in radians + std::array intensity; // intensity in each channel + + // The following variables do not need to be encoded + double l2_loss; // error after the Gaussian was fit + double l1_loss; + double ridge_loss; // the l2_loss plus regularization term + double custom_loss; // experimental custom loss + std::array bgColor; // best background color + size_t neg_pixels; // number of negative pixels when subtracting dot + std::array neg_value; // debt due to channel truncation +}; +double DotGaussianModel(double dx, double dy, double ct, double st, + double sigma_x, double sigma_y, double intensity) { + double rx = ct * dx + st * dy; + double ry = -st * dx + ct * dy; + double md = (rx * rx / sigma_x) + (ry * ry / sigma_y); + double value = intensity * exp(-0.5 * md); + return value; +} + +constexpr bool kOptimizeBackground = true; + +// Gaussian that smooths noise but preserves dots +const WeightsSeparable5& WeightsSeparable5Gaussian0_65() { + constexpr float w0 = 0.558311f; + constexpr float w1 = 0.210395f; + constexpr float w2 = 0.010449f; + static constexpr WeightsSeparable5 weights = { + {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, + {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}}; + return weights; +} + +// (Iterated) Gaussian that removes dots. +const WeightsSeparable5& WeightsSeparable5Gaussian3() { + constexpr float w0 = 0.222338f; + constexpr float w1 = 0.210431f; + constexpr float w2 = 0.1784f; + static constexpr WeightsSeparable5 weights = { + {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}, + {HWY_REP4(w0), HWY_REP4(w1), HWY_REP4(w2)}}; + return weights; +} + +ImageF ComputeEnergyImage(const Image3F& orig, Image3F* smooth, + ThreadPool* pool) { + // Prepare guidance images for dot selection. 
+ Image3F forig(orig.xsize(), orig.ysize()); + *smooth = Image3F(orig.xsize(), orig.ysize()); + Rect rect(orig); + + const auto& weights1 = WeightsSeparable5Gaussian0_65(); + const auto& weights3 = WeightsSeparable5Gaussian3(); + + for (size_t c = 0; c < 3; ++c) { + // Use forig as temporary storage to reduce memory and keep it warmer. + Separable5(orig.Plane(c), rect, weights3, pool, &forig.Plane(c)); + Separable5(forig.Plane(c), rect, weights3, pool, &smooth->Plane(c)); + Separable5(orig.Plane(c), rect, weights1, pool, &forig.Plane(c)); + } + + return HWY_DYNAMIC_DISPATCH(SumOfSquareDifferences)(forig, *smooth, pool); +} + +struct Pixel { + int x; + int y; +}; + +Pixel operator+(const Pixel& a, const Pixel& b) { + return Pixel{a.x + b.x, a.y + b.y}; +} + +// Maximum area in pixels of a ellipse +const size_t kMaxCCSize = 1000; + +// Extracts a connected component from a Binary image where seed is part +// of the component +bool ExtractComponent(ImageF* img, std::vector* pixels, + const Pixel& seed, double threshold) { + static const std::vector neighbors{{1, -1}, {1, 0}, {1, 1}, {0, -1}, + {0, 1}, {-1, -1}, {-1, 1}, {1, 0}}; + std::vector q{seed}; + while (!q.empty()) { + Pixel current = q.back(); + q.pop_back(); + pixels->push_back(current); + if (pixels->size() > kMaxCCSize) return false; + for (const Pixel& delta : neighbors) { + Pixel child = current + delta; + if (child.x >= 0 && static_cast(child.x) < img->xsize() && + child.y >= 0 && static_cast(child.y) < img->ysize()) { + float* value = &img->Row(child.y)[child.x]; + if (*value > threshold) { + *value = 0.0; + q.push_back(child); + } + } + } + } + return true; +} + +inline bool PointInRect(const Rect& r, const Pixel& p) { + return (static_cast(p.x) >= r.x0() && + static_cast(p.x) < (r.x0() + r.xsize()) && + static_cast(p.y) >= r.y0() && + static_cast(p.y) < (r.y0() + r.ysize())); +} + +struct ConnectedComponent { + ConnectedComponent(const Rect& bounds, const std::vector&& pixels) + : bounds(bounds), 
pixels(pixels) {} + Rect bounds; + std::vector pixels; + float maxEnergy; + float meanEnergy; + float varEnergy; + float meanBg; + float varBg; + float score; + Pixel mode; + + void CompStats(const ImageF& energy, int extra) { + maxEnergy = 0.0; + meanEnergy = 0.0; + varEnergy = 0.0; + meanBg = 0.0; + varBg = 0.0; + int nIn = 0; + int nOut = 0; + mode.x = 0; + mode.y = 0; + for (int sy = -extra; sy < (static_cast(bounds.ysize()) + extra); + sy++) { + int y = sy + static_cast(bounds.y0()); + if (y < 0 || static_cast(y) >= energy.ysize()) continue; + const float* JXL_RESTRICT erow = energy.ConstRow(y); + for (int sx = -extra; sx < (static_cast(bounds.xsize()) + extra); + sx++) { + int x = sx + static_cast(bounds.x0()); + if (x < 0 || static_cast(x) >= energy.xsize()) continue; + if (erow[x] > maxEnergy) { + maxEnergy = erow[x]; + mode.x = x; + mode.y = y; + } + if (PointInRect(bounds, Pixel{x, y})) { + meanEnergy += erow[x]; + varEnergy += erow[x] * erow[x]; + nIn++; + } else { + meanBg += erow[x]; + varBg += erow[x] * erow[x]; + nOut++; + } + } + } + meanEnergy = meanEnergy / nIn; + meanBg = meanBg / nOut; + varEnergy = (varEnergy / nIn) - meanEnergy * meanEnergy; + varBg = (varBg / nOut) - meanBg * meanBg; + score = (meanEnergy - meanBg) / std::sqrt(varBg); + } +}; + +Rect BoundingRectangle(const std::vector& pixels) { + JXL_ASSERT(!pixels.empty()); + int low_x, high_x, low_y, high_y; + low_x = high_x = pixels[0].x; + low_y = high_y = pixels[0].y; + for (const Pixel& p : pixels) { + low_x = std::min(low_x, p.x); + high_x = std::max(high_x, p.x); + low_y = std::min(low_y, p.y); + high_y = std::max(high_y, p.y); + } + return Rect(low_x, low_y, high_x - low_x + 1, high_y - low_y + 1); +} + +std::vector FindCC(const ImageF& energy, double t_low, + double t_high, uint32_t maxWindow, + double minScore) { + const int kExtraRect = 4; + ImageF img(energy.xsize(), energy.ysize()); + CopyImageTo(energy, &img); + std::vector ans; + for (size_t y = 0; y < img.ysize(); y++) { + 
float* JXL_RESTRICT row = img.Row(y); + for (size_t x = 0; x < img.xsize(); x++) { + if (row[x] > t_high) { + std::vector pixels; + row[x] = 0.0; + bool success = ExtractComponent( + &img, &pixels, Pixel{static_cast(x), static_cast(y)}, + t_low); + if (!success) continue; +#if JXL_DEBUG_DOT_DETECT + for (size_t i = 0; i < pixels.size(); i++) { + fprintf(stderr, "(%d,%d) ", pixels[i].x, pixels[i].y); + } + fprintf(stderr, "\n"); +#endif // JXL_DEBUG_DOT_DETECT + Rect bounds = BoundingRectangle(pixels); + if (bounds.xsize() < maxWindow && bounds.ysize() < maxWindow) { + ConnectedComponent cc{bounds, std::move(pixels)}; + cc.CompStats(energy, kExtraRect); + if (cc.score < minScore) continue; + JXL_DEBUG(JXL_DEBUG_DOT_DETECT, + "cc mode: (%d,%d), max: %f, bgMean: %f bgVar: " + "%f bound:(%" PRIuS ",%" PRIuS ",%" PRIuS ",%" PRIuS ")\n", + cc.mode.x, cc.mode.y, cc.maxEnergy, cc.meanEnergy, + cc.varEnergy, cc.bounds.x0(), cc.bounds.y0(), + cc.bounds.xsize(), cc.bounds.ysize()); + ans.push_back(cc); + } + } + } + } + return ans; +} + +// TODO (sggonzalez): Adapt this function for the different color spaces or +// remove it if the color space with the best performance does not need it +void ComputeDotLosses(GaussianEllipse* ellipse, const ConnectedComponent& cc, + const Image3F& img, const Image3F& background) { + const int rectBounds = 2; + const double kIntensityR = 0.0; // 0.015; + const double kSigmaR = 0.0; // 0.01; + const double kZeroEpsilon = 0.1; // Tolerance to consider a value negative + double ct = cos(ellipse->angle), st = sin(ellipse->angle); + const std::array channelGains{{1.0, 1.0, 1.0}}; + int N = 0; + ellipse->l1_loss = 0.0; + ellipse->l2_loss = 0.0; + ellipse->neg_pixels = 0; + ellipse->neg_value.fill(0.0); + double distMeanModeSq = (cc.mode.x - ellipse->x) * (cc.mode.x - ellipse->x) + + (cc.mode.y - ellipse->y) * (cc.mode.y - ellipse->y); + ellipse->custom_loss = 0.0; + for (int c = 0; c < 3; c++) { + for (int sy = -rectBounds; + sy < 
(static_cast(cc.bounds.ysize()) + rectBounds); sy++) { + int y = sy + cc.bounds.y0(); + if (y < 0 || static_cast(y) >= img.ysize()) continue; + const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y); + // bgrow is only used if kOptimizeBackground is false. + // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) + const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y); + for (int sx = -rectBounds; + sx < (static_cast(cc.bounds.xsize()) + rectBounds); sx++) { + int x = sx + cc.bounds.x0(); + if (x < 0 || static_cast(x) >= img.xsize()) continue; + double target = row[x]; + double dotDelta = DotGaussianModel( + x - ellipse->x, y - ellipse->y, ct, st, ellipse->sigma_x, + ellipse->sigma_y, ellipse->intensity[c]); + if (dotDelta > target + kZeroEpsilon) { + ellipse->neg_pixels++; + ellipse->neg_value[c] += dotDelta - target; + } + double bkg = kOptimizeBackground ? ellipse->bgColor[c] : bgrow[x]; + double pred = bkg + dotDelta; + double diff = target - pred; + double l2 = channelGains[c] * diff * diff; + double l1 = channelGains[c] * std::fabs(diff); + ellipse->l2_loss += l2; + ellipse->l1_loss += l1; + double w = DotGaussianModel(x - cc.mode.x, y - cc.mode.y, 1.0, 0.0, + 1.0 + ellipse->sigma_x, + 1.0 + ellipse->sigma_y, 1.0); + ellipse->custom_loss += w * l2; + N++; + } + } + } + ellipse->l2_loss /= N; + ellipse->custom_loss /= N; + ellipse->custom_loss += 20.0 * distMeanModeSq + ellipse->neg_value[1]; + ellipse->l1_loss /= N; + double ridgeTerm = kSigmaR * ellipse->sigma_x + kSigmaR * ellipse->sigma_y; + for (int c = 0; c < 3; c++) { + ridgeTerm += kIntensityR * ellipse->intensity[c] * ellipse->intensity[c]; + } + ellipse->ridge_loss = ellipse->l2_loss + ridgeTerm; +} + +GaussianEllipse FitGaussianFast(const ConnectedComponent& cc, + const ImageF& energy, const Image3F& img, + const Image3F& background) { + constexpr bool leastSqIntensity = true; + constexpr double kEpsilon = 1e-6; + GaussianEllipse ans; + constexpr int kRectBounds = (kEllipseWindowSize >> 
1); + + // Compute the 1st and 2nd moments of the CC + double sum = 0.0; + int N = 0; + std::array m1{{0.0, 0.0, 0.0}}; + std::array m2{{0.0, 0.0, 0.0}}; + std::array color{{0.0, 0.0, 0.0}}; + std::array bgColor{{0.0, 0.0, 0.0}}; + + JXL_DEBUG(JXL_DEBUG_DOT_DETECT, + "%" PRIuS " %" PRIuS " %" PRIuS " %" PRIuS "\n", cc.bounds.x0(), + cc.bounds.y0(), cc.bounds.xsize(), cc.bounds.ysize()); + for (int c = 0; c < 3; c++) { + color[c] = img.ConstPlaneRow(c, cc.mode.y)[cc.mode.x] - + background.ConstPlaneRow(c, cc.mode.y)[cc.mode.x]; + } + double sign = (color[1] > 0) ? 1 : -1; + for (int sy = -kRectBounds; sy <= kRectBounds; sy++) { + int y = sy + cc.mode.y; + if (y < 0 || static_cast(y) >= energy.ysize()) continue; + const float* JXL_RESTRICT row = img.ConstPlaneRow(1, y); + const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(1, y); + for (int sx = -kRectBounds; sx <= kRectBounds; sx++) { + int x = sx + cc.mode.x; + if (x < 0 || static_cast(x) >= energy.xsize()) continue; + double w = std::max(kEpsilon, sign * (row[x] - bgrow[x])); + sum += w; + + m1[0] += w * x; + m1[1] += w * y; + m2[0] += w * x * x; + m2[1] += w * x * y; + m2[2] += w * y * y; + for (int c = 0; c < 3; c++) { + bgColor[c] += background.ConstPlaneRow(c, y)[x]; + } + N++; + } + } + JXL_CHECK(N > 0); + + for (int i = 0; i < 3; i++) { + m1[i] /= sum; + m2[i] /= sum; + bgColor[i] /= N; + } + + // Some magic constants + constexpr double kSigmaMult = 1.0; + constexpr std::array kScaleMult{{1.1, 1.1, 1.1}}; + + // Now set the parameters of the Gaussian + ans.x = m1[0]; + ans.y = m1[1]; + for (int j = 0; j < 3; j++) { + ans.intensity[j] = kScaleMult[j] * color[j]; + } + + ImageD Sigma(2, 2), D(1, 2), U(2, 2); + Sigma.Row(0)[0] = m2[0] - m1[0] * m1[0]; + Sigma.Row(1)[1] = m2[2] - m1[1] * m1[1]; + Sigma.Row(0)[1] = Sigma.Row(1)[0] = m2[1] - m1[0] * m1[1]; + ConvertToDiagonal(Sigma, &D, &U); + const double* JXL_RESTRICT d = D.ConstRow(0); + const double* JXL_RESTRICT u = U.ConstRow(1); + int p1 = 0, p2 = 1; 
+ if (d[0] < d[1]) std::swap(p1, p2); + ans.sigma_x = kSigmaMult * d[p1]; + ans.sigma_y = kSigmaMult * d[p2]; + ans.angle = std::atan2(u[p1], u[p2]); + ans.l2_loss = 0.0; + ans.bgColor = bgColor; + if (leastSqIntensity) { + GaussianEllipse* ellipse = &ans; + double ct = cos(ans.angle), st = sin(ans.angle); + // Estimate intensity with least squares (fixed background) + for (int c = 0; c < 3; c++) { + double gg = 0.0; + double gd = 0.0; + int yc = static_cast(cc.mode.y); + int xc = static_cast(cc.mode.x); + for (int y = yc - kRectBounds; y <= yc + kRectBounds; y++) { + if (y < 0 || static_cast(y) >= img.ysize()) continue; + const float* JXL_RESTRICT row = img.ConstPlaneRow(c, y); + const float* JXL_RESTRICT bgrow = background.ConstPlaneRow(c, y); + for (int x = xc - kRectBounds; x <= xc + kRectBounds; x++) { + if (x < 0 || static_cast(x) >= img.xsize()) continue; + double target = row[x] - bgrow[x]; + double gaussian = + DotGaussianModel(x - ellipse->x, y - ellipse->y, ct, st, + ellipse->sigma_x, ellipse->sigma_y, 1.0); + gg += gaussian * gaussian; + gd += gaussian * target; + } + } + ans.intensity[c] = gd / (gg + 1e-6); // Regularized least squares + } + } + ComputeDotLosses(&ans, cc, img, background); + return ans; +} + +GaussianEllipse FitGaussian(const ConnectedComponent& cc, const ImageF& energy, + const Image3F& img, const Image3F& background) { + auto ellipse = FitGaussianFast(cc, energy, img, background); + if (ellipse.sigma_x < ellipse.sigma_y) { + std::swap(ellipse.sigma_x, ellipse.sigma_y); + ellipse.angle += kPi / 2.0; + } + ellipse.angle -= kPi * std::floor(ellipse.angle / kPi); + if (fabs(ellipse.angle - kPi) < 1e-6 || fabs(ellipse.angle) < 1e-6) { + ellipse.angle = 0.0; + } + JXL_CHECK(ellipse.angle >= 0 && ellipse.angle <= kPi && + ellipse.sigma_x >= ellipse.sigma_y); + JXL_DEBUG(JXL_DEBUG_DOT_DETECT, + "Ellipse mu=(%lf,%lf) sigma=(%lf,%lf) angle=%lf " + "intensity=(%lf,%lf,%lf) bg=(%lf,%lf,%lf) l2_loss=%lf " + "custom_loss=%lf, neg_pix=%" PRIuS ", 
neg_v=(%lf,%lf,%lf)\n", + ellipse.x, ellipse.y, ellipse.sigma_x, ellipse.sigma_y, + ellipse.angle, ellipse.intensity[0], ellipse.intensity[1], + ellipse.intensity[2], ellipse.bgColor[0], ellipse.bgColor[1], + ellipse.bgColor[2], ellipse.l2_loss, ellipse.custom_loss, + ellipse.neg_pixels, ellipse.neg_value[0], ellipse.neg_value[1], + ellipse.neg_value[2]); + return ellipse; +} + +} // namespace + +std::vector DetectGaussianEllipses( + const Image3F& opsin, const GaussianDetectParams& params, + const EllipseQuantParams& qParams, ThreadPool* pool) { + std::vector dots; + Image3F smooth(opsin.xsize(), opsin.ysize()); + ImageF energy = ComputeEnergyImage(opsin, &smooth, pool); + std::vector components = FindCC( + energy, params.t_low, params.t_high, params.maxWinSize, params.minScore); + size_t numCC = + std::min(params.maxCC, (components.size() * params.percCC) / 100); + if (components.size() > numCC) { + std::sort( + components.begin(), components.end(), + [](const ConnectedComponent& a, const ConnectedComponent& b) -> bool { + return a.score > b.score; + }); + components.erase(components.begin() + numCC, components.end()); + } + for (const auto& cc : components) { + GaussianEllipse ellipse = FitGaussian(cc, energy, opsin, smooth); + if (ellipse.x < 0.0 || + std::ceil(ellipse.x) >= static_cast(opsin.xsize()) || + ellipse.y < 0.0 || + std::ceil(ellipse.y) >= static_cast(opsin.ysize())) { + continue; + } + if (ellipse.neg_pixels > params.maxNegPixels) continue; + double intensity = 0.21 * ellipse.intensity[0] + + 0.72 * ellipse.intensity[1] + + 0.07 * ellipse.intensity[2]; + double intensitySq = intensity * intensity; + // for (int c = 0; c < 3; c++) { + // intensitySq += ellipse.intensity[c] * ellipse.intensity[c]; + //} + double sqDistMeanMode = (ellipse.x - cc.mode.x) * (ellipse.x - cc.mode.x) + + (ellipse.y - cc.mode.y) * (ellipse.y - cc.mode.y); + if (ellipse.l2_loss < params.maxL2Loss && + ellipse.custom_loss < params.maxCustomLoss && + intensitySq > 
(params.minIntensity * params.minIntensity) && + sqDistMeanMode < params.maxDistMeanMode * params.maxDistMeanMode) { + size_t x0 = cc.bounds.x0(); + size_t y0 = cc.bounds.y0(); + dots.emplace_back(); + dots.back().second.emplace_back(x0, y0); + QuantizedPatch& patch = dots.back().first; + patch.xsize = cc.bounds.xsize(); + patch.ysize = cc.bounds.ysize(); + for (size_t y = 0; y < patch.ysize; y++) { + for (size_t x = 0; x < patch.xsize; x++) { + for (size_t c = 0; c < 3; c++) { + patch.fpixels[c][y * patch.xsize + x] = + opsin.ConstPlaneRow(c, y0 + y)[x0 + x] - + smooth.ConstPlaneRow(c, y0 + y)[x0 + x]; + } + } + } + } + } + return dots; +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.h b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.h new file mode 100644 index 0000000000..c3071d9a2f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_detect_dots.h @@ -0,0 +1,67 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// We attempt to remove dots, or speckle from images using Gaussian blur. 
+#ifndef LIB_JXL_ENC_DETECT_DOTS_H_ +#define LIB_JXL_ENC_DETECT_DOTS_H_ + +#include +#include + +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/dec_patch_dictionary.h" +#include "lib/jxl/enc_patch_dictionary.h" +#include "lib/jxl/image.h" + +namespace jxl { + +struct GaussianDetectParams { + double t_high = 0; // at least one pixel must have larger energy than t_high + double t_low = 0; // all pixels must have a larger energy than tLow + uint32_t maxWinSize = 0; // discard dots larger than this containing window + double maxL2Loss = 0; + double maxCustomLoss = 0; + double minIntensity = 0; // If the intensity is too low, discard it + double maxDistMeanMode = 0; // The mean and the mode must be close + size_t maxNegPixels = 0; // Maximum number of negative pixel + size_t minScore = 0; + size_t maxCC = 50; // Maximum number of CC to keep + size_t percCC = 15; // Percentage in [0,100] of CC to keep +}; + +// Ellipse Quantization Params +struct EllipseQuantParams { + size_t xsize; // Image size in x + size_t ysize; // Image size in y + size_t qPosition; // Position quantization delta + // Quantization for the Gaussian sigma parameters + double minSigma; + double maxSigma; + size_t qSigma; // number of quantization levels + // Quantization for the rotation angle (between -pi and pi) + size_t qAngle; + // Quantization for the intensity + std::array minIntensity; + std::array maxIntensity; + std::array qIntensity; // number of quantization levels + // Extra parameters for the encoding + bool subtractQuantized; // Should we subtract quantized or detected dots? + float ytox; + float ytob; + + void QuantPositionSize(size_t* xsize, size_t* ysize) const; +}; + +// Detects dots in XYB image. 
+std::vector DetectGaussianEllipses( + const Image3F& opsin, const GaussianDetectParams& params, + const EllipseQuantParams& qParams, ThreadPool* pool); + +} // namespace jxl + +#endif // LIB_JXL_ENC_DETECT_DOTS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.cc b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.cc new file mode 100644 index 0000000000..2d22c1edb8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.cc @@ -0,0 +1,71 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_dot_dictionary.h" + +#include +#include + +#include +#include + +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_detect_dots.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_xyb.h" +#include "lib/jxl/image.h" + +namespace jxl { + +// Private implementation of Dictionary Encode/Decode +namespace { + +/* Quantization constants for Ellipse dots */ +const size_t kEllipsePosQ = 2; // Quantization level for the position +const double kEllipseMinSigma = 0.1; // Minimum sigma value +const double kEllipseMaxSigma = 3.1; // Maximum Sigma value +const size_t kEllipseSigmaQ = 16; // Number of quantization levels for sigma +const size_t kEllipseAngleQ = 8; // Quantization level for the angle +// TODO: fix these values. 
+const std::array kEllipseMinIntensity{{-0.05, 0.0, -0.5}}; +const std::array kEllipseMaxIntensity{{0.05, 1.0, 0.4}}; +const std::array kEllipseIntensityQ{{10, 36, 10}}; +} // namespace + +std::vector FindDotDictionary(const CompressParams& cparams, + const Image3F& opsin, + const ColorCorrelationMap& cmap, + ThreadPool* pool) { + if (ApplyOverride(cparams.dots, + cparams.butteraugli_distance >= kMinButteraugliForDots)) { + GaussianDetectParams ellipse_params; + ellipse_params.t_high = 0.04; + ellipse_params.t_low = 0.02; + ellipse_params.maxWinSize = 5; + ellipse_params.maxL2Loss = 0.005; + ellipse_params.maxCustomLoss = 300; + ellipse_params.minIntensity = 0.12; + ellipse_params.maxDistMeanMode = 1.0; + ellipse_params.maxNegPixels = 0; + ellipse_params.minScore = 12.0; + ellipse_params.maxCC = 100; + ellipse_params.percCC = 100; + EllipseQuantParams qParams{ + opsin.xsize(), opsin.ysize(), kEllipsePosQ, + kEllipseMinSigma, kEllipseMaxSigma, kEllipseSigmaQ, + kEllipseAngleQ, kEllipseMinIntensity, kEllipseMaxIntensity, + kEllipseIntensityQ, kEllipsePosQ <= 5, cmap.YtoXRatio(0), + cmap.YtoBRatio(0)}; + + return DetectGaussianEllipses(opsin, ellipse_params, qParams, pool); + } + return {}; +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.h b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.h new file mode 100644 index 0000000000..2ba4393f30 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_dot_dictionary.h @@ -0,0 +1,34 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_DOT_DICTIONARY_H_ +#define LIB_JXL_ENC_DOT_DICTIONARY_H_ + +// Dots are stored in a dictionary to avoid storing similar dots multiple +// times. 
+ +#include + +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_patch_dictionary.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_patch_dictionary.h" +#include "lib/jxl/image.h" + +namespace jxl { + +std::vector FindDotDictionary(const CompressParams& cparams, + const Image3F& opsin, + const ColorCorrelationMap& cmap, + ThreadPool* pool); + +} // namespace jxl + +#endif // LIB_JXL_ENC_DOT_DICTIONARY_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.cc b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.cc new file mode 100644 index 0000000000..0c293b91d5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.cc @@ -0,0 +1,273 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_entropy_coder.h" + +#include +#include + +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_entropy_coder.cc" +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_context_map.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/epf.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::AndNot; +using hwy::HWY_NAMESPACE::Eq; +using hwy::HWY_NAMESPACE::GetLane; + +// Returns number of non-zero coefficients (but skip LLF). +// We cannot rely on block[] being all-zero bits, so first truncate to integer. +// Also writes the per-8x8 block nzeros starting at nzeros_pos. +int32_t NumNonZeroExceptLLF(const size_t cx, const size_t cy, + const AcStrategy acs, const size_t covered_blocks, + const size_t log2_covered_blocks, + const int32_t* JXL_RESTRICT block, + const size_t nzeros_stride, + int32_t* JXL_RESTRICT nzeros_pos) { + const HWY_CAPPED(int32_t, kBlockDim) di; + + const auto zero = Zero(di); + // Add FF..FF for every zero coefficient, negate to get #zeros. + auto neg_sum_zero = zero; + + { + // Mask sufficient for one row of coefficients. + HWY_ALIGN const int32_t + llf_mask_lanes[AcStrategy::kMaxCoeffBlocks * (1 + kBlockDim)] = { + -1, -1, -1, -1}; + // First cx=1,2,4 elements are FF..FF, others 0. + const int32_t* llf_mask_pos = + llf_mask_lanes + AcStrategy::kMaxCoeffBlocks - cx; + + // Rows with LLF: mask out the LLF + for (size_t y = 0; y < cy; y++) { + for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) { + const auto llf_mask = LoadU(di, llf_mask_pos + x); + + // LLF counts as zero so we don't include it in nzeros. + const auto coef = + AndNot(llf_mask, Load(di, &block[y * cx * kBlockDim + x])); + + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + } + + // Remaining rows: no mask + for (size_t y = cy; y < cy * kBlockDim; y++) { + for (size_t x = 0; x < cx * kBlockDim; x += Lanes(di)) { + const auto coef = Load(di, &block[y * cx * kBlockDim + x]); + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + + // We want area - sum_zero, add because neg_sum_zero is already negated. 
+ const int32_t nzeros = + int32_t(cx * cy * kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero)); + + const int32_t shifted_nzeros = static_cast( + (nzeros + covered_blocks - 1) >> log2_covered_blocks); + // Need non-canonicalized dimensions! + for (size_t y = 0; y < acs.covered_blocks_y(); y++) { + for (size_t x = 0; x < acs.covered_blocks_x(); x++) { + nzeros_pos[x + y * nzeros_stride] = shifted_nzeros; + } + } + + return nzeros; +} + +// Specialization for 8x8, where only top-left is LLF/DC. +// About 1% overall speedup vs. NumNonZeroExceptLLF. +int32_t NumNonZero8x8ExceptDC(const int32_t* JXL_RESTRICT block, + int32_t* JXL_RESTRICT nzeros_pos) { + const HWY_CAPPED(int32_t, kBlockDim) di; + + const auto zero = Zero(di); + // Add FF..FF for every zero coefficient, negate to get #zeros. + auto neg_sum_zero = zero; + + { + // First row has DC, so mask + const size_t y = 0; + HWY_ALIGN const int32_t dc_mask_lanes[kBlockDim] = {-1}; + + for (size_t x = 0; x < kBlockDim; x += Lanes(di)) { + const auto dc_mask = Load(di, dc_mask_lanes + x); + + // DC counts as zero so we don't include it in nzeros. + const auto coef = AndNot(dc_mask, Load(di, &block[y * kBlockDim + x])); + + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + + // Remaining rows: no mask + for (size_t y = 1; y < kBlockDim; y++) { + for (size_t x = 0; x < kBlockDim; x += Lanes(di)) { + const auto coef = Load(di, &block[y * kBlockDim + x]); + neg_sum_zero = Add(neg_sum_zero, VecFromMask(di, Eq(coef, zero))); + } + } + + // We want 64 - sum_zero, add because neg_sum_zero is already negated. + const int32_t nzeros = + int32_t(kDCTBlockSize) + GetLane(SumOfLanes(di, neg_sum_zero)); + + *nzeros_pos = nzeros; + + return nzeros; +} + +// The number of nonzeros of each block is predicted from the top and the left +// blocks, with opportune scaling to take into account the number of blocks of +// each strategy. 
The predicted number of nonzeros divided by two is used as a +// context; if this number is above 63, a specific context is used. If the +// number of nonzeros of a strategy is above 63, it is written directly using a +// fixed number of bits (that depends on the size of the strategy). +void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders, + const Rect& rect, + const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows, + const AcStrategyImage& ac_strategy, + YCbCrChromaSubsampling cs, + Image3I* JXL_RESTRICT tmp_num_nzeroes, + std::vector* JXL_RESTRICT output, + const ImageB& qdc, const ImageI& qf, + const BlockCtxMap& block_ctx_map) { + const size_t xsize_blocks = rect.xsize(); + const size_t ysize_blocks = rect.ysize(); + + // TODO(user): update the estimate: usually less coefficients are used. + output->reserve(output->size() + + 3 * xsize_blocks * ysize_blocks * kDCTBlockSize); + + size_t offset[3] = {}; + const size_t nzeros_stride = tmp_num_nzeroes->PixelsPerRow(); + for (size_t by = 0; by < ysize_blocks; ++by) { + size_t sby[3] = {by >> cs.VShift(0), by >> cs.VShift(1), + by >> cs.VShift(2)}; + int32_t* JXL_RESTRICT row_nzeros[3] = { + tmp_num_nzeroes->PlaneRow(0, sby[0]), + tmp_num_nzeroes->PlaneRow(1, sby[1]), + tmp_num_nzeroes->PlaneRow(2, sby[2]), + }; + const int32_t* JXL_RESTRICT row_nzeros_top[3] = { + sby[0] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(0, sby[0] - 1), + sby[1] == 0 ? nullptr : tmp_num_nzeroes->ConstPlaneRow(1, sby[1] - 1), + sby[2] == 0 ? 
nullptr : tmp_num_nzeroes->ConstPlaneRow(2, sby[2] - 1), + }; + const uint8_t* JXL_RESTRICT row_qdc = + qdc.ConstRow(rect.y0() + by) + rect.x0(); + const int32_t* JXL_RESTRICT row_qf = rect.ConstRow(qf, by); + AcStrategyRow acs_row = ac_strategy.ConstRow(rect, by); + for (size_t bx = 0; bx < xsize_blocks; ++bx) { + AcStrategy acs = acs_row[bx]; + if (!acs.IsFirstBlock()) continue; + size_t sbx[3] = {bx >> cs.HShift(0), bx >> cs.HShift(1), + bx >> cs.HShift(2)}; + size_t cx = acs.covered_blocks_x(); + size_t cy = acs.covered_blocks_y(); + const size_t covered_blocks = cx * cy; // = #LLF coefficients + const size_t log2_covered_blocks = + Num0BitsBelowLS1Bit_Nonzero(covered_blocks); + const size_t size = covered_blocks * kDCTBlockSize; + + CoefficientLayout(&cy, &cx); // swap cx/cy to canonical order + + for (int c : {1, 0, 2}) { + if (sbx[c] << cs.HShift(c) != bx) continue; + if (sby[c] << cs.VShift(c) != by) continue; + const int32_t* JXL_RESTRICT block = ac_rows[c] + offset[c]; + + int32_t nzeros = + (covered_blocks == 1) + ? NumNonZero8x8ExceptDC(block, row_nzeros[c] + sbx[c]) + : NumNonZeroExceptLLF(cx, cy, acs, covered_blocks, + log2_covered_blocks, block, nzeros_stride, + row_nzeros[c] + sbx[c]); + + int ord = kStrategyOrder[acs.RawStrategy()]; + const coeff_order_t* JXL_RESTRICT order = + &orders[CoeffOrderOffset(ord, c)]; + + int32_t predicted_nzeros = + PredictFromTopAndLeft(row_nzeros_top[c], row_nzeros[c], sbx[c], 32); + size_t block_ctx = + block_ctx_map.Context(row_qdc[bx], row_qf[sbx[c]], ord, c); + const int32_t nzero_ctx = + block_ctx_map.NonZeroContext(predicted_nzeros, block_ctx); + + output->emplace_back(nzero_ctx, nzeros); + const size_t histo_offset = + block_ctx_map.ZeroDensityContextsOffset(block_ctx); + // Skip LLF. + size_t prev = (nzeros > static_cast(size / 16) ? 
0 : 1); + for (size_t k = covered_blocks; k < size && nzeros != 0; ++k) { + int32_t coeff = block[order[k]]; + size_t ctx = + histo_offset + ZeroDensityContext(nzeros, k, covered_blocks, + log2_covered_blocks, prev); + uint32_t u_coeff = PackSigned(coeff); + output->emplace_back(ctx, u_coeff); + prev = coeff != 0; + nzeros -= prev; + } + JXL_DASSERT(nzeros == 0); + offset[c] += size; + } + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(TokenizeCoefficients); +void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders, + const Rect& rect, + const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows, + const AcStrategyImage& ac_strategy, + YCbCrChromaSubsampling cs, + Image3I* JXL_RESTRICT tmp_num_nzeroes, + std::vector* JXL_RESTRICT output, + const ImageB& qdc, const ImageI& qf, + const BlockCtxMap& block_ctx_map) { + return HWY_DYNAMIC_DISPATCH(TokenizeCoefficients)( + orders, rect, ac_rows, ac_strategy, cs, tmp_num_nzeroes, output, qdc, qf, + block_ctx_map); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.h b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.h new file mode 100644 index 0000000000..7dfc71c726 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_entropy_coder.h @@ -0,0 +1,46 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_ENTROPY_CODER_H_ +#define LIB_JXL_ENC_ENTROPY_CODER_H_ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/ac_context.h" // BlockCtxMap +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/frame_header.h" // YCbCrChromaSubsampling +#include "lib/jxl/image.h" + +// Entropy coding and context modeling of DC and AC coefficients, as well as AC +// strategy and quantization field. + +namespace jxl { + +// Generate DCT NxN quantized AC values tokens. +// Only the subset "rect" [in units of blocks] within all images. +// See also DecodeACVarBlock. +void TokenizeCoefficients(const coeff_order_t* JXL_RESTRICT orders, + const Rect& rect, + const int32_t* JXL_RESTRICT* JXL_RESTRICT ac_rows, + const AcStrategyImage& ac_strategy, + YCbCrChromaSubsampling cs, + Image3I* JXL_RESTRICT tmp_num_nzeroes, + std::vector* JXL_RESTRICT output, + const ImageB& qdc, const ImageI& qf, + const BlockCtxMap& block_ctx_map); + +} // namespace jxl + +#endif // LIB_JXL_ENC_ENTROPY_CODER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image.cc b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.cc new file mode 100644 index 0000000000..473e71adda --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.cc @@ -0,0 +1,180 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_external_image.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/alpha.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/float.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" + +namespace jxl { +namespace { + +size_t JxlDataTypeBytes(JxlDataType data_type) { + switch (data_type) { + case JXL_TYPE_UINT8: + return 1; + case JXL_TYPE_UINT16: + return 2; + case JXL_TYPE_FLOAT16: + return 2; + case JXL_TYPE_FLOAT: + return 4; + default: + return 0; + } +} + +} // namespace + +Status ConvertFromExternal(Span bytes, size_t xsize, + size_t ysize, size_t bits_per_sample, + JxlPixelFormat format, size_t c, ThreadPool* pool, + ImageF* channel) { + if (format.data_type == JXL_TYPE_UINT8) { + JXL_RETURN_IF_ERROR(bits_per_sample > 0 && bits_per_sample <= 8); + } else if (format.data_type == JXL_TYPE_UINT16) { + JXL_RETURN_IF_ERROR(bits_per_sample > 8 && bits_per_sample <= 16); + } else if (format.data_type != JXL_TYPE_FLOAT16 && + format.data_type != JXL_TYPE_FLOAT) { + JXL_FAILURE("unsupported pixel format data type %d", format.data_type); + } + size_t bytes_per_channel = JxlDataTypeBytes(format.data_type); + size_t bytes_per_pixel = format.num_channels * bytes_per_channel; + size_t pixel_offset = c * bytes_per_channel; + // Only for uint8/16. + float scale = 1. / ((1ull << bits_per_sample) - 1); + + const size_t last_row_size = xsize * bytes_per_pixel; + const size_t align = format.align; + const size_t row_size = + (align > 1 ? 
jxl::DivCeil(last_row_size, align) * align : last_row_size); + const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size; + if (xsize == 0 || ysize == 0) return JXL_FAILURE("Empty image"); + if (bytes.size() < bytes_to_read) { + return JXL_FAILURE("Buffer size is too small, expected: %" PRIuS + " got: %" PRIuS " (Image: %" PRIuS "x%" PRIuS + "x%u, bytes_per_channel: %" PRIuS ")", + bytes_to_read, bytes.size(), xsize, ysize, + format.num_channels, bytes_per_channel); + } + JXL_ASSERT(channel->xsize() == xsize); + JXL_ASSERT(channel->ysize() == ysize); + // Too large buffer is likely an application bug, so also fail for that. + // Do allow padding to stride in last row though. + if (bytes.size() > row_size * ysize) { + return JXL_FAILURE("Buffer size is too large"); + } + + const bool little_endian = + format.endianness == JXL_LITTLE_ENDIAN || + (format.endianness == JXL_NATIVE_ENDIAN && IsLittleEndian()); + + const uint8_t* const in = bytes.data(); + + std::atomic error_count = {0}; + + const auto convert_row = [&](const uint32_t task, size_t /*thread*/) { + const size_t y = task; + size_t offset = row_size * task + pixel_offset; + float* JXL_RESTRICT row_out = channel->Row(y); + const auto save_value = [&](size_t index, float value) { + row_out[index] = value; + }; + if (!LoadFloatRow(in + offset, xsize, bytes_per_pixel, format.data_type, + little_endian, scale, save_value)) { + error_count++; + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, static_cast(ysize), + ThreadPool::NoInit, convert_row, + "ConvertExtraChannel")); + + if (error_count) { + JXL_FAILURE("unsupported pixel format data type"); + } + + return true; +} +Status ConvertFromExternal(Span bytes, size_t xsize, + size_t ysize, const ColorEncoding& c_current, + size_t bits_per_sample, JxlPixelFormat format, + ThreadPool* pool, ImageBundle* ib) { + const size_t color_channels = c_current.Channels(); + bool has_alpha = format.num_channels == 2 || format.num_channels == 4; + if 
(format.num_channels < color_channels) { + return JXL_FAILURE("Expected %" PRIuS + " color channels, received only %u channels", + color_channels, format.num_channels); + } + + Image3F color(xsize, ysize); + for (size_t c = 0; c < color_channels; ++c) { + JXL_RETURN_IF_ERROR(ConvertFromExternal(bytes, xsize, ysize, + bits_per_sample, format, c, pool, + &color.Plane(c))); + } + if (color_channels == 1) { + CopyImageTo(color.Plane(0), &color.Plane(1)); + CopyImageTo(color.Plane(0), &color.Plane(2)); + } + ib->SetFromImage(std::move(color), c_current); + + // Passing an interleaved image with an alpha channel to an image that doesn't + // have alpha channel just discards the passed alpha channel. + if (has_alpha && ib->HasAlpha()) { + ImageF alpha(xsize, ysize); + JXL_RETURN_IF_ERROR( + ConvertFromExternal(bytes, xsize, ysize, bits_per_sample, format, + format.num_channels - 1, pool, &alpha)); + ib->SetAlpha(std::move(alpha)); + } else if (!has_alpha && ib->HasAlpha()) { + // if alpha is not passed, but it is expected, then assume + // it is all-opaque + ImageF alpha(xsize, ysize); + FillImage(1.0f, &alpha); + ib->SetAlpha(std::move(alpha)); + } + + return true; +} + +Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize, + size_t ysize, const void* buffer, size_t size, + ThreadPool* pool, ImageF* channel) { + size_t bitdepth = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte; + return ConvertFromExternal( + jxl::Span(static_cast(buffer), size), + xsize, ysize, bitdepth, pixel_format, 0, pool, channel); +} + +Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize, + uint32_t ysize, const void* buffer, size_t size, + jxl::ThreadPool* pool, + const jxl::ColorEncoding& c_current, + jxl::ImageBundle* ib) { + size_t bitdepth = JxlDataTypeBytes(pixel_format.data_type) * kBitsPerByte; + JXL_RETURN_IF_ERROR(ConvertFromExternal( + jxl::Span(static_cast(buffer), size), + xsize, ysize, c_current, bitdepth, pixel_format, pool, ib)); 
+ ib->VerifyMetadata(); + + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image.h b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.h new file mode 100644 index 0000000000..3b2b295076 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image.h @@ -0,0 +1,45 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_EXTERNAL_IMAGE_H_ +#define LIB_JXL_ENC_EXTERNAL_IMAGE_H_ + +// Interleaved image for color transforms and Codec. + +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { +Status ConvertFromExternal(Span bytes, size_t xsize, + size_t ysize, size_t bits_per_sample, + JxlPixelFormat format, size_t c, ThreadPool* pool, + ImageF* channel); + +// Convert an interleaved pixel buffer to the internal ImageBundle +// representation. This is the opposite of ConvertToExternal(). 
+Status ConvertFromExternal(Span bytes, size_t xsize, + size_t ysize, const ColorEncoding& c_current, + size_t bits_per_sample, JxlPixelFormat format, + ThreadPool* pool, ImageBundle* ib); +Status BufferToImageF(const JxlPixelFormat& pixel_format, size_t xsize, + size_t ysize, const void* buffer, size_t size, + ThreadPool* pool, ImageF* channel); +Status BufferToImageBundle(const JxlPixelFormat& pixel_format, uint32_t xsize, + uint32_t ysize, const void* buffer, size_t size, + jxl::ThreadPool* pool, + const jxl::ColorEncoding& c_current, + jxl::ImageBundle* ib); + +} // namespace jxl + +#endif // LIB_JXL_ENC_EXTERNAL_IMAGE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_gbench.cc new file mode 100644 index 0000000000..4b7147817a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_gbench.cc @@ -0,0 +1,46 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "benchmark/benchmark.h" +#include "lib/jxl/enc_external_image.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { +namespace { + +// Encoder case, deinterleaves a buffer. +void BM_EncExternalImage_ConvertImageRGBA(benchmark::State& state) { + const size_t kNumIter = 5; + size_t xsize = state.range(); + size_t ysize = state.range(); + + ImageMetadata im; + im.SetAlphaBits(8); + ImageBundle ib(&im); + + std::vector interleaved(xsize * ysize * 4); + JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0}; + for (auto _ : state) { + for (size_t i = 0; i < kNumIter; ++i) { + JXL_CHECK(ConvertFromExternal( + Span(interleaved.data(), interleaved.size()), xsize, + ysize, + /*c_current=*/ColorEncoding::SRGB(), + /*bits_per_sample=*/8, format, + /*pool=*/nullptr, &ib)); + } + } + + // Pixels per second. 
+ state.SetItemsProcessed(kNumIter * state.iterations() * xsize * ysize); + state.SetBytesProcessed(kNumIter * state.iterations() * interleaved.size()); +} + +BENCHMARK(BM_EncExternalImage_ConvertImageRGBA) + ->RangeMultiplier(2) + ->Range(256, 2048); + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_external_image_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_test.cc new file mode 100644 index 0000000000..7be8d45f2d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_external_image_test.cc @@ -0,0 +1,79 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_external_image.h" + +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +#if !defined(JXL_CRASH_ON_ERROR) +TEST(ExternalImageTest, InvalidSize) { + ImageMetadata im; + im.SetAlphaBits(8); + ImageBundle ib(&im); + + JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + const uint8_t buf[10 * 100 * 8] = {}; + EXPECT_FALSE(ConvertFromExternal( + Span(buf, 10), /*xsize=*/10, /*ysize=*/100, + /*c_current=*/ColorEncoding::SRGB(), + /*bits_per_sample=*/16, format, nullptr, &ib)); + EXPECT_FALSE(ConvertFromExternal( + Span(buf, sizeof(buf) - 1), /*xsize=*/10, /*ysize=*/100, + /*c_current=*/ColorEncoding::SRGB(), + /*bits_per_sample=*/16, format, nullptr, &ib)); + EXPECT_TRUE( + ConvertFromExternal(Span(buf, sizeof(buf)), /*xsize=*/10, + /*ysize=*/100, /*c_current=*/ColorEncoding::SRGB(), + /*bits_per_sample=*/16, format, nullptr, &ib)); +} +#endif + +TEST(ExternalImageTest, AlphaMissing) { + ImageMetadata im; + im.SetAlphaBits(0); // No alpha + ImageBundle ib(&im); 
+ + const size_t xsize = 10; + const size_t ysize = 20; + const uint8_t buf[xsize * ysize * 4] = {}; + + JxlPixelFormat format = {4, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0}; + // has_alpha is true but the ImageBundle has no alpha. Alpha channel should + // be ignored. + EXPECT_TRUE(ConvertFromExternal(Span(buf, sizeof(buf)), xsize, + ysize, + /*c_current=*/ColorEncoding::SRGB(), + /*bits_per_sample=*/8, format, nullptr, &ib)); + EXPECT_FALSE(ib.HasAlpha()); +} + +TEST(ExternalImageTest, AlphaPremultiplied) { + ImageMetadata im; + im.SetAlphaBits(8, true); + + ImageBundle ib(&im); + const size_t xsize = 10; + const size_t ysize = 20; + const size_t size = xsize * ysize * 8; + const uint8_t buf[size] = {}; + + JxlPixelFormat format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + EXPECT_TRUE(BufferToImageBundle(format, xsize, ysize, buf, size, nullptr, + ColorEncoding::SRGB(), &ib)); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.cc b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.cc new file mode 100644 index 0000000000..62acfbcf65 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.cc @@ -0,0 +1,3870 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef FJXL_SELF_INCLUDE + +#include "lib/jxl/enc_fast_lossless.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// Enable NEON and AVX2/AVX512 if not asked to do otherwise and the compilers +// support it. +#if defined(__aarch64__) || defined(_M_ARM64) +#include + +#ifndef FJXL_ENABLE_NEON +#define FJXL_ENABLE_NEON 1 +#endif + +#elif (defined(__x86_64__) || defined(_M_X64)) && !defined(_MSC_VER) +#include + +// manually add _mm512_cvtsi512_si32 definition if missing +// (e.g. 
with Xcode on macOS Mojave) +// copied from gcc 11.1.0 include/avx512fintrin.h line 14367-14373 +#if defined(__clang__) && \ + ((!defined(__apple_build_version__) && __clang_major__ < 10) || \ + (defined(__apple_build_version__) && __apple_build_version__ < 12000032)) +inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_cvtsi512_si32(__m512i __A) { + __v16si __B = (__v16si)__A; + return __B[0]; +} +#endif + +// TODO(veluca): MSVC support for dynamic dispatch. +#if defined(__clang__) || defined(__GNUC__) + +#ifndef FJXL_ENABLE_AVX2 +#define FJXL_ENABLE_AVX2 1 +#endif + +#ifndef FJXL_ENABLE_AVX512 +// On clang-7 or earlier, and gcc-10 or earlier, AVX512 seems broken. +#if (defined(__clang__) && \ + (!defined(__apple_build_version__) && __clang_major__ > 7) || \ + (defined(__apple_build_version__) && \ + __apple_build_version__ > 10010046)) || \ + (defined(__GNUC__) && __GNUC__ > 10) +#define FJXL_ENABLE_AVX512 1 +#endif +#endif + +#endif + +#endif + +#ifndef FJXL_ENABLE_NEON +#define FJXL_ENABLE_NEON 0 +#endif + +#ifndef FJXL_ENABLE_AVX2 +#define FJXL_ENABLE_AVX2 0 +#endif + +#ifndef FJXL_ENABLE_AVX512 +#define FJXL_ENABLE_AVX512 0 +#endif + +namespace { +#if defined(_MSC_VER) && !defined(__clang__) +#define FJXL_INLINE __forceinline +FJXL_INLINE uint32_t FloorLog2(uint32_t v) { + unsigned long index; + _BitScanReverse(&index, v); + return index; +} +FJXL_INLINE uint32_t CtzNonZero(uint64_t v) { + unsigned long index; + _BitScanForward(&index, v); + return index; +} +#else +#define FJXL_INLINE inline __attribute__((always_inline)) +FJXL_INLINE uint32_t FloorLog2(uint32_t v) { + return v ? 31 - __builtin_clz(v) : 0; +} +FJXL_INLINE uint32_t CtzNonZero(uint64_t v) { return __builtin_ctzll(v); } +#endif + +// Compiles to a memcpy on little-endian systems. 
+FJXL_INLINE void StoreLE64(uint8_t* tgt, uint64_t data) { +#if (!defined(__BYTE_ORDER__) || (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__)) + for (int i = 0; i < 8; i++) { + tgt[i] = (data >> (i * 8)) & 0xFF; + } +#else + memcpy(tgt, &data, 8); +#endif +} + +FJXL_INLINE size_t AddBits(uint32_t count, uint64_t bits, uint8_t* data_buf, + size_t& bits_in_buffer, uint64_t& bit_buffer) { + bit_buffer |= bits << bits_in_buffer; + bits_in_buffer += count; + StoreLE64(data_buf, bit_buffer); + size_t bytes_in_buffer = bits_in_buffer / 8; + bits_in_buffer -= bytes_in_buffer * 8; + bit_buffer >>= bytes_in_buffer * 8; + return bytes_in_buffer; +} + +struct BitWriter { + void Allocate(size_t maximum_bit_size) { + assert(data == nullptr); + // Leave some padding. + data.reset(static_cast(malloc(maximum_bit_size / 8 + 64))); + } + + void Write(uint32_t count, uint64_t bits) { + bytes_written += AddBits(count, bits, data.get() + bytes_written, + bits_in_buffer, buffer); + } + + void ZeroPadToByte() { + if (bits_in_buffer != 0) { + Write(8 - bits_in_buffer, 0); + } + } + + FJXL_INLINE void WriteMultiple(const uint64_t* nbits, const uint64_t* bits, + size_t n) { + // Necessary because Write() is only guaranteed to work with <=56 bits. + // Trying to SIMD-fy this code results in lower speed (and definitely less + // clarity). + { + for (size_t i = 0; i < n; i++) { + this->buffer |= bits[i] << this->bits_in_buffer; + memcpy(this->data.get() + this->bytes_written, &this->buffer, 8); + uint64_t shift = 64 - this->bits_in_buffer; + this->bits_in_buffer += nbits[i]; + // This `if` seems to be faster than using ternaries. 
+ if (this->bits_in_buffer >= 64) { + uint64_t next_buffer = bits[i] >> shift; + this->buffer = next_buffer; + this->bits_in_buffer -= 64; + this->bytes_written += 8; + } + } + memcpy(this->data.get() + this->bytes_written, &this->buffer, 8); + size_t bytes_in_buffer = this->bits_in_buffer / 8; + this->bits_in_buffer -= bytes_in_buffer * 8; + this->buffer >>= bytes_in_buffer * 8; + this->bytes_written += bytes_in_buffer; + } + } + + std::unique_ptr data = {nullptr, free}; + size_t bytes_written = 0; + size_t bits_in_buffer = 0; + uint64_t buffer = 0; +}; + +} // namespace + +extern "C" { + +struct JxlFastLosslessFrameState { + size_t width; + size_t height; + size_t nb_chans; + size_t bitdepth; + BitWriter header; + std::vector> group_data; + size_t current_bit_writer = 0; + size_t bit_writer_byte_pos = 0; + size_t bits_in_buffer = 0; + uint64_t bit_buffer = 0; +}; + +size_t JxlFastLosslessOutputSize(const JxlFastLosslessFrameState* frame) { + size_t total_size_groups = 0; + for (size_t i = 0; i < frame->group_data.size(); i++) { + size_t sz = 0; + for (size_t j = 0; j < frame->nb_chans; j++) { + const auto& writer = frame->group_data[i][j]; + sz += writer.bytes_written * 8 + writer.bits_in_buffer; + } + sz = (sz + 7) / 8; + total_size_groups += sz; + } + return frame->header.bytes_written + total_size_groups; +} + +size_t JxlFastLosslessMaxRequiredOutput( + const JxlFastLosslessFrameState* frame) { + return JxlFastLosslessOutputSize(frame) + 32; +} + +void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame, + int add_image_header, int is_last) { + BitWriter* output = &frame->header; + output->Allocate(1000 + frame->group_data.size() * 32); + + std::vector group_sizes(frame->group_data.size()); + for (size_t i = 0; i < frame->group_data.size(); i++) { + size_t sz = 0; + for (size_t j = 0; j < frame->nb_chans; j++) { + const auto& writer = frame->group_data[i][j]; + sz += writer.bytes_written * 8 + writer.bits_in_buffer; + } + sz = (sz + 7) / 8; + 
group_sizes[i] = sz; + } + + bool have_alpha = (frame->nb_chans == 2 || frame->nb_chans == 4); + +#if FJXL_STANDALONE + if (add_image_header) { + // Signature + output->Write(16, 0x0AFF); + + // Size header, hand-crafted. + // Not small + output->Write(1, 0); + + auto wsz = [output](size_t size) { + if (size - 1 < (1 << 9)) { + output->Write(2, 0b00); + output->Write(9, size - 1); + } else if (size - 1 < (1 << 13)) { + output->Write(2, 0b01); + output->Write(13, size - 1); + } else if (size - 1 < (1 << 18)) { + output->Write(2, 0b10); + output->Write(18, size - 1); + } else { + output->Write(2, 0b11); + output->Write(30, size - 1); + } + }; + + wsz(frame->height); + + // No special ratio. + output->Write(3, 0); + + wsz(frame->width); + + // Hand-crafted ImageMetadata. + output->Write(1, 0); // all_default + output->Write(1, 0); // extra_fields + output->Write(1, 0); // bit_depth.floating_point_sample + if (frame->bitdepth == 8) { + output->Write(2, 0b00); // bit_depth.bits_per_sample = 8 + } else if (frame->bitdepth == 10) { + output->Write(2, 0b01); // bit_depth.bits_per_sample = 10 + } else if (frame->bitdepth == 12) { + output->Write(2, 0b10); // bit_depth.bits_per_sample = 12 + } else { + output->Write(2, 0b11); // 1 + u(6) + output->Write(6, frame->bitdepth - 1); + } + if (frame->bitdepth <= 14) { + output->Write(1, 1); // 16-bit-buffer sufficient + } else { + output->Write(1, 0); // 16-bit-buffer NOT sufficient + } + if (have_alpha) { + output->Write(2, 0b01); // One extra channel + output->Write(1, 1); // ... all_default (ie. 
8-bit alpha) + } else { + output->Write(2, 0b00); // No extra channel + } + output->Write(1, 0); // Not XYB + if (frame->nb_chans > 2) { + output->Write(1, 1); // color_encoding.all_default (sRGB) + } else { + output->Write(1, 0); // color_encoding.all_default false + output->Write(1, 0); // color_encoding.want_icc false + output->Write(2, 1); // grayscale + output->Write(2, 1); // D65 + output->Write(1, 0); // no gamma transfer function + output->Write(2, 0b10); // tf: 2 + u(4) + output->Write(4, 11); // tf of sRGB + output->Write(2, 1); // relative rendering intent + } + output->Write(2, 0b00); // No extensions. + + output->Write(1, 1); // all_default transform data + + // No ICC, no preview. Frame should start at byte boundery. + output->ZeroPadToByte(); + } +#else + assert(!add_image_header); +#endif + + // Handcrafted frame header. + output->Write(1, 0); // all_default + output->Write(2, 0b00); // regular frame + output->Write(1, 1); // modular + output->Write(2, 0b00); // default flags + output->Write(1, 0); // not YCbCr + output->Write(2, 0b00); // no upsampling + if (have_alpha) { + output->Write(2, 0b00); // no alpha upsampling + } + output->Write(2, 0b01); // default group size + output->Write(2, 0b00); // exactly one pass + output->Write(1, 0); // no custom size or origin + output->Write(2, 0b00); // kReplace blending mode + if (have_alpha) { + output->Write(2, 0b00); // kReplace blending mode for alpha channel + } + output->Write(1, is_last); // is_last + output->Write(2, 0b00); // a frame has no name + output->Write(1, 0); // loop filter is not all_default + output->Write(1, 0); // no gaborish + output->Write(2, 0); // 0 EPF iters + output->Write(2, 0b00); // No LF extensions + output->Write(2, 0b00); // No FH extensions + + output->Write(1, 0); // No TOC permutation + output->ZeroPadToByte(); // TOC is byte-aligned. 
+ for (size_t i = 0; i < frame->group_data.size(); i++) { + size_t sz = group_sizes[i]; + if (sz < (1 << 10)) { + output->Write(2, 0b00); + output->Write(10, sz); + } else if (sz - 1024 < (1 << 14)) { + output->Write(2, 0b01); + output->Write(14, sz - 1024); + } else if (sz - 17408 < (1 << 22)) { + output->Write(2, 0b10); + output->Write(22, sz - 17408); + } else { + output->Write(2, 0b11); + output->Write(30, sz - 4211712); + } + } + output->ZeroPadToByte(); // Groups are byte-aligned. +} + +#if FJXL_ENABLE_AVX512 +__attribute__((target("avx512vbmi2"))) static size_t AppendBytesWithBitOffset( + const uint8_t* data, size_t n, size_t bit_buffer_nbits, + unsigned char* output, uint64_t& bit_buffer) { + if (n < 128) { + return 0; + } + + size_t i = 0; + __m512i shift = _mm512_set1_epi64(64 - bit_buffer_nbits); + __m512i carry = _mm512_set1_epi64(bit_buffer << (64 - bit_buffer_nbits)); + + for (; i + 64 <= n; i += 64) { + __m512i current = _mm512_loadu_si512(data + i); + __m512i previous_u64 = _mm512_alignr_epi64(current, carry, 7); + carry = current; + __m512i out = _mm512_shrdv_epi64(previous_u64, current, shift); + _mm512_storeu_si512(output + i, out); + } + + bit_buffer = data[i - 1] >> (8 - bit_buffer_nbits); + + return i; +} +#endif + +size_t JxlFastLosslessWriteOutput(JxlFastLosslessFrameState* frame, + unsigned char* output, size_t output_size) { + assert(output_size >= 32); + unsigned char* initial_output = output; + size_t (*append_bytes_with_bit_offset)(const uint8_t*, size_t, size_t, + unsigned char*, uint64_t&) = nullptr; + +#if FJXL_ENABLE_AVX512 + if (__builtin_cpu_supports("avx512vbmi2")) { + append_bytes_with_bit_offset = AppendBytesWithBitOffset; + } +#endif + + while (true) { + size_t& cur = frame->current_bit_writer; + size_t& bw_pos = frame->bit_writer_byte_pos; + if (cur >= 1 + frame->group_data.size() * frame->nb_chans) { + return output - initial_output; + } + if (output_size <= 8) { + return output - initial_output; + } + size_t nbc = 
frame->nb_chans; + const BitWriter& writer = + cur == 0 ? frame->header + : frame->group_data[(cur - 1) / nbc][(cur - 1) % nbc]; + size_t full_byte_count = + std::min(output_size - 8, writer.bytes_written - bw_pos); + if (frame->bits_in_buffer == 0) { + memcpy(output, writer.data.get() + bw_pos, full_byte_count); + } else { + size_t i = 0; + if (append_bytes_with_bit_offset) { + i += append_bytes_with_bit_offset( + writer.data.get() + bw_pos, full_byte_count, frame->bits_in_buffer, + output, frame->bit_buffer); + } +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + // Copy 8 bytes at a time until we reach the border. + for (; i + 8 < full_byte_count; i += 8) { + uint64_t chunk; + memcpy(&chunk, writer.data.get() + bw_pos + i, 8); + uint64_t out = frame->bit_buffer | (chunk << frame->bits_in_buffer); + memcpy(output + i, &out, 8); + frame->bit_buffer = chunk >> (64 - frame->bits_in_buffer); + } +#endif + for (; i < full_byte_count; i++) { + AddBits(8, writer.data.get()[bw_pos + i], output + i, + frame->bits_in_buffer, frame->bit_buffer); + } + } + output += full_byte_count; + output_size -= full_byte_count; + bw_pos += full_byte_count; + if (bw_pos == writer.bytes_written) { + auto write = [&](size_t num, uint64_t bits) { + size_t n = AddBits(num, bits, output, frame->bits_in_buffer, + frame->bit_buffer); + output += n; + output_size -= n; + }; + if (writer.bits_in_buffer) { + write(writer.bits_in_buffer, writer.buffer); + } + bw_pos = 0; + cur++; + if ((cur - 1) % nbc == 0 && frame->bits_in_buffer != 0) { + write(8 - frame->bits_in_buffer, 0); + } + } + } +} + +void JxlFastLosslessFreeFrameState(JxlFastLosslessFrameState* frame) { + delete frame; +} + +} // extern "C" + +#endif + +#ifdef FJXL_SELF_INCLUDE + +namespace { + +constexpr size_t kNumRawSymbols = 19; +constexpr size_t kNumLZ77 = 33; +constexpr size_t kLZ77CacheSize = 32; + +constexpr size_t kLZ77Offset = 224; +constexpr size_t kLZ77MinLength = 7; + +void 
EncodeHybridUintLZ77(uint32_t value, uint32_t* token, uint32_t* nbits, + uint32_t* bits) { + // 400 config + uint32_t n = FloorLog2(value); + *token = value < 16 ? value : 16 + n - 4; + *nbits = value < 16 ? 0 : n; + *bits = value < 16 ? 0 : value - (1 << *nbits); +} + +struct PrefixCode { + uint8_t raw_nbits[kNumRawSymbols] = {}; + uint8_t raw_bits[kNumRawSymbols] = {}; + + alignas(64) uint8_t raw_nbits_simd[16] = {}; + alignas(64) uint8_t raw_bits_simd[16] = {}; + + uint8_t lz77_nbits[kNumLZ77] = {}; + uint16_t lz77_bits[kNumLZ77] = {}; + + uint64_t lz77_cache_bits[kLZ77CacheSize] = {}; + uint8_t lz77_cache_nbits[kLZ77CacheSize] = {}; + + static uint16_t BitReverse(size_t nbits, uint16_t bits) { + constexpr uint16_t kNibbleLookup[16] = { + 0b0000, 0b1000, 0b0100, 0b1100, 0b0010, 0b1010, 0b0110, 0b1110, + 0b0001, 0b1001, 0b0101, 0b1101, 0b0011, 0b1011, 0b0111, 0b1111, + }; + uint16_t rev16 = (kNibbleLookup[bits & 0xF] << 12) | + (kNibbleLookup[(bits >> 4) & 0xF] << 8) | + (kNibbleLookup[(bits >> 8) & 0xF] << 4) | + (kNibbleLookup[bits >> 12]); + return rev16 >> (16 - nbits); + } + + // Create the prefix codes given the code lengths. + // Supports the code lengths being split into two halves. 
+ static void ComputeCanonicalCode(const uint8_t* first_chunk_nbits, + uint8_t* first_chunk_bits, + size_t first_chunk_size, + const uint8_t* second_chunk_nbits, + uint16_t* second_chunk_bits, + size_t second_chunk_size) { + constexpr size_t kMaxCodeLength = 15; + uint8_t code_length_counts[kMaxCodeLength + 1] = {}; + for (size_t i = 0; i < first_chunk_size; i++) { + code_length_counts[first_chunk_nbits[i]]++; + assert(first_chunk_nbits[i] <= kMaxCodeLength); + assert(first_chunk_nbits[i] <= 8); + assert(first_chunk_nbits[i] > 0); + } + for (size_t i = 0; i < second_chunk_size; i++) { + code_length_counts[second_chunk_nbits[i]]++; + assert(second_chunk_nbits[i] <= kMaxCodeLength); + } + + uint16_t next_code[kMaxCodeLength + 1] = {}; + + uint16_t code = 0; + for (size_t i = 1; i < kMaxCodeLength + 1; i++) { + code = (code + code_length_counts[i - 1]) << 1; + next_code[i] = code; + } + + for (size_t i = 0; i < first_chunk_size; i++) { + first_chunk_bits[i] = + BitReverse(first_chunk_nbits[i], next_code[first_chunk_nbits[i]]++); + } + for (size_t i = 0; i < second_chunk_size; i++) { + second_chunk_bits[i] = + BitReverse(second_chunk_nbits[i], next_code[second_chunk_nbits[i]]++); + } + } + + template + static void ComputeCodeLengthsNonZeroImpl(const uint64_t* freqs, size_t n, + size_t precision, T infty, + uint8_t* min_limit, + uint8_t* max_limit, + uint8_t* nbits) { + std::vector dynp(((1U << precision) + 1) * (n + 1), infty); + auto d = [&](size_t sym, size_t off) -> T& { + return dynp[sym * ((1 << precision) + 1) + off]; + }; + d(0, 0) = 0; + for (size_t sym = 0; sym < n; sym++) { + for (T bits = min_limit[sym]; bits <= max_limit[sym]; bits++) { + size_t off_delta = 1U << (precision - bits); + for (size_t off = 0; off + off_delta <= (1U << precision); off++) { + d(sym + 1, off + off_delta) = + std::min(d(sym, off) + static_cast(freqs[sym]) * bits, + d(sym + 1, off + off_delta)); + } + } + } + + size_t sym = n; + size_t off = 1U << precision; + + assert(d(sym, off) 
!= infty); + + while (sym-- > 0) { + assert(off > 0); + for (size_t bits = min_limit[sym]; bits <= max_limit[sym]; bits++) { + size_t off_delta = 1U << (precision - bits); + if (off_delta <= off && + d(sym + 1, off) == d(sym, off - off_delta) + freqs[sym] * bits) { + off -= off_delta; + nbits[sym] = bits; + break; + } + } + } + } + + // Computes nbits[i] for i <= n, subject to min_limit[i] <= nbits[i] <= + // max_limit[i] and sum 2**-nbits[i] == 1, so to minimize sum(nbits[i] * + // freqs[i]). + static void ComputeCodeLengthsNonZero(const uint64_t* freqs, size_t n, + uint8_t* min_limit, uint8_t* max_limit, + uint8_t* nbits) { + size_t precision = 0; + size_t shortest_length = 255; + uint64_t freqsum = 0; + for (size_t i = 0; i < n; i++) { + assert(freqs[i] != 0); + freqsum += freqs[i]; + if (min_limit[i] < 1) min_limit[i] = 1; + assert(min_limit[i] <= max_limit[i]); + precision = std::max(max_limit[i], precision); + shortest_length = std::min(min_limit[i], shortest_length); + } + // If all the minimum limits are greater than 1, shift precision so that we + // behave as if the shortest was 1. + precision -= shortest_length - 1; + uint64_t infty = freqsum * precision; + if (infty < std::numeric_limits::max() / 2) { + ComputeCodeLengthsNonZeroImpl(freqs, n, precision, + static_cast(infty), min_limit, + max_limit, nbits); + } else { + ComputeCodeLengthsNonZeroImpl(freqs, n, precision, infty, min_limit, + max_limit, nbits); + } + } + + static constexpr size_t kMaxNumSymbols = + kNumRawSymbols + 1 < kNumLZ77 ? 
kNumLZ77 : kNumRawSymbols + 1; + static void ComputeCodeLengths(const uint64_t* freqs, size_t n, + const uint8_t* min_limit_in, + const uint8_t* max_limit_in, uint8_t* nbits) { + assert(n <= kMaxNumSymbols); + uint64_t compact_freqs[kMaxNumSymbols]; + uint8_t min_limit[kMaxNumSymbols]; + uint8_t max_limit[kMaxNumSymbols]; + size_t ni = 0; + for (size_t i = 0; i < n; i++) { + if (freqs[i]) { + compact_freqs[ni] = freqs[i]; + min_limit[ni] = min_limit_in[i]; + max_limit[ni] = max_limit_in[i]; + ni++; + } + } + uint8_t num_bits[kMaxNumSymbols] = {}; + ComputeCodeLengthsNonZero(compact_freqs, ni, min_limit, max_limit, + num_bits); + ni = 0; + for (size_t i = 0; i < n; i++) { + nbits[i] = 0; + if (freqs[i]) { + nbits[i] = num_bits[ni++]; + } + } + } + + // Invalid code, used to construct arrays. + PrefixCode() {} + + template + PrefixCode(BitDepth, uint64_t* raw_counts, uint64_t* lz77_counts) { + // "merge" together all the lz77 counts in a single symbol for the level 1 + // table (containing just the raw symbols, up to length 7). 
+ uint64_t level1_counts[kNumRawSymbols + 1]; + memcpy(level1_counts, raw_counts, kNumRawSymbols * sizeof(uint64_t)); + size_t numraw = kNumRawSymbols; + while (numraw > 0 && level1_counts[numraw - 1] == 0) numraw--; + + level1_counts[numraw] = 0; + for (size_t i = 0; i < kNumLZ77; i++) { + level1_counts[numraw] += lz77_counts[i]; + } + uint8_t level1_nbits[kNumRawSymbols + 1] = {}; + ComputeCodeLengths(level1_counts, numraw + 1, BitDepth::kMinRawLength, + BitDepth::kMaxRawLength, level1_nbits); + + uint8_t level2_nbits[kNumLZ77] = {}; + uint8_t min_lengths[kNumLZ77] = {}; + uint8_t l = 15 - level1_nbits[numraw]; + uint8_t max_lengths[kNumLZ77]; + for (size_t i = 0; i < kNumLZ77; i++) { + max_lengths[i] = l; + } + size_t num_lz77 = kNumLZ77; + while (num_lz77 > 0 && lz77_counts[num_lz77 - 1] == 0) num_lz77--; + ComputeCodeLengths(lz77_counts, num_lz77, min_lengths, max_lengths, + level2_nbits); + for (size_t i = 0; i < numraw; i++) { + raw_nbits[i] = level1_nbits[i]; + } + for (size_t i = 0; i < num_lz77; i++) { + lz77_nbits[i] = + level2_nbits[i] ? 
level1_nbits[numraw] + level2_nbits[i] : 0; + } + + ComputeCanonicalCode(raw_nbits, raw_bits, numraw, lz77_nbits, lz77_bits, + kNumLZ77); + BitDepth::PrepareForSimd(raw_nbits, raw_bits, numraw, raw_nbits_simd, + raw_bits_simd); + + // Prepare lz77 cache + for (size_t count = 0; count < kLZ77CacheSize; count++) { + unsigned token, nbits, bits; + EncodeHybridUintLZ77(count, &token, &nbits, &bits); + lz77_cache_nbits[count] = lz77_nbits[token] + nbits + raw_nbits[0]; + lz77_cache_bits[count] = + (((bits << lz77_nbits[token]) | lz77_bits[token]) << raw_nbits[0]) | + raw_bits[0]; + } + } + + void WriteTo(BitWriter* writer) const { + uint64_t code_length_counts[18] = {}; + code_length_counts[17] = 3 + 2 * (kNumLZ77 - 1); + for (size_t i = 0; i < kNumRawSymbols; i++) { + code_length_counts[raw_nbits[i]]++; + } + for (size_t i = 0; i < kNumLZ77; i++) { + code_length_counts[lz77_nbits[i]]++; + } + uint8_t code_length_nbits[18] = {}; + uint8_t code_length_nbits_min[18] = {}; + uint8_t code_length_nbits_max[18] = { + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + }; + ComputeCodeLengths(code_length_counts, 18, code_length_nbits_min, + code_length_nbits_max, code_length_nbits); + writer->Write(2, 0b00); // HSKIP = 0, i.e. don't skip code lengths. + + // As per Brotli RFC. + uint8_t code_length_order[18] = {1, 2, 3, 4, 0, 5, 17, 6, 16, + 7, 8, 9, 10, 11, 12, 13, 14, 15}; + uint8_t code_length_length_nbits[] = {2, 4, 3, 2, 2, 4}; + uint8_t code_length_length_bits[] = {0, 7, 3, 2, 1, 15}; + + // Encode lengths of code lengths. + size_t num_code_lengths = 18; + while (code_length_nbits[code_length_order[num_code_lengths - 1]] == 0) { + num_code_lengths--; + } + for (size_t i = 0; i < num_code_lengths; i++) { + int symbol = code_length_nbits[code_length_order[i]]; + writer->Write(code_length_length_nbits[symbol], + code_length_length_bits[symbol]); + } + + // Compute the canonical codes for the codes that represent the lengths of + // the actual codes for data. 
+ uint16_t code_length_bits[18] = {}; + ComputeCanonicalCode(nullptr, nullptr, 0, code_length_nbits, + code_length_bits, 18); + // Encode raw bit code lengths. + for (size_t i = 0; i < kNumRawSymbols; i++) { + writer->Write(code_length_nbits[raw_nbits[i]], + code_length_bits[raw_nbits[i]]); + } + size_t num_lz77 = kNumLZ77; + while (lz77_nbits[num_lz77 - 1] == 0) { + num_lz77--; + } + // Encode 0s until 224 (start of LZ77 symbols). This is in total 224-19 = + // 205. + static_assert(kLZ77Offset == 224, ""); + static_assert(kNumRawSymbols == 19, ""); + writer->Write(code_length_nbits[17], code_length_bits[17]); + writer->Write(3, 0b010); // 5 + writer->Write(code_length_nbits[17], code_length_bits[17]); + writer->Write(3, 0b000); // (5-2)*8 + 3 = 27 + writer->Write(code_length_nbits[17], code_length_bits[17]); + writer->Write(3, 0b010); // (27-2)*8 + 5 = 205 + // Encode LZ77 symbols, with values 224+i. + for (size_t i = 0; i < num_lz77; i++) { + writer->Write(code_length_nbits[lz77_nbits[i]], + code_length_bits[lz77_nbits[i]]); + } + } +}; + +template +struct VecPair { + T low; + T hi; +}; + +#ifdef FJXL_GENERIC_SIMD +#undef FJXL_GENERIC_SIMD +#endif + +#ifdef FJXL_AVX512 +#define FJXL_GENERIC_SIMD +struct SIMDVec32; +struct Mask32 { + __mmask16 mask; + SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false); + size_t CountPrefix() const { + return CtzNonZero(~uint64_t{_cvtmask16_u32(mask)}); + } +}; + +struct SIMDVec32 { + __m512i vec; + + static constexpr size_t kLanes = 16; + + FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) { + return SIMDVec32{_mm512_loadu_si512((__m512i*)data)}; + } + FJXL_INLINE void Store(uint32_t* data) { + _mm512_storeu_si512((__m512i*)data, vec); + } + FJXL_INLINE static SIMDVec32 Val(uint32_t v) { + return SIMDVec32{_mm512_set1_epi32(v)}; + } + FJXL_INLINE SIMDVec32 ValToToken() const { + return SIMDVec32{ + _mm512_sub_epi32(_mm512_set1_epi32(32), _mm512_lzcnt_epi32(vec))}; + } + FJXL_INLINE SIMDVec32 
SatSubU(const SIMDVec32& to_subtract) const { + return SIMDVec32{_mm512_sub_epi32(_mm512_max_epu32(vec, to_subtract.vec), + to_subtract.vec)}; + } + FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const { + return SIMDVec32{_mm512_sub_epi32(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const { + return SIMDVec32{_mm512_add_epi32(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const { + return SIMDVec32{_mm512_xor_epi32(vec, oth.vec)}; + } + FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const { + return Mask32{_mm512_cmpeq_epi32_mask(vec, oth.vec)}; + } + FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const { + return Mask32{_mm512_cmpgt_epi32_mask(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec32 Pow2() const { + return SIMDVec32{_mm512_sllv_epi32(_mm512_set1_epi32(1), vec)}; + } + template + FJXL_INLINE SIMDVec32 SignedShiftRight() const { + return SIMDVec32{_mm512_srai_epi32(vec, i)}; + } +}; + +struct SIMDVec16; + +struct Mask16 { + __mmask32 mask; + SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false); + Mask16 And(const Mask16& oth) const { + return Mask16{_kand_mask32(mask, oth.mask)}; + } + size_t CountPrefix() const { + return CtzNonZero(~uint64_t{_cvtmask32_u32(mask)}); + } +}; + +struct SIMDVec16 { + __m512i vec; + + static constexpr size_t kLanes = 32; + + FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) { + return SIMDVec16{_mm512_loadu_si512((__m512i*)data)}; + } + FJXL_INLINE void Store(uint16_t* data) { + _mm512_storeu_si512((__m512i*)data, vec); + } + FJXL_INLINE static SIMDVec16 Val(uint16_t v) { + return SIMDVec16{_mm512_set1_epi16(v)}; + } + FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo, + const SIMDVec32& hi) { + auto tmp = _mm512_packus_epi32(lo.vec, hi.vec); + alignas(64) uint64_t perm[8] = {0, 2, 4, 6, 1, 3, 5, 7}; + return SIMDVec16{ + _mm512_permutex2var_epi64(tmp, _mm512_load_si512((__m512i*)perm), tmp)}; + } + + FJXL_INLINE SIMDVec16 ValToToken() 
const { + auto c16 = _mm512_set1_epi32(16); + auto c32 = _mm512_set1_epi32(32); + auto low16bit = _mm512_set1_epi32(0x0000FFFF); + auto lzhi = + _mm512_sub_epi32(c16, _mm512_min_epu32(c16, _mm512_lzcnt_epi32(vec))); + auto lzlo = _mm512_sub_epi32( + c32, _mm512_lzcnt_epi32(_mm512_and_si512(low16bit, vec))); + return SIMDVec16{_mm512_or_si512(lzlo, _mm512_slli_epi32(lzhi, 16))}; + } + + FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const { + return SIMDVec16{_mm512_subs_epu16(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const { + return SIMDVec16{_mm512_sub_epi16(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const { + return SIMDVec16{_mm512_add_epi16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const { + return SIMDVec16{_mm512_min_epu16(vec, oth.vec)}; + } + FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const { + return Mask16{_mm512_cmpeq_epi16_mask(vec, oth.vec)}; + } + FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const { + return Mask16{_mm512_cmpgt_epi16_mask(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Pow2() const { + return SIMDVec16{_mm512_sllv_epi16(_mm512_set1_epi16(1), vec)}; + } + FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const { + return SIMDVec16{_mm512_or_si512(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const { + return SIMDVec16{_mm512_xor_si512(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const { + return SIMDVec16{_mm512_and_si512(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const { + return SIMDVec16{_mm512_srai_epi16(_mm512_add_epi16(vec, oth.vec), 1)}; + } + FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const { + return SIMDVec16{_mm512_or_si512(vec, _mm512_set1_epi16(0xFF00))}; + } + FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const { + return SIMDVec16{_mm512_shuffle_epi8( + _mm512_broadcast_i32x4(_mm_loadu_si128((__m128i*)table)), vec)}; + } + 
FJXL_INLINE VecPair Interleave(const SIMDVec16& low) const { + auto lo = _mm512_unpacklo_epi16(low.vec, vec); + auto hi = _mm512_unpackhi_epi16(low.vec, vec); + alignas(64) uint64_t perm1[8] = {0, 1, 8, 9, 2, 3, 10, 11}; + alignas(64) uint64_t perm2[8] = {4, 5, 12, 13, 6, 7, 14, 15}; + return {SIMDVec16{_mm512_permutex2var_epi64( + lo, _mm512_load_si512((__m512i*)perm1), hi)}, + SIMDVec16{_mm512_permutex2var_epi64( + lo, _mm512_load_si512((__m512i*)perm2), hi)}}; + } + FJXL_INLINE VecPair Upcast() const { + auto lo = _mm512_unpacklo_epi16(vec, _mm512_setzero_si512()); + auto hi = _mm512_unpackhi_epi16(vec, _mm512_setzero_si512()); + alignas(64) uint64_t perm1[8] = {0, 1, 8, 9, 2, 3, 10, 11}; + alignas(64) uint64_t perm2[8] = {4, 5, 12, 13, 6, 7, 14, 15}; + return {SIMDVec32{_mm512_permutex2var_epi64( + lo, _mm512_load_si512((__m512i*)perm1), hi)}, + SIMDVec32{_mm512_permutex2var_epi64( + lo, _mm512_load_si512((__m512i*)perm2), hi)}}; + } + template + FJXL_INLINE SIMDVec16 SignedShiftRight() const { + return SIMDVec16{_mm512_srai_epi16(vec, i)}; + } + + static std::array LoadG8(const unsigned char* data) { + __m256i bytes = _mm256_loadu_si256((__m256i*)data); + return {SIMDVec16{_mm512_cvtepu8_epi16(bytes)}}; + } + static std::array LoadG16(const unsigned char* data) { + return {Load((const uint16_t*)data)}; + } + + static std::array LoadGA8(const unsigned char* data) { + __m512i bytes = _mm512_loadu_si512((__m512i*)data); + __m512i gray = _mm512_and_si512(bytes, _mm512_set1_epi16(0xFF)); + __m512i alpha = _mm512_srli_epi16(bytes, 8); + return {SIMDVec16{gray}, SIMDVec16{alpha}}; + } + static std::array LoadGA16(const unsigned char* data) { + __m512i bytes1 = _mm512_loadu_si512((__m512i*)data); + __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 64)); + __m512i g_mask = _mm512_set1_epi32(0xFFFF); + __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0); + __m512i g = _mm512_permutexvar_epi64( + permuteidx, _mm512_packus_epi32(_mm512_and_si512(bytes1, 
g_mask), + _mm512_and_si512(bytes2, g_mask))); + __m512i a = _mm512_permutexvar_epi64( + permuteidx, _mm512_packus_epi32(_mm512_srli_epi32(bytes1, 16), + _mm512_srli_epi32(bytes2, 16))); + return {SIMDVec16{g}, SIMDVec16{a}}; + } + + static std::array LoadRGB8(const unsigned char* data) { + __m512i bytes0 = _mm512_loadu_si512((__m512i*)data); + __m512i bytes1 = + _mm512_zextsi256_si512(_mm256_loadu_si256((__m256i*)(data + 64))); + + // 0x7A = element of upper half of second vector = 0 after lookup; still in + // the upper half once we add 1 or 2. + uint8_t z = 0x7A; + __m512i ridx = + _mm512_set_epi8(z, 93, z, 90, z, 87, z, 84, z, 81, z, 78, z, 75, z, 72, + z, 69, z, 66, z, 63, z, 60, z, 57, z, 54, z, 51, z, 48, + z, 45, z, 42, z, 39, z, 36, z, 33, z, 30, z, 27, z, 24, + z, 21, z, 18, z, 15, z, 12, z, 9, z, 6, z, 3, z, 0); + __m512i gidx = _mm512_add_epi8(ridx, _mm512_set1_epi8(1)); + __m512i bidx = _mm512_add_epi8(gidx, _mm512_set1_epi8(1)); + __m512i r = _mm512_permutex2var_epi8(bytes0, ridx, bytes1); + __m512i g = _mm512_permutex2var_epi8(bytes0, gidx, bytes1); + __m512i b = _mm512_permutex2var_epi8(bytes0, bidx, bytes1); + return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}}; + } + static std::array LoadRGB16(const unsigned char* data) { + __m512i bytes0 = _mm512_loadu_si512((__m512i*)data); + __m512i bytes1 = _mm512_loadu_si512((__m512i*)(data + 64)); + __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 128)); + + __m512i ridx_lo = _mm512_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63, 60, 57, + 54, 51, 48, 45, 42, 39, 36, 33, 30, 27, + 24, 21, 18, 15, 12, 9, 6, 3, 0); + // -1 is such that when adding 1 or 2, we get the correct index for + // green/blue. 
+ __m512i ridx_hi = + _mm512_set_epi16(29, 26, 23, 20, 17, 14, 11, 8, 5, 2, -1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + __m512i gidx_lo = _mm512_add_epi16(ridx_lo, _mm512_set1_epi16(1)); + __m512i gidx_hi = _mm512_add_epi16(ridx_hi, _mm512_set1_epi16(1)); + __m512i bidx_lo = _mm512_add_epi16(gidx_lo, _mm512_set1_epi16(1)); + __m512i bidx_hi = _mm512_add_epi16(gidx_hi, _mm512_set1_epi16(1)); + + __mmask32 rmask = _cvtu32_mask32(0b11111111110000000000000000000000); + __mmask32 gbmask = _cvtu32_mask32(0b11111111111000000000000000000000); + + __m512i rlo = _mm512_permutex2var_epi16(bytes0, ridx_lo, bytes1); + __m512i glo = _mm512_permutex2var_epi16(bytes0, gidx_lo, bytes1); + __m512i blo = _mm512_permutex2var_epi16(bytes0, bidx_lo, bytes1); + __m512i r = _mm512_mask_permutexvar_epi16(rlo, rmask, ridx_hi, bytes2); + __m512i g = _mm512_mask_permutexvar_epi16(glo, gbmask, gidx_hi, bytes2); + __m512i b = _mm512_mask_permutexvar_epi16(blo, gbmask, bidx_hi, bytes2); + return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}}; + } + + static std::array LoadRGBA8(const unsigned char* data) { + __m512i bytes1 = _mm512_loadu_si512((__m512i*)data); + __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 64)); + __m512i rg_mask = _mm512_set1_epi32(0xFFFF); + __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0); + __m512i rg = _mm512_permutexvar_epi64( + permuteidx, _mm512_packus_epi32(_mm512_and_si512(bytes1, rg_mask), + _mm512_and_si512(bytes2, rg_mask))); + __m512i ba = _mm512_permutexvar_epi64( + permuteidx, _mm512_packus_epi32(_mm512_srli_epi32(bytes1, 16), + _mm512_srli_epi32(bytes2, 16))); + __m512i r = _mm512_and_si512(rg, _mm512_set1_epi16(0xFF)); + __m512i g = _mm512_srli_epi16(rg, 8); + __m512i b = _mm512_and_si512(ba, _mm512_set1_epi16(0xFF)); + __m512i a = _mm512_srli_epi16(ba, 8); + return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}}; + } + static std::array LoadRGBA16(const unsigned char* data) { + __m512i bytes0 = 
_mm512_loadu_si512((__m512i*)data); + __m512i bytes1 = _mm512_loadu_si512((__m512i*)(data + 64)); + __m512i bytes2 = _mm512_loadu_si512((__m512i*)(data + 128)); + __m512i bytes3 = _mm512_loadu_si512((__m512i*)(data + 192)); + + auto pack32 = [](__m512i a, __m512i b) { + __m512i permuteidx = _mm512_set_epi64(7, 5, 3, 1, 6, 4, 2, 0); + return _mm512_permutexvar_epi64(permuteidx, _mm512_packus_epi32(a, b)); + }; + auto packlow32 = [&pack32](__m512i a, __m512i b) { + __m512i mask = _mm512_set1_epi32(0xFFFF); + return pack32(_mm512_and_si512(a, mask), _mm512_and_si512(b, mask)); + }; + auto packhi32 = [&pack32](__m512i a, __m512i b) { + return pack32(_mm512_srli_epi32(a, 16), _mm512_srli_epi32(b, 16)); + }; + + __m512i rb0 = packlow32(bytes0, bytes1); + __m512i rb1 = packlow32(bytes2, bytes3); + __m512i ga0 = packhi32(bytes0, bytes1); + __m512i ga1 = packhi32(bytes2, bytes3); + + __m512i r = packlow32(rb0, rb1); + __m512i g = packlow32(ga0, ga1); + __m512i b = packhi32(rb0, rb1); + __m512i a = packhi32(ga0, ga1); + return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}}; + } + + void SwapEndian() { + auto indices = _mm512_broadcast_i32x4( + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14)); + vec = _mm512_shuffle_epi8(vec, indices); + } +}; + +SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true, + const SIMDVec16& if_false) { + return SIMDVec16{_mm512_mask_blend_epi16(mask, if_false.vec, if_true.vec)}; +} + +SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true, + const SIMDVec32& if_false) { + return SIMDVec32{_mm512_mask_blend_epi32(mask, if_false.vec, if_true.vec)}; +} + +struct Bits64 { + static constexpr size_t kLanes = 8; + + __m512i nbits; + __m512i bits; + + FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) { + _mm512_storeu_si512((__m512i*)nbits_out, nbits); + _mm512_storeu_si512((__m512i*)bits_out, bits); + } +}; + +struct Bits32 { + __m512i nbits; + __m512i bits; + + static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 
bits) { + return Bits32{nbits.vec, bits.vec}; + } + + Bits64 Merge() const { + auto nbits_hi32 = _mm512_srli_epi64(nbits, 32); + auto nbits_lo32 = _mm512_and_si512(nbits, _mm512_set1_epi64(0xFFFFFFFF)); + auto bits_hi32 = _mm512_srli_epi64(bits, 32); + auto bits_lo32 = _mm512_and_si512(bits, _mm512_set1_epi64(0xFFFFFFFF)); + + auto nbits64 = _mm512_add_epi64(nbits_hi32, nbits_lo32); + auto bits64 = + _mm512_or_si512(_mm512_sllv_epi64(bits_hi32, nbits_lo32), bits_lo32); + return Bits64{nbits64, bits64}; + } + + void Interleave(const Bits32& low) { + bits = _mm512_or_si512(_mm512_sllv_epi32(bits, low.nbits), low.bits); + nbits = _mm512_add_epi32(nbits, low.nbits); + } + + void ClipTo(size_t n) { + n = std::min(n, 16); + constexpr uint32_t kMask[32] = { + ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, + ~0u, ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 16 - n)); + nbits = _mm512_and_si512(mask, nbits); + bits = _mm512_and_si512(mask, bits); + } + void Skip(size_t n) { + n = std::min(n, 16); + constexpr uint32_t kMask[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, + ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, + }; + __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 16 - n)); + nbits = _mm512_and_si512(mask, nbits); + bits = _mm512_and_si512(mask, bits); + } +}; + +struct Bits16 { + __m512i nbits; + __m512i bits; + + static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) { + return Bits16{nbits.vec, bits.vec}; + } + + Bits32 Merge() const { + auto nbits_hi16 = _mm512_srli_epi32(nbits, 16); + auto nbits_lo16 = _mm512_and_si512(nbits, _mm512_set1_epi32(0xFFFF)); + auto bits_hi16 = _mm512_srli_epi32(bits, 16); + auto bits_lo16 = _mm512_and_si512(bits, _mm512_set1_epi32(0xFFFF)); + + auto nbits32 = _mm512_add_epi32(nbits_hi16, nbits_lo16); + auto bits32 = + _mm512_or_si512(_mm512_sllv_epi32(bits_hi16, nbits_lo16), bits_lo16); + 
return Bits32{nbits32, bits32}; + } + + void Interleave(const Bits16& low) { + bits = _mm512_or_si512(_mm512_sllv_epi16(bits, low.nbits), low.bits); + nbits = _mm512_add_epi16(nbits, low.nbits); + } + + void ClipTo(size_t n) { + n = std::min(n, 32); + constexpr uint16_t kMask[64] = { + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 32 - n)); + nbits = _mm512_and_si512(mask, nbits); + bits = _mm512_and_si512(mask, bits); + } + void Skip(size_t n) { + n = std::min(n, 32); + constexpr uint16_t kMask[64] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + }; + __m512i mask = _mm512_loadu_si512((__m512i*)(kMask + 32 - n)); + nbits = _mm512_and_si512(mask, nbits); + bits = _mm512_and_si512(mask, bits); + } +}; + +#endif + +#ifdef FJXL_AVX2 +#define FJXL_GENERIC_SIMD + +struct SIMDVec32; + +struct Mask32 { + __m256i mask; + SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false); + size_t CountPrefix() const { + return CtzNonZero(~static_cast( + (uint8_t)_mm256_movemask_ps(_mm256_castsi256_ps(mask)))); + } +}; + +struct SIMDVec32 { + __m256i vec; + + static constexpr size_t kLanes = 8; + + FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) { + return SIMDVec32{_mm256_loadu_si256((__m256i*)data)}; + } + FJXL_INLINE void Store(uint32_t* data) { + 
_mm256_storeu_si256((__m256i*)data, vec); + } + FJXL_INLINE static SIMDVec32 Val(uint32_t v) { + return SIMDVec32{_mm256_set1_epi32(v)}; + } + FJXL_INLINE SIMDVec32 ValToToken() const { + // we know that each value has at most 20 bits, so we just need 5 nibbles + // and don't need to mask the fifth. However we do need to set the higher + // bytes to 0xFF, which will make table lookups return 0. + auto nibble0 = + _mm256_or_si256(_mm256_and_si256(vec, _mm256_set1_epi32(0xF)), + _mm256_set1_epi32(0xFFFFFF00)); + auto nibble1 = _mm256_or_si256( + _mm256_and_si256(_mm256_srli_epi32(vec, 4), _mm256_set1_epi32(0xF)), + _mm256_set1_epi32(0xFFFFFF00)); + auto nibble2 = _mm256_or_si256( + _mm256_and_si256(_mm256_srli_epi32(vec, 8), _mm256_set1_epi32(0xF)), + _mm256_set1_epi32(0xFFFFFF00)); + auto nibble3 = _mm256_or_si256( + _mm256_and_si256(_mm256_srli_epi32(vec, 12), _mm256_set1_epi32(0xF)), + _mm256_set1_epi32(0xFFFFFF00)); + auto nibble4 = _mm256_or_si256(_mm256_srli_epi32(vec, 16), + _mm256_set1_epi32(0xFFFFFF00)); + + auto lut0 = _mm256_broadcastsi128_si256( + _mm_setr_epi8(0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4)); + auto lut1 = _mm256_broadcastsi128_si256( + _mm_setr_epi8(0, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8)); + auto lut2 = _mm256_broadcastsi128_si256(_mm_setr_epi8( + 0, 9, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12)); + auto lut3 = _mm256_broadcastsi128_si256(_mm_setr_epi8( + 0, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16)); + auto lut4 = _mm256_broadcastsi128_si256(_mm_setr_epi8( + 0, 17, 18, 18, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20)); + + auto token0 = _mm256_shuffle_epi8(lut0, nibble0); + auto token1 = _mm256_shuffle_epi8(lut1, nibble1); + auto token2 = _mm256_shuffle_epi8(lut2, nibble2); + auto token3 = _mm256_shuffle_epi8(lut3, nibble3); + auto token4 = _mm256_shuffle_epi8(lut4, nibble4); + + auto token = + _mm256_max_epi32(_mm256_max_epi32(_mm256_max_epi32(token0, token1), + _mm256_max_epi32(token2, 
token3)), + token4); + return SIMDVec32{token}; + } + FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const { + return SIMDVec32{_mm256_sub_epi32(_mm256_max_epu32(vec, to_subtract.vec), + to_subtract.vec)}; + } + FJXL_INLINE SIMDVec32 Sub(const SIMDVec32& to_subtract) const { + return SIMDVec32{_mm256_sub_epi32(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const { + return SIMDVec32{_mm256_add_epi32(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const { + return SIMDVec32{_mm256_xor_si256(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec32 Pow2() const { + return SIMDVec32{_mm256_sllv_epi32(_mm256_set1_epi32(1), vec)}; + } + FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const { + return Mask32{_mm256_cmpeq_epi32(vec, oth.vec)}; + } + FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const { + return Mask32{_mm256_cmpgt_epi32(vec, oth.vec)}; + } + template + FJXL_INLINE SIMDVec32 SignedShiftRight() const { + return SIMDVec32{_mm256_srai_epi32(vec, i)}; + } +}; + +struct SIMDVec16; + +struct Mask16 { + __m256i mask; + SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false); + Mask16 And(const Mask16& oth) const { + return Mask16{_mm256_and_si256(mask, oth.mask)}; + } + size_t CountPrefix() const { + return CtzNonZero( + ~static_cast((uint32_t)_mm256_movemask_epi8(mask))) / + 2; + } +}; + +struct SIMDVec16 { + __m256i vec; + + static constexpr size_t kLanes = 16; + + FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) { + return SIMDVec16{_mm256_loadu_si256((__m256i*)data)}; + } + FJXL_INLINE void Store(uint16_t* data) { + _mm256_storeu_si256((__m256i*)data, vec); + } + FJXL_INLINE static SIMDVec16 Val(uint16_t v) { + return SIMDVec16{_mm256_set1_epi16(v)}; + } + FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo, + const SIMDVec32& hi) { + auto tmp = _mm256_packus_epi32(lo.vec, hi.vec); + return SIMDVec16{_mm256_permute4x64_epi64(tmp, 0b11011000)}; + } + + FJXL_INLINE SIMDVec16 
ValToToken() const { + auto nibble0 = + _mm256_or_si256(_mm256_and_si256(vec, _mm256_set1_epi16(0xF)), + _mm256_set1_epi16(0xFF00)); + auto nibble1 = _mm256_or_si256( + _mm256_and_si256(_mm256_srli_epi16(vec, 4), _mm256_set1_epi16(0xF)), + _mm256_set1_epi16(0xFF00)); + auto nibble2 = _mm256_or_si256( + _mm256_and_si256(_mm256_srli_epi16(vec, 8), _mm256_set1_epi16(0xF)), + _mm256_set1_epi16(0xFF00)); + auto nibble3 = + _mm256_or_si256(_mm256_srli_epi16(vec, 12), _mm256_set1_epi16(0xFF00)); + + auto lut0 = _mm256_broadcastsi128_si256( + _mm_setr_epi8(0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4)); + auto lut1 = _mm256_broadcastsi128_si256( + _mm_setr_epi8(0, 5, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8)); + auto lut2 = _mm256_broadcastsi128_si256(_mm_setr_epi8( + 0, 9, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12)); + auto lut3 = _mm256_broadcastsi128_si256(_mm_setr_epi8( + 0, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16)); + + auto token0 = _mm256_shuffle_epi8(lut0, nibble0); + auto token1 = _mm256_shuffle_epi8(lut1, nibble1); + auto token2 = _mm256_shuffle_epi8(lut2, nibble2); + auto token3 = _mm256_shuffle_epi8(lut3, nibble3); + + auto token = _mm256_max_epi16(_mm256_max_epi16(token0, token1), + _mm256_max_epi16(token2, token3)); + return SIMDVec16{token}; + } + + FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const { + return SIMDVec16{_mm256_subs_epu16(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const { + return SIMDVec16{_mm256_sub_epi16(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const { + return SIMDVec16{_mm256_add_epi16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const { + return SIMDVec16{_mm256_min_epu16(vec, oth.vec)}; + } + FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const { + return Mask16{_mm256_cmpeq_epi16(vec, oth.vec)}; + } + FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const { + return 
Mask16{_mm256_cmpgt_epi16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Pow2() const { + auto pow2_lo_lut = _mm256_broadcastsi128_si256( + _mm_setr_epi8(1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, + 1u << 7, 0, 0, 0, 0, 0, 0, 0, 0)); + auto pow2_hi_lut = _mm256_broadcastsi128_si256( + _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1 << 0, 1 << 1, 1 << 2, 1 << 3, + 1 << 4, 1 << 5, 1 << 6, 1u << 7)); + + auto masked = _mm256_or_si256(vec, _mm256_set1_epi16(0xFF00)); + + auto pow2_lo = _mm256_shuffle_epi8(pow2_lo_lut, masked); + auto pow2_hi = _mm256_shuffle_epi8(pow2_hi_lut, masked); + + auto pow2 = _mm256_or_si256(_mm256_slli_epi16(pow2_hi, 8), pow2_lo); + return SIMDVec16{pow2}; + } + FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const { + return SIMDVec16{_mm256_or_si256(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const { + return SIMDVec16{_mm256_xor_si256(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const { + return SIMDVec16{_mm256_and_si256(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const { + return SIMDVec16{_mm256_srai_epi16(_mm256_add_epi16(vec, oth.vec), 1)}; + } + FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const { + return SIMDVec16{_mm256_or_si256(vec, _mm256_set1_epi16(0xFF00))}; + } + FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const { + return SIMDVec16{_mm256_shuffle_epi8( + _mm256_broadcastsi128_si256(_mm_loadu_si128((__m128i*)table)), vec)}; + } + FJXL_INLINE VecPair Interleave(const SIMDVec16& low) const { + auto v02 = _mm256_unpacklo_epi16(low.vec, vec); + auto v13 = _mm256_unpackhi_epi16(low.vec, vec); + return {SIMDVec16{_mm256_permute2x128_si256(v02, v13, 0x20)}, + SIMDVec16{_mm256_permute2x128_si256(v02, v13, 0x31)}}; + } + FJXL_INLINE VecPair Upcast() const { + auto v02 = _mm256_unpacklo_epi16(vec, _mm256_setzero_si256()); + auto v13 = _mm256_unpackhi_epi16(vec, _mm256_setzero_si256()); + return {SIMDVec32{_mm256_permute2x128_si256(v02, v13, 0x20)}, + 
SIMDVec32{_mm256_permute2x128_si256(v02, v13, 0x31)}}; + } + template + FJXL_INLINE SIMDVec16 SignedShiftRight() const { + return SIMDVec16{_mm256_srai_epi16(vec, i)}; + } + + static std::array LoadG8(const unsigned char* data) { + __m128i bytes = _mm_loadu_si128((__m128i*)data); + return {SIMDVec16{_mm256_cvtepu8_epi16(bytes)}}; + } + static std::array LoadG16(const unsigned char* data) { + return {Load((const uint16_t*)data)}; + } + + static std::array LoadGA8(const unsigned char* data) { + __m256i bytes = _mm256_loadu_si256((__m256i*)data); + __m256i gray = _mm256_and_si256(bytes, _mm256_set1_epi16(0xFF)); + __m256i alpha = _mm256_srli_epi16(bytes, 8); + return {SIMDVec16{gray}, SIMDVec16{alpha}}; + } + static std::array LoadGA16(const unsigned char* data) { + __m256i bytes1 = _mm256_loadu_si256((__m256i*)data); + __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 32)); + __m256i g_mask = _mm256_set1_epi32(0xFFFF); + __m256i g = _mm256_permute4x64_epi64( + _mm256_packus_epi32(_mm256_and_si256(bytes1, g_mask), + _mm256_and_si256(bytes2, g_mask)), + 0b11011000); + __m256i a = _mm256_permute4x64_epi64( + _mm256_packus_epi32(_mm256_srli_epi32(bytes1, 16), + _mm256_srli_epi32(bytes2, 16)), + 0b11011000); + return {SIMDVec16{g}, SIMDVec16{a}}; + } + + static std::array LoadRGB8(const unsigned char* data) { + __m128i bytes0 = _mm_loadu_si128((__m128i*)data); + __m128i bytes1 = _mm_loadu_si128((__m128i*)(data + 16)); + __m128i bytes2 = _mm_loadu_si128((__m128i*)(data + 32)); + + __m128i idx = + _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13); + + __m128i r6b5g5_0 = _mm_shuffle_epi8(bytes0, idx); + __m128i g6r5b5_1 = _mm_shuffle_epi8(bytes1, idx); + __m128i b6g5r5_2 = _mm_shuffle_epi8(bytes2, idx); + + __m128i mask010 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0, 0, 0, 0, 0); + __m128i mask001 = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF); + + __m128i b2g2b1 = _mm_blendv_epi8(b6g5r5_2, 
g6r5b5_1, mask001); + __m128i b2b0b1 = _mm_blendv_epi8(b2g2b1, r6b5g5_0, mask010); + + __m128i r0r1b1 = _mm_blendv_epi8(r6b5g5_0, g6r5b5_1, mask010); + __m128i r0r1r2 = _mm_blendv_epi8(r0r1b1, b6g5r5_2, mask001); + + __m128i g1r1g0 = _mm_blendv_epi8(g6r5b5_1, r6b5g5_0, mask001); + __m128i g1g2g0 = _mm_blendv_epi8(g1r1g0, b6g5r5_2, mask010); + + __m128i g0g1g2 = _mm_alignr_epi8(g1g2g0, g1g2g0, 11); + __m128i b0b1b2 = _mm_alignr_epi8(b2b0b1, b2b0b1, 6); + + return {SIMDVec16{_mm256_cvtepu8_epi16(r0r1r2)}, + SIMDVec16{_mm256_cvtepu8_epi16(g0g1g2)}, + SIMDVec16{_mm256_cvtepu8_epi16(b0b1b2)}}; + } + static std::array LoadRGB16(const unsigned char* data) { + auto load_and_split_lohi = [](const unsigned char* data) { + // LHLHLH... + __m256i bytes = _mm256_loadu_si256((__m256i*)data); + // L0L0L0... + __m256i lo = _mm256_and_si256(bytes, _mm256_set1_epi16(0xFF)); + // H0H0H0... + __m256i hi = _mm256_srli_epi16(bytes, 8); + // LLLLLLLLHHHHHHHHLLLLLLLLHHHHHHHH + __m256i packed = _mm256_packus_epi16(lo, hi); + return _mm256_permute4x64_epi64(packed, 0b11011000); + }; + __m256i bytes0 = load_and_split_lohi(data); + __m256i bytes1 = load_and_split_lohi(data + 32); + __m256i bytes2 = load_and_split_lohi(data + 64); + + __m256i idx = _mm256_broadcastsi128_si256( + _mm_setr_epi8(0, 3, 6, 9, 12, 15, 2, 5, 8, 11, 14, 1, 4, 7, 10, 13)); + + __m256i r6b5g5_0 = _mm256_shuffle_epi8(bytes0, idx); + __m256i g6r5b5_1 = _mm256_shuffle_epi8(bytes1, idx); + __m256i b6g5r5_2 = _mm256_shuffle_epi8(bytes2, idx); + + __m256i mask010 = _mm256_broadcastsi128_si256(_mm_setr_epi8( + 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0)); + __m256i mask001 = _mm256_broadcastsi128_si256(_mm_setr_epi8( + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF)); + + __m256i b2g2b1 = _mm256_blendv_epi8(b6g5r5_2, g6r5b5_1, mask001); + __m256i b2b0b1 = _mm256_blendv_epi8(b2g2b1, r6b5g5_0, mask010); + + __m256i r0r1b1 = _mm256_blendv_epi8(r6b5g5_0, g6r5b5_1, mask010); + __m256i r0r1r2 = 
_mm256_blendv_epi8(r0r1b1, b6g5r5_2, mask001); + + __m256i g1r1g0 = _mm256_blendv_epi8(g6r5b5_1, r6b5g5_0, mask001); + __m256i g1g2g0 = _mm256_blendv_epi8(g1r1g0, b6g5r5_2, mask010); + + __m256i g0g1g2 = _mm256_alignr_epi8(g1g2g0, g1g2g0, 11); + __m256i b0b1b2 = _mm256_alignr_epi8(b2b0b1, b2b0b1, 6); + + // Now r0r1r2, g0g1g2, b0b1b2 have the low bytes of the RGB pixels in their + // lower half, and the high bytes in their upper half. + + auto combine_low_hi = [](__m256i v) { + __m128i low = _mm256_extracti128_si256(v, 0); + __m128i hi = _mm256_extracti128_si256(v, 1); + __m256i low16 = _mm256_cvtepu8_epi16(low); + __m256i hi16 = _mm256_cvtepu8_epi16(hi); + return _mm256_or_si256(_mm256_slli_epi16(hi16, 8), low16); + }; + + return {SIMDVec16{combine_low_hi(r0r1r2)}, + SIMDVec16{combine_low_hi(g0g1g2)}, + SIMDVec16{combine_low_hi(b0b1b2)}}; + } + + static std::array LoadRGBA8(const unsigned char* data) { + __m256i bytes1 = _mm256_loadu_si256((__m256i*)data); + __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 32)); + __m256i rg_mask = _mm256_set1_epi32(0xFFFF); + __m256i rg = _mm256_permute4x64_epi64( + _mm256_packus_epi32(_mm256_and_si256(bytes1, rg_mask), + _mm256_and_si256(bytes2, rg_mask)), + 0b11011000); + __m256i ba = _mm256_permute4x64_epi64( + _mm256_packus_epi32(_mm256_srli_epi32(bytes1, 16), + _mm256_srli_epi32(bytes2, 16)), + 0b11011000); + __m256i r = _mm256_and_si256(rg, _mm256_set1_epi16(0xFF)); + __m256i g = _mm256_srli_epi16(rg, 8); + __m256i b = _mm256_and_si256(ba, _mm256_set1_epi16(0xFF)); + __m256i a = _mm256_srli_epi16(ba, 8); + return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}}; + } + static std::array LoadRGBA16(const unsigned char* data) { + __m256i bytes0 = _mm256_loadu_si256((__m256i*)data); + __m256i bytes1 = _mm256_loadu_si256((__m256i*)(data + 32)); + __m256i bytes2 = _mm256_loadu_si256((__m256i*)(data + 64)); + __m256i bytes3 = _mm256_loadu_si256((__m256i*)(data + 96)); + + auto pack32 = [](__m256i a, __m256i b) { + 
return _mm256_permute4x64_epi64(_mm256_packus_epi32(a, b), 0b11011000); + }; + auto packlow32 = [&pack32](__m256i a, __m256i b) { + __m256i mask = _mm256_set1_epi32(0xFFFF); + return pack32(_mm256_and_si256(a, mask), _mm256_and_si256(b, mask)); + }; + auto packhi32 = [&pack32](__m256i a, __m256i b) { + return pack32(_mm256_srli_epi32(a, 16), _mm256_srli_epi32(b, 16)); + }; + + __m256i rb0 = packlow32(bytes0, bytes1); + __m256i rb1 = packlow32(bytes2, bytes3); + __m256i ga0 = packhi32(bytes0, bytes1); + __m256i ga1 = packhi32(bytes2, bytes3); + + __m256i r = packlow32(rb0, rb1); + __m256i g = packlow32(ga0, ga1); + __m256i b = packhi32(rb0, rb1); + __m256i a = packhi32(ga0, ga1); + return {SIMDVec16{r}, SIMDVec16{g}, SIMDVec16{b}, SIMDVec16{a}}; + } + + void SwapEndian() { + auto indices = _mm256_broadcastsi128_si256( + _mm_setr_epi8(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14)); + vec = _mm256_shuffle_epi8(vec, indices); + } +}; + +SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true, + const SIMDVec16& if_false) { + return SIMDVec16{_mm256_blendv_epi8(if_false.vec, if_true.vec, mask)}; +} + +SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true, + const SIMDVec32& if_false) { + return SIMDVec32{_mm256_blendv_epi8(if_false.vec, if_true.vec, mask)}; +} + +struct Bits64 { + static constexpr size_t kLanes = 4; + + __m256i nbits; + __m256i bits; + + FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) { + _mm256_storeu_si256((__m256i*)nbits_out, nbits); + _mm256_storeu_si256((__m256i*)bits_out, bits); + } +}; + +struct Bits32 { + __m256i nbits; + __m256i bits; + + static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) { + return Bits32{nbits.vec, bits.vec}; + } + + Bits64 Merge() const { + auto nbits_hi32 = _mm256_srli_epi64(nbits, 32); + auto nbits_lo32 = _mm256_and_si256(nbits, _mm256_set1_epi64x(0xFFFFFFFF)); + auto bits_hi32 = _mm256_srli_epi64(bits, 32); + auto bits_lo32 = _mm256_and_si256(bits, _mm256_set1_epi64x(0xFFFFFFFF)); + + auto 
nbits64 = _mm256_add_epi64(nbits_hi32, nbits_lo32); + auto bits64 = + _mm256_or_si256(_mm256_sllv_epi64(bits_hi32, nbits_lo32), bits_lo32); + return Bits64{nbits64, bits64}; + } + + void Interleave(const Bits32& low) { + bits = _mm256_or_si256(_mm256_sllv_epi32(bits, low.nbits), low.bits); + nbits = _mm256_add_epi32(nbits, low.nbits); + } + + void ClipTo(size_t n) { + n = std::min(n, 8); + constexpr uint32_t kMask[16] = { + ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, 0, 0, 0, 0, + }; + __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 8 - n)); + nbits = _mm256_and_si256(mask, nbits); + bits = _mm256_and_si256(mask, bits); + } + void Skip(size_t n) { + n = std::min(n, 8); + constexpr uint32_t kMask[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, + }; + __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 8 - n)); + nbits = _mm256_and_si256(mask, nbits); + bits = _mm256_and_si256(mask, bits); + } +}; + +struct Bits16 { + __m256i nbits; + __m256i bits; + + static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) { + return Bits16{nbits.vec, bits.vec}; + } + + Bits32 Merge() const { + auto nbits_hi16 = _mm256_srli_epi32(nbits, 16); + auto nbits_lo16 = _mm256_and_si256(nbits, _mm256_set1_epi32(0xFFFF)); + auto bits_hi16 = _mm256_srli_epi32(bits, 16); + auto bits_lo16 = _mm256_and_si256(bits, _mm256_set1_epi32(0xFFFF)); + + auto nbits32 = _mm256_add_epi32(nbits_hi16, nbits_lo16); + auto bits32 = + _mm256_or_si256(_mm256_sllv_epi32(bits_hi16, nbits_lo16), bits_lo16); + return Bits32{nbits32, bits32}; + } + + void Interleave(const Bits16& low) { + auto pow2_lo_lut = _mm256_broadcastsi128_si256( + _mm_setr_epi8(1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, + 1u << 7, 0, 0, 0, 0, 0, 0, 0, 0)); + auto low_nbits_masked = + _mm256_or_si256(low.nbits, _mm256_set1_epi16(0xFF00)); + + auto bits_shifted = _mm256_mullo_epi16( + bits, _mm256_shuffle_epi8(pow2_lo_lut, low_nbits_masked)); + + nbits = _mm256_add_epi16(nbits, low.nbits); + 
bits = _mm256_or_si256(bits_shifted, low.bits); + } + + void ClipTo(size_t n) { + n = std::min(n, 16); + constexpr uint16_t kMask[32] = { + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 16 - n)); + nbits = _mm256_and_si256(mask, nbits); + bits = _mm256_and_si256(mask, bits); + } + + void Skip(size_t n) { + n = std::min(n, 16); + constexpr uint16_t kMask[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + }; + __m256i mask = _mm256_loadu_si256((__m256i*)(kMask + 16 - n)); + nbits = _mm256_and_si256(mask, nbits); + bits = _mm256_and_si256(mask, bits); + } +}; + +#endif + +#ifdef FJXL_NEON +#define FJXL_GENERIC_SIMD + +struct SIMDVec32; + +struct Mask32 { + uint32x4_t mask; + SIMDVec32 IfThenElse(const SIMDVec32& if_true, const SIMDVec32& if_false); + Mask32 And(const Mask32& oth) const { + return Mask32{vandq_u32(mask, oth.mask)}; + } + size_t CountPrefix() const { + uint32_t val_unset[4] = {0, 1, 2, 3}; + uint32_t val_set[4] = {4, 4, 4, 4}; + uint32x4_t val = vbslq_u32(mask, vld1q_u32(val_set), vld1q_u32(val_unset)); + return vminvq_u32(val); + } +}; + +struct SIMDVec32 { + uint32x4_t vec; + + static constexpr size_t kLanes = 4; + + FJXL_INLINE static SIMDVec32 Load(const uint32_t* data) { + return SIMDVec32{vld1q_u32(data)}; + } + FJXL_INLINE void Store(uint32_t* data) { vst1q_u32(data, vec); } + FJXL_INLINE static SIMDVec32 Val(uint32_t v) { + return SIMDVec32{vdupq_n_u32(v)}; + } + FJXL_INLINE SIMDVec32 ValToToken() const { + return SIMDVec32{vsubq_u32(vdupq_n_u32(32), vclzq_u32(vec))}; + } + FJXL_INLINE SIMDVec32 SatSubU(const SIMDVec32& to_subtract) const { + return SIMDVec32{vqsubq_u32(vec, to_subtract.vec)}; + } + FJXL_INLINE 
SIMDVec32 Sub(const SIMDVec32& to_subtract) const { + return SIMDVec32{vsubq_u32(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec32 Add(const SIMDVec32& oth) const { + return SIMDVec32{vaddq_u32(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec32 Xor(const SIMDVec32& oth) const { + return SIMDVec32{veorq_u32(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec32 Pow2() const { + return SIMDVec32{vshlq_u32(vdupq_n_u32(1), vreinterpretq_s32_u32(vec))}; + } + FJXL_INLINE Mask32 Eq(const SIMDVec32& oth) const { + return Mask32{vceqq_u32(vec, oth.vec)}; + } + FJXL_INLINE Mask32 Gt(const SIMDVec32& oth) const { + return Mask32{ + vcgtq_s32(vreinterpretq_s32_u32(vec), vreinterpretq_s32_u32(oth.vec))}; + } + template + FJXL_INLINE SIMDVec32 SignedShiftRight() const { + return SIMDVec32{ + vreinterpretq_u32_s32(vshrq_n_s32(vreinterpretq_s32_u32(vec), i))}; + } +}; + +struct SIMDVec16; + +struct Mask16 { + uint16x8_t mask; + SIMDVec16 IfThenElse(const SIMDVec16& if_true, const SIMDVec16& if_false); + Mask16 And(const Mask16& oth) const { + return Mask16{vandq_u16(mask, oth.mask)}; + } + size_t CountPrefix() const { + uint16_t val_unset[8] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint16_t val_set[8] = {8, 8, 8, 8, 8, 8, 8, 8}; + uint16x8_t val = vbslq_u16(mask, vld1q_u16(val_set), vld1q_u16(val_unset)); + return vminvq_u16(val); + } +}; + +struct SIMDVec16 { + uint16x8_t vec; + + static constexpr size_t kLanes = 8; + + FJXL_INLINE static SIMDVec16 Load(const uint16_t* data) { + return SIMDVec16{vld1q_u16(data)}; + } + FJXL_INLINE void Store(uint16_t* data) { vst1q_u16(data, vec); } + FJXL_INLINE static SIMDVec16 Val(uint16_t v) { + return SIMDVec16{vdupq_n_u16(v)}; + } + FJXL_INLINE static SIMDVec16 FromTwo32(const SIMDVec32& lo, + const SIMDVec32& hi) { + return SIMDVec16{vmovn_high_u32(vmovn_u32(lo.vec), hi.vec)}; + } + + FJXL_INLINE SIMDVec16 ValToToken() const { + return SIMDVec16{vsubq_u16(vdupq_n_u16(16), vclzq_u16(vec))}; + } + FJXL_INLINE SIMDVec16 SatSubU(const SIMDVec16& to_subtract) const { + 
return SIMDVec16{vqsubq_u16(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec16 Sub(const SIMDVec16& to_subtract) const { + return SIMDVec16{vsubq_u16(vec, to_subtract.vec)}; + } + FJXL_INLINE SIMDVec16 Add(const SIMDVec16& oth) const { + return SIMDVec16{vaddq_u16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Min(const SIMDVec16& oth) const { + return SIMDVec16{vminq_u16(vec, oth.vec)}; + } + FJXL_INLINE Mask16 Eq(const SIMDVec16& oth) const { + return Mask16{vceqq_u16(vec, oth.vec)}; + } + FJXL_INLINE Mask16 Gt(const SIMDVec16& oth) const { + return Mask16{ + vcgtq_s16(vreinterpretq_s16_u16(vec), vreinterpretq_s16_u16(oth.vec))}; + } + FJXL_INLINE SIMDVec16 Pow2() const { + return SIMDVec16{vshlq_u16(vdupq_n_u16(1), vreinterpretq_s16_u16(vec))}; + } + FJXL_INLINE SIMDVec16 Or(const SIMDVec16& oth) const { + return SIMDVec16{vorrq_u16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 Xor(const SIMDVec16& oth) const { + return SIMDVec16{veorq_u16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 And(const SIMDVec16& oth) const { + return SIMDVec16{vandq_u16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 HAdd(const SIMDVec16& oth) const { + return SIMDVec16{vhaddq_u16(vec, oth.vec)}; + } + FJXL_INLINE SIMDVec16 PrepareForU8Lookup() const { + return SIMDVec16{vorrq_u16(vec, vdupq_n_u16(0xFF00))}; + } + FJXL_INLINE SIMDVec16 U8Lookup(const uint8_t* table) const { + uint8x16_t tbl = vld1q_u8(table); + uint8x16_t indices = vreinterpretq_u8_u16(vec); + return SIMDVec16{vreinterpretq_u16_u8(vqtbl1q_u8(tbl, indices))}; + } + FJXL_INLINE VecPair Interleave(const SIMDVec16& low) const { + return {SIMDVec16{vzip1q_u16(low.vec, vec)}, + SIMDVec16{vzip2q_u16(low.vec, vec)}}; + } + FJXL_INLINE VecPair Upcast() const { + uint32x4_t lo = vmovl_u16(vget_low_u16(vec)); + uint32x4_t hi = vmovl_high_u16(vec); + return {SIMDVec32{lo}, SIMDVec32{hi}}; + } + template + FJXL_INLINE SIMDVec16 SignedShiftRight() const { + return SIMDVec16{ + vreinterpretq_u16_s16(vshrq_n_s16(vreinterpretq_s16_u16(vec), 
i))}; + } + + static std::array LoadG8(const unsigned char* data) { + uint8x8_t v = vld1_u8(data); + return {SIMDVec16{vmovl_u8(v)}}; + } + static std::array LoadG16(const unsigned char* data) { + return {Load((const uint16_t*)data)}; + } + + static std::array LoadGA8(const unsigned char* data) { + uint8x8x2_t v = vld2_u8(data); + return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])}}; + } + static std::array LoadGA16(const unsigned char* data) { + uint16x8x2_t v = vld2q_u16((const uint16_t*)data); + return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}}; + } + + static std::array LoadRGB8(const unsigned char* data) { + uint8x8x3_t v = vld3_u8(data); + return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])}, + SIMDVec16{vmovl_u8(v.val[2])}}; + } + static std::array LoadRGB16(const unsigned char* data) { + uint16x8x3_t v = vld3q_u16((const uint16_t*)data); + return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}, SIMDVec16{v.val[2]}}; + } + + static std::array LoadRGBA8(const unsigned char* data) { + uint8x8x4_t v = vld4_u8(data); + return {SIMDVec16{vmovl_u8(v.val[0])}, SIMDVec16{vmovl_u8(v.val[1])}, + SIMDVec16{vmovl_u8(v.val[2])}, SIMDVec16{vmovl_u8(v.val[3])}}; + } + static std::array LoadRGBA16(const unsigned char* data) { + uint16x8x4_t v = vld4q_u16((const uint16_t*)data); + return {SIMDVec16{v.val[0]}, SIMDVec16{v.val[1]}, SIMDVec16{v.val[2]}, + SIMDVec16{v.val[3]}}; + } + + void SwapEndian() { + vec = vreinterpretq_u16_u8(vrev16q_u8(vreinterpretq_u8_u16(vec))); + } +}; + +SIMDVec16 Mask16::IfThenElse(const SIMDVec16& if_true, + const SIMDVec16& if_false) { + return SIMDVec16{vbslq_u16(mask, if_true.vec, if_false.vec)}; +} + +SIMDVec32 Mask32::IfThenElse(const SIMDVec32& if_true, + const SIMDVec32& if_false) { + return SIMDVec32{vbslq_u32(mask, if_true.vec, if_false.vec)}; +} + +struct Bits64 { + static constexpr size_t kLanes = 2; + + uint64x2_t nbits; + uint64x2_t bits; + + FJXL_INLINE void Store(uint64_t* nbits_out, uint64_t* bits_out) { 
+ vst1q_u64(nbits_out, nbits); + vst1q_u64(bits_out, bits); + } +}; + +struct Bits32 { + uint32x4_t nbits; + uint32x4_t bits; + + static Bits32 FromRaw(SIMDVec32 nbits, SIMDVec32 bits) { + return Bits32{nbits.vec, bits.vec}; + } + + Bits64 Merge() const { + // TODO(veluca): can probably be optimized. + uint64x2_t nbits_lo32 = + vandq_u64(vreinterpretq_u64_u32(nbits), vdupq_n_u64(0xFFFFFFFF)); + uint64x2_t bits_hi32 = + vshlq_u64(vshrq_n_u64(vreinterpretq_u64_u32(bits), 32), + vreinterpretq_s64_u64(nbits_lo32)); + uint64x2_t bits_lo32 = + vandq_u64(vreinterpretq_u64_u32(bits), vdupq_n_u64(0xFFFFFFFF)); + uint64x2_t nbits64 = + vsraq_n_u64(nbits_lo32, vreinterpretq_u64_u32(nbits), 32); + uint64x2_t bits64 = vorrq_u64(bits_hi32, bits_lo32); + return Bits64{nbits64, bits64}; + } + + void Interleave(const Bits32& low) { + bits = + vorrq_u32(vshlq_u32(bits, vreinterpretq_s32_u32(low.nbits)), low.bits); + nbits = vaddq_u32(nbits, low.nbits); + } + + void ClipTo(size_t n) { + n = std::min(n, 4); + constexpr uint32_t kMask[8] = { + ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, + }; + uint32x4_t mask = vld1q_u32(kMask + 4 - n); + nbits = vandq_u32(mask, nbits); + bits = vandq_u32(mask, bits); + } + void Skip(size_t n) { + n = std::min(n, 4); + constexpr uint32_t kMask[8] = { + 0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u, + }; + uint32x4_t mask = vld1q_u32(kMask + 4 - n); + nbits = vandq_u32(mask, nbits); + bits = vandq_u32(mask, bits); + } +}; + +struct Bits16 { + uint16x8_t nbits; + uint16x8_t bits; + + static Bits16 FromRaw(SIMDVec16 nbits, SIMDVec16 bits) { + return Bits16{nbits.vec, bits.vec}; + } + + Bits32 Merge() const { + // TODO(veluca): can probably be optimized. 
+ uint32x4_t nbits_lo16 = + vandq_u32(vreinterpretq_u32_u16(nbits), vdupq_n_u32(0xFFFF)); + uint32x4_t bits_hi16 = + vshlq_u32(vshrq_n_u32(vreinterpretq_u32_u16(bits), 16), + vreinterpretq_s32_u32(nbits_lo16)); + uint32x4_t bits_lo16 = + vandq_u32(vreinterpretq_u32_u16(bits), vdupq_n_u32(0xFFFF)); + uint32x4_t nbits32 = + vsraq_n_u32(nbits_lo16, vreinterpretq_u32_u16(nbits), 16); + uint32x4_t bits32 = vorrq_u32(bits_hi16, bits_lo16); + return Bits32{nbits32, bits32}; + } + + void Interleave(const Bits16& low) { + bits = + vorrq_u16(vshlq_u16(bits, vreinterpretq_s16_u16(low.nbits)), low.bits); + nbits = vaddq_u16(nbits, low.nbits); + } + + void ClipTo(size_t n) { + n = std::min(n, 8); + constexpr uint16_t kMask[16] = { + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + uint16x8_t mask = vld1q_u16(kMask + 8 - n); + nbits = vandq_u16(mask, nbits); + bits = vandq_u16(mask, bits); + } + void Skip(size_t n) { + n = std::min(n, 8); + constexpr uint16_t kMask[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + }; + uint16x8_t mask = vld1q_u16(kMask + 8 - n); + nbits = vandq_u16(mask, nbits); + bits = vandq_u16(mask, bits); + } +}; + +#endif + +#ifdef FJXL_GENERIC_SIMD +constexpr size_t SIMDVec32::kLanes; +constexpr size_t SIMDVec16::kLanes; + +//  Each of these functions will process SIMDVec16::kLanes worth of values. 
+ +FJXL_INLINE void TokenizeSIMD(const uint16_t* residuals, uint16_t* token_out, + uint16_t* nbits_out, uint16_t* bits_out) { + SIMDVec16 res = SIMDVec16::Load(residuals); + SIMDVec16 token = res.ValToToken(); + SIMDVec16 nbits = token.SatSubU(SIMDVec16::Val(1)); + SIMDVec16 bits = res.SatSubU(nbits.Pow2()); + token.Store(token_out); + nbits.Store(nbits_out); + bits.Store(bits_out); +} + +FJXL_INLINE void TokenizeSIMD(const uint32_t* residuals, uint16_t* token_out, + uint32_t* nbits_out, uint32_t* bits_out) { + static_assert(SIMDVec16::kLanes == 2 * SIMDVec32::kLanes, ""); + SIMDVec32 res_lo = SIMDVec32::Load(residuals); + SIMDVec32 res_hi = SIMDVec32::Load(residuals + SIMDVec32::kLanes); + SIMDVec32 token_lo = res_lo.ValToToken(); + SIMDVec32 token_hi = res_hi.ValToToken(); + SIMDVec32 nbits_lo = token_lo.SatSubU(SIMDVec32::Val(1)); + SIMDVec32 nbits_hi = token_hi.SatSubU(SIMDVec32::Val(1)); + SIMDVec32 bits_lo = res_lo.SatSubU(nbits_lo.Pow2()); + SIMDVec32 bits_hi = res_hi.SatSubU(nbits_hi.Pow2()); + SIMDVec16 token = SIMDVec16::FromTwo32(token_lo, token_hi); + token.Store(token_out); + nbits_lo.Store(nbits_out); + nbits_hi.Store(nbits_out + SIMDVec32::kLanes); + bits_lo.Store(bits_out); + bits_hi.Store(bits_out + SIMDVec32::kLanes); +} + +FJXL_INLINE void HuffmanSIMDUpTo13(const uint16_t* tokens, + const PrefixCode& code, uint16_t* nbits_out, + uint16_t* bits_out) { + SIMDVec16 tok = SIMDVec16::Load(tokens).PrepareForU8Lookup(); + tok.U8Lookup(code.raw_nbits_simd).Store(nbits_out); + tok.U8Lookup(code.raw_bits_simd).Store(bits_out); +} + +FJXL_INLINE void HuffmanSIMD14(const uint16_t* tokens, const PrefixCode& code, + uint16_t* nbits_out, uint16_t* bits_out) { + SIMDVec16 token_cap = SIMDVec16::Val(15); + SIMDVec16 tok = SIMDVec16::Load(tokens); + SIMDVec16 tok_index = tok.Min(token_cap).PrepareForU8Lookup(); + SIMDVec16 huff_bits_pre = tok_index.U8Lookup(code.raw_bits_simd); + // Set the highest bit when token == 16; the Huffman code is constructed in + // such 
a way that the code for token 15 is the same as the code for 16, + // except for the highest bit. + Mask16 needs_high_bit = tok.Eq(SIMDVec16::Val(16)); + SIMDVec16 huff_bits = needs_high_bit.IfThenElse( + huff_bits_pre.Or(SIMDVec16::Val(128)), huff_bits_pre); + huff_bits.Store(bits_out); + tok_index.U8Lookup(code.raw_nbits_simd).Store(nbits_out); +} + +FJXL_INLINE void HuffmanSIMDAbove14(const uint16_t* tokens, + const PrefixCode& code, uint16_t* nbits_out, + uint16_t* bits_out) { + SIMDVec16 tok = SIMDVec16::Load(tokens); + // We assume `tok` fits in a *signed* 16-bit integer. + Mask16 above = tok.Gt(SIMDVec16::Val(12)); + // 13, 14 -> 13 + // 15, 16 -> 14 + // 17, 18 -> 15 + SIMDVec16 remap_tok = above.IfThenElse(tok.HAdd(SIMDVec16::Val(13)), tok); + SIMDVec16 tok_index = remap_tok.PrepareForU8Lookup(); + SIMDVec16 huff_bits_pre = tok_index.U8Lookup(code.raw_bits_simd); + // Set the highest bit when token == 14, 16, 18. + Mask16 needs_high_bit = above.And(tok.Eq(tok.And(SIMDVec16::Val(0xFFFE)))); + SIMDVec16 huff_bits = needs_high_bit.IfThenElse( + huff_bits_pre.Or(SIMDVec16::Val(128)), huff_bits_pre); + huff_bits.Store(bits_out); + tok_index.U8Lookup(code.raw_nbits_simd).Store(nbits_out); +} + +FJXL_INLINE void StoreSIMDUpTo8(const uint16_t* nbits_tok, + const uint16_t* bits_tok, + const uint16_t* nbits_huff, + const uint16_t* bits_huff, size_t n, + size_t skip, Bits32* bits_out) { + Bits16 bits = + Bits16::FromRaw(SIMDVec16::Load(nbits_tok), SIMDVec16::Load(bits_tok)); + Bits16 huff_bits = + Bits16::FromRaw(SIMDVec16::Load(nbits_huff), SIMDVec16::Load(bits_huff)); + bits.Interleave(huff_bits); + bits.ClipTo(n); + bits.Skip(skip); + bits_out[0] = bits.Merge(); +} + +// Huffman and raw bits don't necessarily fit in a single u16 here. 
+FJXL_INLINE void StoreSIMDUpTo14(const uint16_t* nbits_tok, + const uint16_t* bits_tok, + const uint16_t* nbits_huff, + const uint16_t* bits_huff, size_t n, + size_t skip, Bits32* bits_out) { + VecPair bits = + SIMDVec16::Load(bits_tok).Interleave(SIMDVec16::Load(bits_huff)); + VecPair nbits = + SIMDVec16::Load(nbits_tok).Interleave(SIMDVec16::Load(nbits_huff)); + Bits16 low = Bits16::FromRaw(nbits.low, bits.low); + Bits16 hi = Bits16::FromRaw(nbits.hi, bits.hi); + low.ClipTo(2 * n); + low.Skip(2 * skip); + hi.ClipTo(std::max(2 * n, SIMDVec16::kLanes) - SIMDVec16::kLanes); + hi.Skip(std::max(2 * skip, SIMDVec16::kLanes) - SIMDVec16::kLanes); + + bits_out[0] = low.Merge(); + bits_out[1] = hi.Merge(); +} + +FJXL_INLINE void StoreSIMDAbove14(const uint32_t* nbits_tok, + const uint32_t* bits_tok, + const uint16_t* nbits_huff, + const uint16_t* bits_huff, size_t n, + size_t skip, Bits32* bits_out) { + static_assert(SIMDVec16::kLanes == 2 * SIMDVec32::kLanes, ""); + Bits32 bits_low = + Bits32::FromRaw(SIMDVec32::Load(nbits_tok), SIMDVec32::Load(bits_tok)); + Bits32 bits_hi = + Bits32::FromRaw(SIMDVec32::Load(nbits_tok + SIMDVec32::kLanes), + SIMDVec32::Load(bits_tok + SIMDVec32::kLanes)); + + VecPair huff_bits = SIMDVec16::Load(bits_huff).Upcast(); + VecPair huff_nbits = SIMDVec16::Load(nbits_huff).Upcast(); + + Bits32 huff_low = Bits32::FromRaw(huff_nbits.low, huff_bits.low); + Bits32 huff_hi = Bits32::FromRaw(huff_nbits.hi, huff_bits.hi); + + bits_low.Interleave(huff_low); + bits_low.ClipTo(n); + bits_low.Skip(skip); + bits_out[0] = bits_low; + bits_hi.Interleave(huff_hi); + bits_hi.ClipTo(std::max(n, SIMDVec32::kLanes) - SIMDVec32::kLanes); + bits_hi.Skip(std::max(skip, SIMDVec32::kLanes) - SIMDVec32::kLanes); + bits_out[1] = bits_hi; +} + +#ifdef FJXL_AVX512 +FJXL_INLINE void StoreToWriterAVX512(const Bits32& bits32, BitWriter& output) { + __m512i bits = bits32.bits; + __m512i nbits = bits32.nbits; + + // Insert the leftover bits from the bit buffer at the bottom of 
the vector + // and extract the top of the vector. + uint64_t trail_bits = + _mm512_cvtsi512_si32(_mm512_alignr_epi32(bits, bits, 15)); + uint64_t trail_nbits = + _mm512_cvtsi512_si32(_mm512_alignr_epi32(nbits, nbits, 15)); + __m512i lead_bits = _mm512_set1_epi32(output.buffer); + __m512i lead_nbits = _mm512_set1_epi32(output.bits_in_buffer); + bits = _mm512_alignr_epi32(bits, lead_bits, 15); + nbits = _mm512_alignr_epi32(nbits, lead_nbits, 15); + + // Merge 32 -> 64 bits. + Bits32 b{nbits, bits}; + Bits64 b64 = b.Merge(); + bits = b64.bits; + nbits = b64.nbits; + + __m512i zero = _mm512_setzero_si512(); + + auto sh1 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 7); }; + auto sh2 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 6); }; + auto sh4 = [zero](__m512i vec) { return _mm512_alignr_epi64(vec, zero, 4); }; + + // Compute first-past-end-bit-position. + __m512i end_interm0 = _mm512_add_epi64(nbits, sh1(nbits)); + __m512i end_interm1 = _mm512_add_epi64(end_interm0, sh2(end_interm0)); + __m512i end = _mm512_add_epi64(end_interm1, sh4(end_interm1)); + + uint64_t simd_nbits = _mm512_cvtsi512_si32(_mm512_alignr_epi64(end, end, 7)); + + // Compute begin-bit-position. + __m512i begin = _mm512_sub_epi64(end, nbits); + + // Index of the last bit in the chunk, or the end bit if nbits==0. + __m512i last = _mm512_mask_sub_epi64( + end, _mm512_cmpneq_epi64_mask(nbits, zero), end, _mm512_set1_epi64(1)); + + __m512i lane_offset_mask = _mm512_set1_epi64(63); + + // Starting position of the chunk that each lane will ultimately belong to. + __m512i chunk_start = _mm512_andnot_si512(lane_offset_mask, last); + + // For all lanes that contain bits belonging to two different 64-bit chunks, + // compute the number of bits that belong to the first chunk. + // total # of bits fit in a u16, so we can satsub_u16 here. + __m512i first_chunk_nbits = _mm512_subs_epu16(chunk_start, begin); + + // Move all the previous-chunk-bits to the previous lane. 
+ __m512i negnbits = _mm512_sub_epi64(_mm512_set1_epi64(64), first_chunk_nbits); + __m512i first_chunk_bits = + _mm512_srlv_epi64(_mm512_sllv_epi64(bits, negnbits), negnbits); + __m512i first_chunk_bits_down = + _mm512_alignr_epi32(zero, first_chunk_bits, 2); + bits = _mm512_srlv_epi64(bits, first_chunk_nbits); + nbits = _mm512_sub_epi64(nbits, first_chunk_nbits); + bits = _mm512_or_si512(bits, _mm512_sllv_epi64(first_chunk_bits_down, nbits)); + begin = _mm512_add_epi64(begin, first_chunk_nbits); + + // We now know that every lane should give bits to only one chunk. We can + // shift the bits and then horizontally-or-reduce them within the same chunk. + __m512i offset = _mm512_and_si512(begin, lane_offset_mask); + __m512i aligned_bits = _mm512_sllv_epi64(bits, offset); + // h-or-reduce within same chunk + __m512i red0 = _mm512_mask_or_epi64( + aligned_bits, _mm512_cmpeq_epi64_mask(sh1(chunk_start), chunk_start), + sh1(aligned_bits), aligned_bits); + __m512i red1 = _mm512_mask_or_epi64( + red0, _mm512_cmpeq_epi64_mask(sh2(chunk_start), chunk_start), sh2(red0), + red0); + __m512i reduced = _mm512_mask_or_epi64( + red1, _mm512_cmpeq_epi64_mask(sh4(chunk_start), chunk_start), sh4(red1), + red1); + // Extract the highest lane that belongs to each chunk (the lane that ends up + // with the OR-ed value of all the other lanes of that chunk). + __m512i next_chunk_start = + _mm512_alignr_epi32(_mm512_set1_epi64(~0), chunk_start, 2); + __m512i result = _mm512_maskz_compress_epi64( + _mm512_cmpneq_epi64_mask(chunk_start, next_chunk_start), reduced); + + _mm512_storeu_si512((__m512i*)(output.data.get() + output.bytes_written), + result); + + // Update the bit writer and add the last 32-bit lane. + // Note that since trail_nbits was at most 32 to begin with, operating on + // trail_bits does not risk overflowing. 
+ output.bytes_written += simd_nbits / 8; + // Here we are implicitly relying on the fact that simd_nbits < 512 to know + // that the byte of bitreader data we access is initialized. This is + // guaranteed because the remaining bits in the bitreader buffer are at most + // 7, so simd_nbits <= 505 always. + trail_bits = (trail_bits << (simd_nbits % 8)) + + output.data.get()[output.bytes_written]; + trail_nbits += simd_nbits % 8; + StoreLE64(output.data.get() + output.bytes_written, trail_bits); + size_t trail_bytes = trail_nbits / 8; + output.bits_in_buffer = trail_nbits % 8; + output.buffer = trail_bits >> (trail_bytes * 8); + output.bytes_written += trail_bytes; +} + +#endif + +template +FJXL_INLINE void StoreToWriter(const Bits32* bits, BitWriter& output) { +#ifdef FJXL_AVX512 + static_assert(n <= 2, ""); + StoreToWriterAVX512(bits[0], output); + if (n == 2) { + StoreToWriterAVX512(bits[1], output); + } + return; +#endif + static_assert(n <= 4, ""); + alignas(64) uint64_t nbits64[Bits64::kLanes * n]; + alignas(64) uint64_t bits64[Bits64::kLanes * n]; + bits[0].Merge().Store(nbits64, bits64); + if (n > 1) { + bits[1].Merge().Store(nbits64 + Bits64::kLanes, bits64 + Bits64::kLanes); + } + if (n > 2) { + bits[2].Merge().Store(nbits64 + 2 * Bits64::kLanes, + bits64 + 2 * Bits64::kLanes); + } + if (n > 3) { + bits[3].Merge().Store(nbits64 + 3 * Bits64::kLanes, + bits64 + 3 * Bits64::kLanes); + } + output.WriteMultiple(nbits64, bits64, Bits64::kLanes * n); +} + +namespace detail { +template +struct IntegerTypes; + +template <> +struct IntegerTypes { + using signed_ = int16_t; + using unsigned_ = uint16_t; +}; + +template <> +struct IntegerTypes { + using signed_ = int32_t; + using unsigned_ = uint32_t; +}; + +template +struct SIMDType; + +template <> +struct SIMDType { + using type = SIMDVec16; +}; + +template <> +struct SIMDType { + using type = SIMDVec32; +}; + +} // namespace detail + +template +using signed_t = typename detail::IntegerTypes::signed_; + +template 
+using unsigned_t = typename detail::IntegerTypes::unsigned_; + +template +using simd_t = typename detail::SIMDType::type; + +// This function will process exactly one vector worth of pixels. + +template +size_t PredictPixels(const signed_t* pixels, const signed_t* pixels_left, + const signed_t* pixels_top, + const signed_t* pixels_topleft, + unsigned_t* residuals) { + T px = T::Load((unsigned_t*)pixels); + T left = T::Load((unsigned_t*)pixels_left); + T top = T::Load((unsigned_t*)pixels_top); + T topleft = T::Load((unsigned_t*)pixels_topleft); + T ac = left.Sub(topleft); + T ab = left.Sub(top); + T bc = top.Sub(topleft); + T grad = ac.Add(top); + T d = ab.Xor(bc); + T zero = T::Val(0); + T clamp = zero.Gt(d).IfThenElse(top, left); + T s = ac.Xor(bc); + T pred = zero.Gt(s).IfThenElse(grad, clamp); + T res = px.Sub(pred); + T res_times_2 = res.Add(res); + res = zero.Gt(res).IfThenElse(T::Val(-1).Sub(res_times_2), res_times_2); + res.Store(residuals); + return res.Eq(T::Val(0)).CountPrefix(); +} + +#endif + +void EncodeHybridUint000(uint32_t value, uint32_t* token, uint32_t* nbits, + uint32_t* bits) { + uint32_t n = FloorLog2(value); + *token = value ? n + 1 : 0; + *nbits = value ? n : 0; + *bits = value ? value - (1 << n) : 0; +} + +#ifdef FJXL_AVX512 +constexpr static size_t kLogChunkSize = 5; +#elif defined(FJXL_AVX2) || defined(FJXL_NEON) +// Even if NEON only has 128-bit lanes, it is still significantly (~1.3x) faster +// to process two vectors at a time. 
+constexpr static size_t kLogChunkSize = 4; +#else +constexpr static size_t kLogChunkSize = 3; +#endif + +constexpr static size_t kChunkSize = 1 << kLogChunkSize; + +template +void GenericEncodeChunk(const Residual* residuals, size_t n, size_t skip, + const PrefixCode& code, BitWriter& output) { + for (size_t ix = skip; ix < n; ix++) { + unsigned token, nbits, bits; + EncodeHybridUint000(residuals[ix], &token, &nbits, &bits); + output.Write(code.raw_nbits[token] + nbits, + code.raw_bits[token] | bits << code.raw_nbits[token]); + } +} + +struct UpTo8Bits { + size_t bitdepth; + explicit UpTo8Bits(size_t bitdepth) : bitdepth(bitdepth) { + assert(bitdepth <= 8); + } + // Here we can fit up to 9 extra bits + 7 Huffman bits in a u16; for all other + // symbols, we could actually go up to 8 Huffman bits as we have at most 8 + // extra bits; however, the SIMD bit merging logic for AVX2 assumes that no + // Huffman length is 8 or more, so we cap at 8 anyway. Last symbol is used for + // LZ77 lengths and has no limitations except allowing to represent 32 symbols + // in total. 
+ static constexpr uint8_t kMinRawLength[12] = {}; + static constexpr uint8_t kMaxRawLength[12] = { + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, + }; + static size_t MaxEncodedBitsPerSample() { return 16; } + static constexpr size_t kInputBytes = 1; + using pixel_t = int16_t; + using upixel_t = uint16_t; + + static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits, + size_t n, uint8_t* nbits_simd, + uint8_t* bits_simd) { + assert(n <= 16); + memcpy(nbits_simd, nbits, 16); + memcpy(bits_simd, bits, 16); + } + + static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip, + const PrefixCode& code, BitWriter& output) { +#ifdef FJXL_GENERIC_SIMD + Bits32 bits32[kChunkSize / SIMDVec16::kLanes]; + alignas(64) uint16_t bits[SIMDVec16::kLanes]; + alignas(64) uint16_t nbits[SIMDVec16::kLanes]; + alignas(64) uint16_t bits_huff[SIMDVec16::kLanes]; + alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes]; + alignas(64) uint16_t token[SIMDVec16::kLanes]; + for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) { + TokenizeSIMD(residuals + i, token, nbits, bits); + HuffmanSIMDUpTo13(token, code, nbits_huff, bits_huff); + StoreSIMDUpTo8(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i, + std::max(skip, i) - i, bits32 + i / SIMDVec16::kLanes); + } + StoreToWriter(bits32, output); + return; +#endif + GenericEncodeChunk(residuals, n, skip, code, output); + } + + size_t NumSymbols(bool doing_ycocg) const { + // values gain 1 bit for YCoCg, 1 bit for prediction. + // Maximum symbol is 1 + effective bit depth of residuals. 
+ if (doing_ycocg) { + return bitdepth + 3; + } else { + return bitdepth + 2; + } + } +}; +constexpr uint8_t UpTo8Bits::kMinRawLength[]; +constexpr uint8_t UpTo8Bits::kMaxRawLength[]; + +struct From9To13Bits { + size_t bitdepth; + explicit From9To13Bits(size_t bitdepth) : bitdepth(bitdepth) { + assert(bitdepth <= 13 && bitdepth >= 9); + } + // Last symbol is used for LZ77 lengths and has no limitations except allowing + // to represent 32 symbols in total. + // We cannot fit all the bits in a u16, so do not even try and use up to 8 + // bits per raw symbol. + // There are at most 16 raw symbols, so Huffman coding can be SIMDfied without + // any special tricks. + static constexpr uint8_t kMinRawLength[17] = {}; + static constexpr uint8_t kMaxRawLength[17] = { + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, + }; + static size_t MaxEncodedBitsPerSample() { return 21; } + static constexpr size_t kInputBytes = 2; + using pixel_t = int16_t; + using upixel_t = uint16_t; + + static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits, + size_t n, uint8_t* nbits_simd, + uint8_t* bits_simd) { + assert(n <= 16); + memcpy(nbits_simd, nbits, 16); + memcpy(bits_simd, bits, 16); + } + + static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip, + const PrefixCode& code, BitWriter& output) { +#ifdef FJXL_GENERIC_SIMD + Bits32 bits32[2 * kChunkSize / SIMDVec16::kLanes]; + alignas(64) uint16_t bits[SIMDVec16::kLanes]; + alignas(64) uint16_t nbits[SIMDVec16::kLanes]; + alignas(64) uint16_t bits_huff[SIMDVec16::kLanes]; + alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes]; + alignas(64) uint16_t token[SIMDVec16::kLanes]; + for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) { + TokenizeSIMD(residuals + i, token, nbits, bits); + HuffmanSIMDUpTo13(token, code, nbits_huff, bits_huff); + StoreSIMDUpTo14(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i, + std::max(skip, i) - i, + bits32 + 2 * i / SIMDVec16::kLanes); + } + StoreToWriter<2 * kChunkSize / 
SIMDVec16::kLanes>(bits32, output); + return; +#endif + GenericEncodeChunk(residuals, n, skip, code, output); + } + + size_t NumSymbols(bool doing_ycocg) const { + // values gain 1 bit for YCoCg, 1 bit for prediction. + // Maximum symbol is 1 + effective bit depth of residuals. + if (doing_ycocg) { + return bitdepth + 3; + } else { + return bitdepth + 2; + } + } +}; +constexpr uint8_t From9To13Bits::kMinRawLength[]; +constexpr uint8_t From9To13Bits::kMaxRawLength[]; + +void CheckHuffmanBitsSIMD(int bits1, int nbits1, int bits2, int nbits2) { + assert(nbits1 == 8); + assert(nbits2 == 8); + assert(bits2 == (bits1 | 128)); +} + +struct Exactly14Bits { + explicit Exactly14Bits(size_t bitdepth) { assert(bitdepth == 14); } + // Force LZ77 symbols to have at least 8 bits, and raw symbols 15 and 16 to + // have exactly 8, and no other symbol to have 8 or more. This ensures that + // the representation for 15 and 16 is identical up to one bit. + static constexpr uint8_t kMinRawLength[18] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 7, + }; + static constexpr uint8_t kMaxRawLength[18] = { + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 10, + }; + static constexpr size_t bitdepth = 14; + static size_t MaxEncodedBitsPerSample() { return 22; } + static constexpr size_t kInputBytes = 2; + using pixel_t = int16_t; + using upixel_t = uint16_t; + + static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits, + size_t n, uint8_t* nbits_simd, + uint8_t* bits_simd) { + assert(n == 17); + CheckHuffmanBitsSIMD(bits[15], nbits[15], bits[16], nbits[16]); + memcpy(nbits_simd, nbits, 16); + memcpy(bits_simd, bits, 16); + } + + static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip, + const PrefixCode& code, BitWriter& output) { +#ifdef FJXL_GENERIC_SIMD + Bits32 bits32[2 * kChunkSize / SIMDVec16::kLanes]; + alignas(64) uint16_t bits[SIMDVec16::kLanes]; + alignas(64) uint16_t nbits[SIMDVec16::kLanes]; + alignas(64) uint16_t bits_huff[SIMDVec16::kLanes]; 
+ alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes]; + alignas(64) uint16_t token[SIMDVec16::kLanes]; + for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) { + TokenizeSIMD(residuals + i, token, nbits, bits); + HuffmanSIMD14(token, code, nbits_huff, bits_huff); + StoreSIMDUpTo14(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i, + std::max(skip, i) - i, + bits32 + 2 * i / SIMDVec16::kLanes); + } + StoreToWriter<2 * kChunkSize / SIMDVec16::kLanes>(bits32, output); + return; +#endif + GenericEncodeChunk(residuals, n, skip, code, output); + } + + size_t NumSymbols(bool) const { return 17; } +}; +constexpr uint8_t Exactly14Bits::kMinRawLength[]; +constexpr uint8_t Exactly14Bits::kMaxRawLength[]; + +struct MoreThan14Bits { + size_t bitdepth; + explicit MoreThan14Bits(size_t bitdepth) : bitdepth(bitdepth) { + assert(bitdepth > 14); + assert(bitdepth <= 16); + } + // Force LZ77 symbols to have at least 8 bits, and raw symbols 13 to 18 to + // have exactly 8, and no other symbol to have 8 or more. This ensures that + // the representation for (13, 14), (15, 16), (17, 18) is identical up to one + // bit. 
+ static constexpr uint8_t kMinRawLength[20] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 7, + }; + static constexpr uint8_t kMaxRawLength[20] = { + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 10, + }; + static size_t MaxEncodedBitsPerSample() { return 24; } + static constexpr size_t kInputBytes = 2; + using pixel_t = int32_t; + using upixel_t = uint32_t; + + static void PrepareForSimd(const uint8_t* nbits, const uint8_t* bits, + size_t n, uint8_t* nbits_simd, + uint8_t* bits_simd) { + assert(n == 19); + CheckHuffmanBitsSIMD(bits[13], nbits[13], bits[14], nbits[14]); + CheckHuffmanBitsSIMD(bits[15], nbits[15], bits[16], nbits[16]); + CheckHuffmanBitsSIMD(bits[17], nbits[17], bits[18], nbits[18]); + for (size_t i = 0; i < 14; i++) { + nbits_simd[i] = nbits[i]; + bits_simd[i] = bits[i]; + } + nbits_simd[14] = nbits[15]; + bits_simd[14] = bits[15]; + nbits_simd[15] = nbits[17]; + bits_simd[15] = bits[17]; + } + + static void EncodeChunk(upixel_t* residuals, size_t n, size_t skip, + const PrefixCode& code, BitWriter& output) { +#ifdef FJXL_GENERIC_SIMD + Bits32 bits32[2 * kChunkSize / SIMDVec16::kLanes]; + alignas(64) uint32_t bits[SIMDVec16::kLanes]; + alignas(64) uint32_t nbits[SIMDVec16::kLanes]; + alignas(64) uint16_t bits_huff[SIMDVec16::kLanes]; + alignas(64) uint16_t nbits_huff[SIMDVec16::kLanes]; + alignas(64) uint16_t token[SIMDVec16::kLanes]; + for (size_t i = 0; i < kChunkSize; i += SIMDVec16::kLanes) { + TokenizeSIMD(residuals + i, token, nbits, bits); + HuffmanSIMDAbove14(token, code, nbits_huff, bits_huff); + StoreSIMDAbove14(nbits, bits, nbits_huff, bits_huff, std::max(n, i) - i, + std::max(skip, i) - i, + bits32 + 2 * i / SIMDVec16::kLanes); + } + StoreToWriter<2 * kChunkSize / SIMDVec16::kLanes>(bits32, output); + return; +#endif + GenericEncodeChunk(residuals, n, skip, code, output); + } + size_t NumSymbols(bool) const { return 19; } +}; +constexpr uint8_t MoreThan14Bits::kMinRawLength[]; +constexpr uint8_t 
MoreThan14Bits::kMaxRawLength[]; + +void PrepareDCGlobalCommon(bool is_single_group, size_t width, size_t height, + const PrefixCode code[4], BitWriter* output) { + output->Allocate(100000 + (is_single_group ? width * height * 16 : 0)); + // No patches, spline or noise. + output->Write(1, 1); // default DC dequantization factors (?) + output->Write(1, 1); // use global tree / histograms + output->Write(1, 0); // no lz77 for the tree + + output->Write(1, 1); // simple code for the tree's context map + output->Write(2, 0); // all contexts clustered together + output->Write(1, 1); // use prefix code for tree + output->Write(4, 0); // 000 hybrid uint + output->Write(6, 0b100011); // Alphabet size is 4 (var16) + output->Write(2, 1); // simple prefix code + output->Write(2, 3); // with 4 symbols + output->Write(2, 0); + output->Write(2, 1); + output->Write(2, 2); + output->Write(2, 3); + output->Write(1, 0); // First tree encoding option + // Huffman table + extra bits for the tree. + uint8_t symbol_bits[6] = {0b00, 0b10, 0b001, 0b101, 0b0011, 0b0111}; + uint8_t symbol_nbits[6] = {2, 2, 3, 3, 4, 4}; + // Write a tree with a leaf per channel, and gradient predictor for every + // leaf. 
+ for (auto v : {1, 2, 1, 4, 1, 0, 0, 5, 0, 0, 0, 0, 5, + 0, 0, 0, 0, 5, 0, 0, 0, 0, 5, 0, 0, 0}) { + output->Write(symbol_nbits[v], symbol_bits[v]); + } + + output->Write(1, 1); // Enable lz77 for the main bitstream + output->Write(2, 0b00); // lz77 offset 224 + static_assert(kLZ77Offset == 224, ""); + output->Write(4, 0b1010); // lz77 min length 7 + // 400 hybrid uint config for lz77 + output->Write(4, 4); + output->Write(3, 0); + output->Write(3, 0); + + output->Write(1, 1); // simple code for the context map + output->Write(2, 3); // 3 bits per entry + output->Write(3, 4); // channel 3 + output->Write(3, 3); // channel 2 + output->Write(3, 2); // channel 1 + output->Write(3, 1); // channel 0 + output->Write(3, 0); // distance histogram first + + output->Write(1, 1); // use prefix codes + output->Write(4, 0); // 000 hybrid uint config for distances (only need 0) + for (size_t i = 0; i < 4; i++) { + output->Write(4, 0); // 000 hybrid uint config for symbols (only <= 10) + } + + // Distance alphabet size: + output->Write(5, 0b00001); // 2: just need 1 for RLE (i.e. distance 1) + // Symbol + LZ77 alphabet size: + for (size_t i = 0; i < 4; i++) { + output->Write(1, 1); // > 1 + output->Write(4, 8); // <= 512 + output->Write(8, 256); // == 512 + } + + // Distance histogram: + output->Write(2, 1); // simple prefix code + output->Write(2, 0); // with one symbol + output->Write(1, 1); // 1 + + // Symbol + lz77 histogram: + for (size_t i = 0; i < 4; i++) { + code[i].WriteTo(output); + } + + // Group header for global modular image. 
+ output->Write(1, 1); // Global tree + output->Write(1, 1); // All default wp +} + +void PrepareDCGlobal(bool is_single_group, size_t width, size_t height, + size_t nb_chans, const PrefixCode code[4], + BitWriter* output) { + PrepareDCGlobalCommon(is_single_group, width, height, code, output); + if (nb_chans > 2) { + output->Write(2, 0b01); // 1 transform + output->Write(2, 0b00); // RCT + output->Write(5, 0b00000); // Starting from ch 0 + output->Write(2, 0b00); // YCoCg + } else { + output->Write(2, 0b00); // no transforms + } + if (!is_single_group) { + output->ZeroPadToByte(); + } +} + +template +struct ChunkEncoder { + FJXL_INLINE static void EncodeRle(size_t count, const PrefixCode& code, + BitWriter& output) { + if (count == 0) return; + count -= kLZ77MinLength + 1; + if (count < kLZ77CacheSize) { + output.Write(code.lz77_cache_nbits[count], code.lz77_cache_bits[count]); + } else { + unsigned token, nbits, bits; + EncodeHybridUintLZ77(count, &token, &nbits, &bits); + uint64_t wbits = bits; + wbits = (wbits << code.lz77_nbits[token]) | code.lz77_bits[token]; + wbits = (wbits << code.raw_nbits[0]) | code.raw_bits[0]; + output.Write(code.lz77_nbits[token] + nbits + code.raw_nbits[0], wbits); + } + } + + FJXL_INLINE void Chunk(size_t run, typename BitDepth::upixel_t* residuals, + size_t skip, size_t n) { + EncodeRle(run, *code, *output); + BitDepth::EncodeChunk(residuals, n, skip, *code, *output); + } + + inline void Finalize(size_t run) { EncodeRle(run, *code, *output); } + + const PrefixCode* code; + BitWriter* output; +}; + +template +struct ChunkSampleCollector { + FJXL_INLINE void Rle(size_t count, uint64_t* lz77_counts) { + if (count == 0) return; + raw_counts[0] += 1; + count -= kLZ77MinLength + 1; + unsigned token, nbits, bits; + EncodeHybridUintLZ77(count, &token, &nbits, &bits); + lz77_counts[token]++; + } + + FJXL_INLINE void Chunk(size_t run, typename BitDepth::upixel_t* residuals, + size_t skip, size_t n) { + // Run is broken. 
Encode the run and encode the individual vector. + Rle(run, lz77_counts); + for (size_t ix = skip; ix < n; ix++) { + unsigned token, nbits, bits; + EncodeHybridUint000(residuals[ix], &token, &nbits, &bits); + raw_counts[token]++; + } + } + + // don't count final run since we don't know how long it really is + void Finalize(size_t run) {} + + uint64_t* raw_counts; + uint64_t* lz77_counts; +}; + +constexpr uint32_t PackSigned(int32_t value) { + return (static_cast(value) << 1) ^ + ((static_cast(~value) >> 31) - 1); +} + +template +struct ChannelRowProcessor { + using upixel_t = typename BitDepth::upixel_t; + using pixel_t = typename BitDepth::pixel_t; + T* t; + void ProcessChunk(const pixel_t* row, const pixel_t* row_left, + const pixel_t* row_top, const pixel_t* row_topleft, + size_t n) { + alignas(64) upixel_t residuals[kChunkSize] = {}; + size_t prefix_size = 0; + size_t required_prefix_size = 0; +#ifdef FJXL_GENERIC_SIMD + constexpr size_t kNum = + sizeof(pixel_t) == 2 ? SIMDVec16::kLanes : SIMDVec32::kLanes; + for (size_t ix = 0; ix < kChunkSize; ix += kNum) { + size_t c = + PredictPixels>(row + ix, row_left + ix, row_top + ix, + row_topleft + ix, residuals + ix); + prefix_size = + prefix_size == required_prefix_size ? prefix_size + c : prefix_size; + required_prefix_size += kNum; + } +#else + for (size_t ix = 0; ix < kChunkSize; ix++) { + pixel_t px = row[ix]; + pixel_t left = row_left[ix]; + pixel_t top = row_top[ix]; + pixel_t topleft = row_topleft[ix]; + pixel_t ac = left - topleft; + pixel_t ab = left - top; + pixel_t bc = top - topleft; + pixel_t grad = static_cast(static_cast(ac) + + static_cast(top)); + pixel_t d = ab ^ bc; + pixel_t clamp = d < 0 ? top : left; + pixel_t s = ac ^ bc; + pixel_t pred = s < 0 ? grad : clamp; + residuals[ix] = PackSigned(px - pred); + prefix_size = prefix_size == required_prefix_size + ? 
prefix_size + (residuals[ix] == 0) + : prefix_size; + required_prefix_size += 1; + } +#endif + prefix_size = std::min(n, prefix_size); + if (prefix_size == n && (run > 0 || prefix_size > kLZ77MinLength)) { + // Run continues, nothing to do. + run += prefix_size; + } else if (prefix_size + run > kLZ77MinLength) { + // Run is broken. Encode the run and encode the individual vector. + t->Chunk(run + prefix_size, residuals, prefix_size, n); + run = 0; + } else { + // There was no run to begin with. + t->Chunk(0, residuals, 0, n); + } + } + + void ProcessRow(const pixel_t* row, const pixel_t* row_left, + const pixel_t* row_top, const pixel_t* row_topleft, + size_t xs) { + for (size_t x = 0; x < xs; x += kChunkSize) { + ProcessChunk(row + x, row_left + x, row_top + x, row_topleft + x, + std::min(kChunkSize, xs - x)); + } + } + + void Finalize() { t->Finalize(run); } + // Invariant: run == 0 or run > kLZ77MinLength. + size_t run = 0; +}; + +uint16_t LoadLE16(const unsigned char* ptr) { + return uint16_t{ptr[0]} | (uint16_t{ptr[1]} << 8); +} + +uint16_t SwapEndian(uint16_t in) { return (in >> 8) | (in << 8); } + +#ifdef FJXL_GENERIC_SIMD +void StorePixels(SIMDVec16 p, int16_t* dest) { p.Store((uint16_t*)dest); } + +void StorePixels(SIMDVec16 p, int32_t* dest) { + VecPair p_up = p.Upcast(); + p_up.low.Store((uint32_t*)dest); + p_up.hi.Store((uint32_t*)dest + SIMDVec32::kLanes); +} +#endif + +template +void FillRowG8(const unsigned char* rgba, size_t oxs, pixel_t* luma) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadG8(rgba + x); + StorePixels(rgb[0], luma + x); + } +#endif + for (; x < oxs; x++) { + luma[x] = rgba[x]; + } +} + +template +void FillRowG16(const unsigned char* rgba, size_t oxs, pixel_t* luma) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadG16(rgba + 2 * x); + if (big_endian) { + 
rgb[0].SwapEndian(); + } + StorePixels(rgb[0], luma + x); + } +#endif + for (; x < oxs; x++) { + uint16_t val = LoadLE16(rgba + 2 * x); + if (big_endian) { + val = SwapEndian(val); + } + luma[x] = val; + } +} + +template +void FillRowGA8(const unsigned char* rgba, size_t oxs, pixel_t* luma, + pixel_t* alpha) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadGA8(rgba + 2 * x); + StorePixels(rgb[0], luma + x); + StorePixels(rgb[1], alpha + x); + } +#endif + for (; x < oxs; x++) { + luma[x] = rgba[2 * x]; + alpha[x] = rgba[2 * x + 1]; + } +} + +template +void FillRowGA16(const unsigned char* rgba, size_t oxs, pixel_t* luma, + pixel_t* alpha) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadGA16(rgba + 4 * x); + if (big_endian) { + rgb[0].SwapEndian(); + rgb[1].SwapEndian(); + } + StorePixels(rgb[0], luma + x); + StorePixels(rgb[1], alpha + x); + } +#endif + for (; x < oxs; x++) { + uint16_t l = LoadLE16(rgba + 4 * x); + uint16_t a = LoadLE16(rgba + 4 * x + 2); + if (big_endian) { + l = SwapEndian(l); + a = SwapEndian(a); + } + luma[x] = l; + alpha[x] = a; + } +} + +template +void StoreYCoCg(pixel_t r, pixel_t g, pixel_t b, pixel_t* y, pixel_t* co, + pixel_t* cg) { + *co = r - b; + pixel_t tmp = b + (*co >> 1); + *cg = g - tmp; + *y = tmp + (*cg >> 1); +} + +#ifdef FJXL_GENERIC_SIMD +void StoreYCoCg(SIMDVec16 r, SIMDVec16 g, SIMDVec16 b, int16_t* y, int16_t* co, + int16_t* cg) { + SIMDVec16 co_v = r.Sub(b); + SIMDVec16 tmp = b.Add(co_v.SignedShiftRight<1>()); + SIMDVec16 cg_v = g.Sub(tmp); + SIMDVec16 y_v = tmp.Add(cg_v.SignedShiftRight<1>()); + y_v.Store((uint16_t*)y); + co_v.Store((uint16_t*)co); + cg_v.Store((uint16_t*)cg); +} + +void StoreYCoCg(SIMDVec16 r, SIMDVec16 g, SIMDVec16 b, int32_t* y, int32_t* co, + int32_t* cg) { + VecPair r_up = r.Upcast(); + VecPair g_up = g.Upcast(); + VecPair 
b_up = b.Upcast(); + SIMDVec32 co_lo_v = r_up.low.Sub(b_up.low); + SIMDVec32 tmp_lo = b_up.low.Add(co_lo_v.SignedShiftRight<1>()); + SIMDVec32 cg_lo_v = g_up.low.Sub(tmp_lo); + SIMDVec32 y_lo_v = tmp_lo.Add(cg_lo_v.SignedShiftRight<1>()); + SIMDVec32 co_hi_v = r_up.hi.Sub(b_up.hi); + SIMDVec32 tmp_hi = b_up.hi.Add(co_hi_v.SignedShiftRight<1>()); + SIMDVec32 cg_hi_v = g_up.hi.Sub(tmp_hi); + SIMDVec32 y_hi_v = tmp_hi.Add(cg_hi_v.SignedShiftRight<1>()); + y_lo_v.Store((uint32_t*)y); + co_lo_v.Store((uint32_t*)co); + cg_lo_v.Store((uint32_t*)cg); + y_hi_v.Store((uint32_t*)y + SIMDVec32::kLanes); + co_hi_v.Store((uint32_t*)co + SIMDVec32::kLanes); + cg_hi_v.Store((uint32_t*)cg + SIMDVec32::kLanes); +} +#endif + +template +void FillRowRGB8(const unsigned char* rgba, size_t oxs, pixel_t* y, pixel_t* co, + pixel_t* cg) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadRGB8(rgba + 3 * x); + StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x); + } +#endif + for (; x < oxs; x++) { + uint16_t r = rgba[3 * x]; + uint16_t g = rgba[3 * x + 1]; + uint16_t b = rgba[3 * x + 2]; + StoreYCoCg(r, g, b, y + x, co + x, cg + x); + } +} + +template +void FillRowRGB16(const unsigned char* rgba, size_t oxs, pixel_t* y, + pixel_t* co, pixel_t* cg) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadRGB16(rgba + 6 * x); + if (big_endian) { + rgb[0].SwapEndian(); + rgb[1].SwapEndian(); + rgb[2].SwapEndian(); + } + StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x); + } +#endif + for (; x < oxs; x++) { + uint16_t r = LoadLE16(rgba + 6 * x); + uint16_t g = LoadLE16(rgba + 6 * x + 2); + uint16_t b = LoadLE16(rgba + 6 * x + 4); + if (big_endian) { + r = SwapEndian(r); + g = SwapEndian(g); + b = SwapEndian(b); + } + StoreYCoCg(r, g, b, y + x, co + x, cg + x); + } +} + +template +void FillRowRGBA8(const unsigned char* 
rgba, size_t oxs, pixel_t* y, + pixel_t* co, pixel_t* cg, pixel_t* alpha) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadRGBA8(rgba + 4 * x); + StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x); + StorePixels(rgb[3], alpha + x); + } +#endif + for (; x < oxs; x++) { + uint16_t r = rgba[4 * x]; + uint16_t g = rgba[4 * x + 1]; + uint16_t b = rgba[4 * x + 2]; + uint16_t a = rgba[4 * x + 3]; + StoreYCoCg(r, g, b, y + x, co + x, cg + x); + alpha[x] = a; + } +} + +template +void FillRowRGBA16(const unsigned char* rgba, size_t oxs, pixel_t* y, + pixel_t* co, pixel_t* cg, pixel_t* alpha) { + size_t x = 0; +#ifdef FJXL_GENERIC_SIMD + for (; x + SIMDVec16::kLanes <= oxs; x += SIMDVec16::kLanes) { + auto rgb = SIMDVec16::LoadRGBA16(rgba + 8 * x); + if (big_endian) { + rgb[0].SwapEndian(); + rgb[1].SwapEndian(); + rgb[2].SwapEndian(); + rgb[3].SwapEndian(); + } + StoreYCoCg(rgb[0], rgb[1], rgb[2], y + x, co + x, cg + x); + StorePixels(rgb[3], alpha + x); + } +#endif + for (; x < oxs; x++) { + uint16_t r = LoadLE16(rgba + 8 * x); + uint16_t g = LoadLE16(rgba + 8 * x + 2); + uint16_t b = LoadLE16(rgba + 8 * x + 4); + uint16_t a = LoadLE16(rgba + 8 * x + 6); + if (big_endian) { + r = SwapEndian(r); + g = SwapEndian(g); + b = SwapEndian(b); + a = SwapEndian(a); + } + StoreYCoCg(r, g, b, y + x, co + x, cg + x); + alpha[x] = a; + } +} + +template +void ProcessImageArea(const unsigned char* rgba, size_t x0, size_t y0, + size_t xs, size_t yskip, size_t ys, size_t row_stride, + BitDepth bitdepth, size_t nb_chans, bool big_endian, + Processor* processors) { + constexpr size_t kPadding = 32; + + using pixel_t = typename BitDepth::pixel_t; + + constexpr size_t kAlign = 64; + constexpr size_t kAlignPixels = kAlign / sizeof(pixel_t); + + auto align = [=](pixel_t* ptr) { + size_t offset = reinterpret_cast(ptr) % kAlign; + if (offset) { + ptr += offset / sizeof(pixel_t); + } + return ptr; + }; + + 
constexpr size_t kNumPx = + (256 + kPadding * 2 + kAlignPixels + kAlignPixels - 1) / kAlignPixels * + kAlignPixels; + + std::vector, 2>> group_data(nb_chans); + + for (size_t y = 0; y < ys; y++) { + const auto rgba_row = + rgba + row_stride * (y0 + y) + x0 * nb_chans * BitDepth::kInputBytes; + pixel_t* crow[4] = {}; + pixel_t* prow[4] = {}; + for (size_t i = 0; i < nb_chans; i++) { + crow[i] = align(&group_data[i][y & 1][kPadding]); + prow[i] = align(&group_data[i][(y - 1) & 1][kPadding]); + } + + // Pre-fill rows with YCoCg converted pixels. + if (nb_chans == 1) { + if (BitDepth::kInputBytes == 1) { + FillRowG8(rgba_row, xs, crow[0]); + } else if (big_endian) { + FillRowG16(rgba_row, xs, crow[0]); + } else { + FillRowG16(rgba_row, xs, crow[0]); + } + } else if (nb_chans == 2) { + if (BitDepth::kInputBytes == 1) { + FillRowGA8(rgba_row, xs, crow[0], crow[1]); + } else if (big_endian) { + FillRowGA16(rgba_row, xs, crow[0], crow[1]); + } else { + FillRowGA16(rgba_row, xs, crow[0], crow[1]); + } + } else if (nb_chans == 3) { + if (BitDepth::kInputBytes == 1) { + FillRowRGB8(rgba_row, xs, crow[0], crow[1], crow[2]); + } else if (big_endian) { + FillRowRGB16(rgba_row, xs, crow[0], crow[1], + crow[2]); + } else { + FillRowRGB16(rgba_row, xs, crow[0], crow[1], + crow[2]); + } + } else { + if (BitDepth::kInputBytes == 1) { + FillRowRGBA8(rgba_row, xs, crow[0], crow[1], crow[2], crow[3]); + } else if (big_endian) { + FillRowRGBA16(rgba_row, xs, crow[0], crow[1], + crow[2], crow[3]); + } else { + FillRowRGBA16(rgba_row, xs, crow[0], crow[1], + crow[2], crow[3]); + } + } + // Deal with x == 0. + for (size_t c = 0; c < nb_chans; c++) { + *(crow[c] - 1) = y > 0 ? *(prow[c]) : 0; + // Fix topleft. + *(prow[c] - 1) = y > 0 ? *(prow[c]) : 0; + } + if (y < yskip) continue; + for (size_t c = 0; c < nb_chans; c++) { + // Get pointers to px/left/top/topleft data to speedup loop. 
+ const pixel_t* row = crow[c]; + const pixel_t* row_left = crow[c] - 1; + const pixel_t* row_top = y == 0 ? row_left : prow[c]; + const pixel_t* row_topleft = y == 0 ? row_left : prow[c] - 1; + + processors[c].ProcessRow(row, row_left, row_top, row_topleft, xs); + } + } + for (size_t c = 0; c < nb_chans; c++) { + processors[c].Finalize(); + } +} + +template +void WriteACSection(const unsigned char* rgba, size_t x0, size_t y0, size_t xs, + size_t ys, size_t row_stride, bool is_single_group, + BitDepth bitdepth, size_t nb_chans, bool big_endian, + const PrefixCode code[4], + std::array& output) { + for (size_t i = 0; i < nb_chans; i++) { + if (is_single_group && i == 0) continue; + output[i].Allocate(xs * ys * bitdepth.MaxEncodedBitsPerSample() + 4); + } + if (!is_single_group) { + // Group header for modular image. + // When the image is single-group, the global modular image is the one + // that contains the pixel data, and there is no group header. + output[0].Write(1, 1); // Global tree + output[0].Write(1, 1); // All default wp + output[0].Write(2, 0b00); // 0 transforms + } + + ChunkEncoder encoders[4]; + ChannelRowProcessor, BitDepth> row_encoders[4]; + for (size_t c = 0; c < nb_chans; c++) { + row_encoders[c].t = &encoders[c]; + encoders[c].output = &output[c]; + encoders[c].code = &code[c]; + } + ProcessImageArea, BitDepth>>( + rgba, x0, y0, xs, 0, ys, row_stride, bitdepth, nb_chans, big_endian, + row_encoders); +} + +constexpr int kHashExp = 16; +constexpr uint32_t kHashSize = 1 << kHashExp; +constexpr uint32_t kHashMultiplier = 2654435761; +constexpr int kMaxColors = 512; + +// can be any function that returns a value in 0 .. 
kHashSize-1 +// has to map 0 to 0 +inline uint32_t pixel_hash(uint32_t p) { + return (p * kHashMultiplier) >> (32 - kHashExp); +} + +template +void FillRowPalette(const unsigned char* inrow, size_t xs, + const int16_t* lookup, int16_t* out) { + for (size_t x = 0; x < xs; x++) { + uint32_t p = 0; + memcpy(&p, inrow + x * nb_chans, nb_chans); + out[x] = lookup[pixel_hash(p)]; + } +} + +template +void ProcessImageAreaPalette(const unsigned char* rgba, size_t x0, size_t y0, + size_t xs, size_t yskip, size_t ys, + size_t row_stride, const int16_t* lookup, + size_t nb_chans, Processor* processors) { + constexpr size_t kPadding = 32; + + std::vector> group_data(2); + Processor& row_encoder = processors[0]; + + for (size_t y = 0; y < ys; y++) { + // Pre-fill rows with palette converted pixels. + const unsigned char* inrow = rgba + row_stride * (y0 + y) + x0 * nb_chans; + int16_t* outrow = &group_data[y & 1][kPadding]; + if (nb_chans == 1) { + FillRowPalette<1>(inrow, xs, lookup, outrow); + } else if (nb_chans == 2) { + FillRowPalette<2>(inrow, xs, lookup, outrow); + } else if (nb_chans == 3) { + FillRowPalette<3>(inrow, xs, lookup, outrow); + } else if (nb_chans == 4) { + FillRowPalette<4>(inrow, xs, lookup, outrow); + } + // Deal with x == 0. + group_data[y & 1][kPadding - 1] = + y > 0 ? group_data[(y - 1) & 1][kPadding] : 0; + // Fix topleft. + group_data[(y - 1) & 1][kPadding - 1] = + y > 0 ? group_data[(y - 1) & 1][kPadding] : 0; + // Get pointers to px/left/top/topleft data to speedup loop. + const int16_t* row = &group_data[y & 1][kPadding]; + const int16_t* row_left = &group_data[y & 1][kPadding - 1]; + const int16_t* row_top = + y == 0 ? row_left : &group_data[(y - 1) & 1][kPadding]; + const int16_t* row_topleft = + y == 0 ? 
row_left : &group_data[(y - 1) & 1][kPadding - 1]; + + row_encoder.ProcessRow(row, row_left, row_top, row_topleft, xs); + } + row_encoder.Finalize(); +} + +void WriteACSectionPalette(const unsigned char* rgba, size_t x0, size_t y0, + size_t xs, size_t ys, size_t row_stride, + bool is_single_group, const PrefixCode code[4], + const int16_t* lookup, size_t nb_chans, + BitWriter& output) { + if (!is_single_group) { + output.Allocate(16 * xs * ys + 4); + // Group header for modular image. + // When the image is single-group, the global modular image is the one + // that contains the pixel data, and there is no group header. + output.Write(1, 1); // Global tree + output.Write(1, 1); // All default wp + output.Write(2, 0b00); // 0 transforms + } + + ChunkEncoder encoder; + ChannelRowProcessor, UpTo8Bits> row_encoder; + + row_encoder.t = &encoder; + encoder.output = &output; + encoder.code = &code[is_single_group ? 1 : 0]; + ProcessImageAreaPalette< + ChannelRowProcessor, UpTo8Bits>>( + rgba, x0, y0, xs, 0, ys, row_stride, lookup, nb_chans, &row_encoder); +} + +template +void CollectSamples(const unsigned char* rgba, size_t x0, size_t y0, size_t xs, + size_t row_stride, size_t row_count, + uint64_t raw_counts[4][kNumRawSymbols], + uint64_t lz77_counts[4][kNumLZ77], bool is_single_group, + bool palette, BitDepth bitdepth, size_t nb_chans, + bool big_endian, const int16_t* lookup) { + if (palette) { + ChunkSampleCollector sample_collectors[4]; + ChannelRowProcessor, UpTo8Bits> + row_sample_collectors[4]; + for (size_t c = 0; c < nb_chans; c++) { + row_sample_collectors[c].t = &sample_collectors[c]; + sample_collectors[c].raw_counts = raw_counts[is_single_group ? 1 : 0]; + sample_collectors[c].lz77_counts = lz77_counts[is_single_group ? 
1 : 0]; + } + ProcessImageAreaPalette< + ChannelRowProcessor, UpTo8Bits>>( + rgba, x0, y0, xs, 1, 1 + row_count, row_stride, lookup, nb_chans, + row_sample_collectors); + } else { + ChunkSampleCollector sample_collectors[4]; + ChannelRowProcessor, BitDepth> + row_sample_collectors[4]; + for (size_t c = 0; c < nb_chans; c++) { + row_sample_collectors[c].t = &sample_collectors[c]; + sample_collectors[c].raw_counts = raw_counts[c]; + sample_collectors[c].lz77_counts = lz77_counts[c]; + } + ProcessImageArea< + ChannelRowProcessor, BitDepth>>( + rgba, x0, y0, xs, 1, 1 + row_count, row_stride, bitdepth, nb_chans, + big_endian, row_sample_collectors); + } +} + +void PrepareDCGlobalPalette(bool is_single_group, size_t width, size_t height, + size_t nb_chans, const PrefixCode code[4], + const std::vector& palette, + size_t pcolors, BitWriter* output) { + PrepareDCGlobalCommon(is_single_group, width, height, code, output); + output->Write(2, 0b01); // 1 transform + output->Write(2, 0b01); // Palette + output->Write(5, 0b00000); // Starting from ch 0 + if (nb_chans == 1) { + output->Write(2, 0b00); // 1-channel palette (Gray) + } else if (nb_chans == 3) { + output->Write(2, 0b01); // 3-channel palette (RGB) + } else if (nb_chans == 4) { + output->Write(2, 0b10); // 4-channel palette (RGBA) + } else { + output->Write(2, 0b11); + output->Write(13, nb_chans - 1); + } + // pcolors <= kMaxColors + kChunkSize - 1 + static_assert(kMaxColors + kChunkSize < 1281, + "add code to signal larger palette sizes"); + if (pcolors < 256) { + output->Write(2, 0b00); + output->Write(8, pcolors); + } else { + output->Write(2, 0b01); + output->Write(10, pcolors - 256); + } + + output->Write(2, 0b00); // nb_deltas == 0 + output->Write(4, 0); // Zero predictor for delta palette + // Encode palette + ChunkEncoder encoder; + ChannelRowProcessor, UpTo8Bits> row_encoder; + row_encoder.t = &encoder; + encoder.output = output; + encoder.code = &code[0]; + int16_t p[4][32 + 1024] = {}; + uint8_t prgba[4]; 
+ size_t i = 0; + size_t have_zero = 0; + if (palette[pcolors - 1] == 0) have_zero = 1; + for (; i < pcolors; i++) { + memcpy(prgba, &palette[i], 4); + p[0][16 + i + have_zero] = prgba[0]; + p[1][16 + i + have_zero] = prgba[1]; + p[2][16 + i + have_zero] = prgba[2]; + p[3][16 + i + have_zero] = prgba[3]; + } + p[0][15] = 0; + row_encoder.ProcessRow(p[0] + 16, p[0] + 15, p[0] + 15, p[0] + 15, pcolors); + p[1][15] = p[0][16]; + p[0][15] = p[0][16]; + row_encoder.ProcessRow(p[1] + 16, p[1] + 15, p[0] + 16, p[0] + 15, pcolors); + p[2][15] = p[1][16]; + p[1][15] = p[1][16]; + row_encoder.ProcessRow(p[2] + 16, p[2] + 15, p[1] + 16, p[1] + 15, pcolors); + p[3][15] = p[2][16]; + p[2][15] = p[2][16]; + row_encoder.ProcessRow(p[3] + 16, p[3] + 15, p[2] + 16, p[2] + 15, pcolors); + row_encoder.Finalize(); + + if (!is_single_group) { + output->ZeroPadToByte(); + } +} + +template +bool detect_palette(const unsigned char* r, size_t width, + std::vector& palette) { + size_t x = 0; + bool collided = false; + // this is just an unrolling of the next loop + for (; x + 7 < width; x += 8) { + uint32_t p[8] = {}, index[8]; + for (int i = 0; i < 8; i++) memcpy(&p[i], r + (x + i) * nb_chans, 4); + for (int i = 0; i < 8; i++) p[i] &= ((1llu << (8 * nb_chans)) - 1); + for (int i = 0; i < 8; i++) index[i] = pixel_hash(p[i]); + for (int i = 0; i < 8; i++) { + collided |= (palette[index[i]] != 0 && p[i] != palette[index[i]]); + } + for (int i = 0; i < 8; i++) palette[index[i]] = p[i]; + } + for (; x < width; x++) { + uint32_t p = 0; + memcpy(&p, r + x * nb_chans, nb_chans); + uint32_t index = pixel_hash(p); + collided |= (palette[index] != 0 && p != palette[index]); + palette[index] = p; + } + return collided; +} + +template +JxlFastLosslessFrameState* LLEnc(const unsigned char* rgba, size_t width, + size_t stride, size_t height, + BitDepth bitdepth, size_t nb_chans, + bool big_endian, int effort, + void* runner_opaque, + FJxlParallelRunner runner) { + assert(width != 0); + assert(height != 
0); + assert(stride >= nb_chans * BitDepth::kInputBytes * width); + + // Count colors to try palette + std::vector palette(kHashSize); + std::vector lookup(kHashSize); + lookup[0] = 0; + int pcolors = 0; + bool collided = effort < 2 || bitdepth.bitdepth != 8; + for (size_t y = 0; y < height && !collided; y++) { + const unsigned char* r = rgba + stride * y; + if (nb_chans == 1) collided = detect_palette<1>(r, width, palette); + if (nb_chans == 2) collided = detect_palette<2>(r, width, palette); + if (nb_chans == 3) collided = detect_palette<3>(r, width, palette); + if (nb_chans == 4) collided = detect_palette<4>(r, width, palette); + } + + int nb_entries = 0; + if (!collided) { + pcolors = 1; // always have all-zero as a palette color + bool have_color = false; + uint8_t minG = 255, maxG = 0; + for (uint32_t k = 0; k < kHashSize; k++) { + if (palette[k] == 0) continue; + uint8_t p[4]; + memcpy(p, &palette[k], 4); + // move entries to front so sort has less work + palette[nb_entries] = palette[k]; + if (p[0] != p[1] || p[0] != p[2]) have_color = true; + if (p[1] < minG) minG = p[1]; + if (p[1] > maxG) maxG = p[1]; + nb_entries++; + // don't do palette if too many colors are needed + if (nb_entries + pcolors > kMaxColors) { + collided = true; + break; + } + } + if (!have_color) { + // don't do palette if it's just grayscale without many holes + if (maxG - minG < nb_entries * 1.4f) collided = true; + } + } + if (!collided) { + std::sort( + palette.begin(), palette.begin() + nb_entries, + [&nb_chans](uint32_t ap, uint32_t bp) { + if (ap == 0) return false; + if (bp == 0) return true; + uint8_t a[4], b[4]; + memcpy(a, &ap, 4); + memcpy(b, &bp, 4); + float ay, by; + if (nb_chans == 4) { + ay = (0.299f * a[0] + 0.587f * a[1] + 0.114f * a[2] + 0.01f) * a[3]; + by = (0.299f * b[0] + 0.587f * b[1] + 0.114f * b[2] + 0.01f) * b[3]; + } else { + ay = (0.299f * a[0] + 0.587f * a[1] + 0.114f * a[2] + 0.01f); + by = (0.299f * b[0] + 0.587f * b[1] + 0.114f * b[2] + 0.01f); + } + 
return ay < by; // sort on alpha*luma + }); + for (int k = 0; k < nb_entries; k++) { + if (palette[k] == 0) break; + lookup[pixel_hash(palette[k])] = pcolors++; + } + } + + size_t num_groups_x = (width + 255) / 256; + size_t num_groups_y = (height + 255) / 256; + size_t num_dc_groups_x = (width + 2047) / 2048; + size_t num_dc_groups_y = (height + 2047) / 2048; + + uint64_t raw_counts[4][kNumRawSymbols] = {}; + uint64_t lz77_counts[4][kNumLZ77] = {}; + + bool onegroup = num_groups_x == 1 && num_groups_y == 1; + + // sample the middle (effort * 2) rows of every group + for (size_t g = 0; g < num_groups_y * num_groups_x; g++) { + size_t xg = g % num_groups_x; + size_t yg = g / num_groups_x; + int y_offset = yg * 256; + int y_max = std::min(height - yg * 256, 256); + int y_begin = y_offset + std::max(0, y_max - 2 * effort) / 2; + int y_count = + std::min(2 * effort * y_max / 256, y_offset + y_max - y_begin - 1); + int x_max = + std::min(width - xg * 256, 256) / kChunkSize * kChunkSize; + CollectSamples(rgba, xg * 256, y_begin, x_max, stride, y_count, raw_counts, + lz77_counts, onegroup, !collided, bitdepth, nb_chans, + big_endian, lookup.data()); + } + + // TODO(veluca): can probably improve this and make it bitdepth-dependent. 
+ uint64_t base_raw_counts[kNumRawSymbols] = { + 3843, 852, 1270, 1214, 1014, 727, 481, 300, 159, 51, + 5, 1, 1, 1, 1, 1, 1, 1, 1}; + + bool doing_ycocg = nb_chans > 2 && collided; + for (size_t i = bitdepth.NumSymbols(doing_ycocg); i < kNumRawSymbols; i++) { + base_raw_counts[i] = 0; + } + + for (size_t c = 0; c < 4; c++) { + for (size_t i = 0; i < kNumRawSymbols; i++) { + raw_counts[c][i] = (raw_counts[c][i] << 8) + base_raw_counts[i]; + } + } + + if (!collided) { + unsigned token, nbits, bits; + EncodeHybridUint000(PackSigned(pcolors - 1), &token, &nbits, &bits); + // ensure all palette indices can actually be encoded + for (size_t i = 0; i < token + 1; i++) + raw_counts[0][i] = std::max(raw_counts[0][i], 1); + // these tokens are only used for the palette itself so they can get a bad + // code + for (size_t i = token + 1; i < 10; i++) raw_counts[0][i] = 1; + } + + uint64_t base_lz77_counts[kNumLZ77] = { + 29, 27, 25, 23, 21, 21, 19, 18, 21, 17, 16, 15, 15, 14, + 13, 13, 137, 98, 61, 34, 1, 1, 1, 1, 1, 1, 1, 1, + }; + + for (size_t c = 0; c < 4; c++) { + for (size_t i = 0; i < kNumLZ77; i++) { + lz77_counts[c][i] = (lz77_counts[c][i] << 8) + base_lz77_counts[i]; + } + } + + alignas(64) PrefixCode hcode[4]; + for (size_t i = 0; i < 4; i++) { + hcode[i] = PrefixCode(bitdepth, raw_counts[i], lz77_counts[i]); + } + + size_t num_groups = onegroup ? 
1 + : (2 + num_dc_groups_x * num_dc_groups_y + + num_groups_x * num_groups_y); + + JxlFastLosslessFrameState* frame_state = new JxlFastLosslessFrameState(); + + frame_state->width = width; + frame_state->height = height; + frame_state->nb_chans = nb_chans; + frame_state->bitdepth = bitdepth.bitdepth; + + frame_state->group_data = std::vector>(num_groups); + if (collided) { + PrepareDCGlobal(onegroup, width, height, nb_chans, hcode, + &frame_state->group_data[0][0]); + } else { + PrepareDCGlobalPalette(onegroup, width, height, nb_chans, hcode, palette, + pcolors, &frame_state->group_data[0][0]); + } + + auto run_one = [&](size_t g) { + size_t xg = g % num_groups_x; + size_t yg = g / num_groups_x; + size_t group_id = + onegroup ? 0 : (2 + num_dc_groups_x * num_dc_groups_y + g); + size_t xs = std::min(width - xg * 256, 256); + size_t ys = std::min(height - yg * 256, 256); + size_t x0 = xg * 256; + size_t y0 = yg * 256; + auto& gd = frame_state->group_data[group_id]; + if (collided) { + WriteACSection(rgba, x0, y0, xs, ys, stride, onegroup, bitdepth, nb_chans, + big_endian, hcode, gd); + + } else { + WriteACSectionPalette(rgba, x0, y0, xs, ys, stride, onegroup, hcode, + lookup.data(), nb_chans, gd[0]); + } + }; + + runner( + runner_opaque, &run_one, + +[](void* r, size_t i) { (*reinterpret_cast(r))(i); }, + num_groups_x * num_groups_y); + + return frame_state; +} + +JxlFastLosslessFrameState* JxlFastLosslessEncodeImpl( + const unsigned char* rgba, size_t width, size_t stride, size_t height, + size_t nb_chans, size_t bitdepth, bool big_endian, int effort, + void* runner_opaque, FJxlParallelRunner runner) { + assert(bitdepth > 0); + assert(nb_chans <= 4); + assert(nb_chans != 0); + if (bitdepth <= 8) { + return LLEnc(rgba, width, stride, height, UpTo8Bits(bitdepth), nb_chans, + big_endian, effort, runner_opaque, runner); + } + if (bitdepth <= 13) { + return LLEnc(rgba, width, stride, height, From9To13Bits(bitdepth), nb_chans, + big_endian, effort, runner_opaque, runner); 
+ } + if (bitdepth == 14) { + return LLEnc(rgba, width, stride, height, Exactly14Bits(bitdepth), nb_chans, + big_endian, effort, runner_opaque, runner); + } + return LLEnc(rgba, width, stride, height, MoreThan14Bits(bitdepth), nb_chans, + big_endian, effort, runner_opaque, runner); +} + +} // namespace + +#endif // FJXL_SELF_INCLUDE + +#ifndef FJXL_SELF_INCLUDE + +#define FJXL_SELF_INCLUDE + +// If we have NEON enabled, it is the default target. +#if FJXL_ENABLE_NEON + +namespace default_implementation { +#define FJXL_NEON +#include "lib/jxl/enc_fast_lossless.cc" +#undef FJXL_NEON +} // namespace default_implementation + +#else // FJXL_ENABLE_NEON + +namespace default_implementation { +#include "lib/jxl/enc_fast_lossless.cc" +} + +#if FJXL_ENABLE_AVX2 +#ifdef __clang__ +#pragma clang attribute push(__attribute__((target("avx,avx2"))), \ + apply_to = function) +// Causes spurious warnings on clang5. +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wmissing-braces" +#elif defined(__GNUC__) +#pragma GCC push_options +// Seems to cause spurious errors on GCC8. 
+#pragma GCC diagnostic ignored "-Wpsabi" +#pragma GCC target "avx,avx2" +#endif + +namespace AVX2 { +#define FJXL_AVX2 +#include "lib/jxl/enc_fast_lossless.cc" +#undef FJXL_AVX2 +} // namespace AVX2 + +#ifdef __clang__ +#pragma clang attribute pop +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC pop_options +#endif +#endif // FJXL_ENABLE_AVX2 + +#if FJXL_ENABLE_AVX512 +#ifdef __clang__ +#pragma clang attribute push( \ + __attribute__((target("avx512cd,avx512bw,avx512vl,avx512f,avx512vbmi"))), \ + apply_to = function) +#elif defined(__GNUC__) +#pragma GCC push_options +#pragma GCC target "avx512cd,avx512bw,avx512vl,avx512f,avx512vbmi" +#endif + +namespace AVX512 { +#define FJXL_AVX512 +#include "lib/jxl/enc_fast_lossless.cc" +#undef FJXL_AVX512 +} // namespace AVX512 + +#ifdef __clang__ +#pragma clang attribute pop +#elif defined(__GNUC__) +#pragma GCC pop_options +#endif +#endif // FJXL_ENABLE_AVX512 + +#endif + +extern "C" { + +#if FJXL_STANDALONE +// One-shot encode: prepares the frame, requests the image header and marks the +// frame as last, allocates *output with malloc and writes the encoded bytes +// into it. Returns the number of bytes written, or 0 if the allocation fails. +// *output is not freed here. +size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width, + size_t row_stride, size_t height, size_t nb_chans, + size_t bitdepth, int big_endian, int effort, + unsigned char** output, void* runner_opaque, + FJxlParallelRunner runner) { + auto frame_state = JxlFastLosslessPrepareFrame( + rgba, width, row_stride, height, nb_chans, bitdepth, big_endian, effort, + runner_opaque, runner); + JxlFastLosslessPrepareHeader(frame_state, /*add_image_header=*/1, + /*is_last=*/1); + size_t output_size = JxlFastLosslessMaxRequiredOutput(frame_state); + *output = (unsigned char*)malloc(output_size); + if (*output == nullptr) { + // Allocation failed: release the frame state and report zero bytes instead + // of writing through a null pointer. + JxlFastLosslessFreeFrameState(frame_state); + return 0; + } + size_t written = 0; + size_t total = 0; + // JxlFastLosslessWriteOutput must be called repeatedly until it returns 0. + while ((written = JxlFastLosslessWriteOutput(frame_state, *output + total, + output_size - total)) != 0) { + total += written; + } + // The frame state returned by JxlFastLosslessPrepareFrame must be freed by + // the caller; it is no longer needed once all output has been written. + JxlFastLosslessFreeFrameState(frame_state); + return total; +} +#endif + +JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame( + const unsigned char* rgba, size_t width, size_t row_stride, size_t height, + size_t nb_chans, size_t bitdepth, int big_endian, int effort, + void* 
runner_opaque, FJxlParallelRunner runner) { + auto trivial_runner = + +[](void*, void* opaque, void fun(void*, size_t), size_t count) { + for (size_t i = 0; i < count; i++) { + fun(opaque, i); + } + }; + + if (runner == nullptr) { + runner = trivial_runner; + } + +#if FJXL_ENABLE_AVX512 + if (__builtin_cpu_supports("avx512cd") && + __builtin_cpu_supports("avx512vbmi") && + __builtin_cpu_supports("avx512bw") && __builtin_cpu_supports("avx512f") && + __builtin_cpu_supports("avx512vl")) { + return AVX512::JxlFastLosslessEncodeImpl(rgba, width, row_stride, height, + nb_chans, bitdepth, big_endian, + effort, runner_opaque, runner); + } +#endif +#if FJXL_ENABLE_AVX2 + if (__builtin_cpu_supports("avx2")) { + return AVX2::JxlFastLosslessEncodeImpl(rgba, width, row_stride, height, + nb_chans, bitdepth, big_endian, + effort, runner_opaque, runner); + } +#endif + + return default_implementation::JxlFastLosslessEncodeImpl( + rgba, width, row_stride, height, nb_chans, bitdepth, big_endian, effort, + runner_opaque, runner); +} + +} // extern "C" + +#endif // FJXL_SELF_INCLUDE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.h b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.h new file mode 100644 index 0000000000..f0bcd72cbf --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_fast_lossless.h @@ -0,0 +1,85 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_FAST_LOSSLESS_H_ +#define LIB_JXL_ENC_FAST_LOSSLESS_H_ +#include + +// FJXL_STANDALONE=1 for a stand-alone jxl encoder +// FJXL_STANDALONE=0 for use in libjxl to encode frames (but no image header) +#ifndef FJXL_STANDALONE +#ifdef JPEGXL_MAJOR_VERSION +#define FJXL_STANDALONE 0 +#else +#define FJXL_STANDALONE 1 +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Simple encoding API. 
+ +// A FJxlParallelRunner must call fun(opaque, i) for all i from 0 to count. It +// may do so in parallel. +typedef void(FJxlParallelRunner)(void* runner_opaque, void* opaque, + void fun(void*, size_t), size_t count); + +#if FJXL_STANDALONE +// You may pass `nullptr` as a runner: encoding will be sequential. +size_t JxlFastLosslessEncode(const unsigned char* rgba, size_t width, + size_t row_stride, size_t height, size_t nb_chans, + size_t bitdepth, int big_endian, int effort, + unsigned char** output, void* runner_opaque, + FJxlParallelRunner runner); +#endif + +// More complex API for cases in which you may want to allocate your own buffer +// and other advanced use cases. + +// Opaque struct that represents an intermediate state of the computation. +struct JxlFastLosslessFrameState; + +// Returned JxlFastLosslessFrameState must be freed by calling +// JxlFastLosslessFreeFrameState. +JxlFastLosslessFrameState* JxlFastLosslessPrepareFrame( + const unsigned char* rgba, size_t width, size_t row_stride, size_t height, + size_t nb_chans, size_t bitdepth, int big_endian, int effort, + void* runner_opaque, FJxlParallelRunner runner); + +// Prepare the (image/frame) header. You may encode animations by concatenating +// the output of multiple frames, of which the first one has add_image_header = +// 1 and subsequent ones have add_image_header = 0, and all frames but the last +// one have is_last = 0. +// (when FJXL_STANDALONE=0, add_image_header has to be 0) +void JxlFastLosslessPrepareHeader(JxlFastLosslessFrameState* frame, + int add_image_header, int is_last); + +// Upper bound on the required output size, including any padding that may be +// required by JxlFastLosslessWriteOutput. Cannot be called before +// JxlFastLosslessPrepareHeader. +size_t JxlFastLosslessMaxRequiredOutput(const JxlFastLosslessFrameState* frame); + +// Actual size of the frame once it is encoded. 
This is not identical to +// JxlFastLosslessMaxRequiredOutput because JxlFastLosslessWriteOutput may +// require extra padding. +size_t JxlFastLosslessOutputSize(const JxlFastLosslessFrameState* frame); + +// Writes the frame to the given output buffer. Returns the number of bytes that +// were written, which is at least 1 unless the entire output has been written +// already. It is required that `output_size >= 32` when calling this function. +// This function must be called repeatedly until it returns 0. +size_t JxlFastLosslessWriteOutput(JxlFastLosslessFrameState* frame, + unsigned char* output, size_t output_size); + +// Frees the provided frame state. +void JxlFastLosslessFreeFrameState(JxlFastLosslessFrameState* frame); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // LIB_JXL_ENC_FAST_LOSSLESS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fields.cc b/third-party/libjxl/libjxl/lib/jxl/enc_fields.cc new file mode 100644 index 0000000000..22c763e13f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_fields.cc @@ -0,0 +1,239 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_fields.h" + +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/fields.h" + +namespace jxl { + +namespace { +using ::jxl::fields_internal::VisitorBase; +class WriteVisitor : public VisitorBase { + public: + WriteVisitor(const size_t extension_bits, BitWriter* JXL_RESTRICT writer) + : extension_bits_(extension_bits), writer_(writer) {} + + Status Bits(const size_t bits, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT value) override { + ok_ &= BitsCoder::Write(bits, *value, writer_); + return true; + } + Status U32(const U32Enc enc, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT value) override { + ok_ &= U32Coder::Write(enc, *value, writer_); + return true; + } + + Status U64(const uint64_t /*default_value*/, + uint64_t* JXL_RESTRICT value) override { + ok_ &= U64Coder::Write(*value, writer_); + return true; + } + + Status F16(const float /*default_value*/, + float* JXL_RESTRICT value) override { + ok_ &= F16Coder::Write(*value, writer_); + return true; + } + + Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override { + JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions)); + if (*extensions == 0) { + JXL_ASSERT(extension_bits_ == 0); + return true; + } + // TODO(janwas): extend API to pass in array of extension_bits, one per + // extension. We currently ascribe all bits to the first extension, but + // this is only an encoder limitation. NOTE: extension_bits_ can be zero + // if an extension does not require any additional fields. + ok_ &= U64Coder::Write(extension_bits_, writer_); + // For each nonzero bit except the lowest/first (already written): + for (uint64_t remaining_extensions = *extensions & (*extensions - 1); + remaining_extensions != 0; + remaining_extensions &= remaining_extensions - 1) { + ok_ &= U64Coder::Write(0, writer_); + } + return true; + } + // EndExtensions = default. 
+ + Status OK() const { return ok_; } + + private: + const size_t extension_bits_; + BitWriter* JXL_RESTRICT writer_; + bool ok_ = true; +}; +} // namespace + +Status Bundle::Write(const Fields& fields, BitWriter* writer, size_t layer, + AuxOut* aux_out) { + size_t extension_bits, total_bits; + JXL_RETURN_IF_ERROR(Bundle::CanEncode(fields, &extension_bits, &total_bits)); + + BitWriter::Allotment allotment(writer, total_bits); + WriteVisitor visitor(extension_bits, writer); + JXL_RETURN_IF_ERROR(visitor.VisitConst(fields)); + JXL_RETURN_IF_ERROR(visitor.OK()); + allotment.ReclaimAndCharge(writer, layer, aux_out); + return true; +} + +// Returns false if the value is too large to encode. +Status BitsCoder::Write(const size_t bits, const uint32_t value, + BitWriter* JXL_RESTRICT writer) { + if (value >= (1ULL << bits)) { + return JXL_FAILURE("Value %d too large to encode in %" PRIu64 " bits", + value, static_cast(bits)); + } + writer->Write(bits, value); + return true; +} + +// Returns false if the value is too large to encode. +Status U32Coder::Write(const U32Enc enc, const uint32_t value, + BitWriter* JXL_RESTRICT writer) { + uint32_t selector; + size_t total_bits; + JXL_RETURN_IF_ERROR(ChooseSelector(enc, value, &selector, &total_bits)); + + writer->Write(2, selector); + + const U32Distr d = enc.GetDistr(selector); + if (!d.IsDirect()) { // Nothing more to write for direct encoding + const uint32_t offset = d.Offset(); + JXL_ASSERT(value >= offset); + writer->Write(total_bits - 2, value - offset); + } + + return true; +} + +// Returns false if the value is too large to encode. 
+Status U64Coder::Write(uint64_t value, BitWriter* JXL_RESTRICT writer) { + if (value == 0) { + // Selector: use 0 bits, value 0 + writer->Write(2, 0); + } else if (value <= 16) { + // Selector: use 4 bits, value 1..16 + writer->Write(2, 1); + writer->Write(4, value - 1); + } else if (value <= 272) { + // Selector: use 8 bits, value 17..272 + writer->Write(2, 2); + writer->Write(8, value - 17); + } else { + // Selector: varint, first a 12-bit group, after that per 8-bit group. + writer->Write(2, 3); + writer->Write(12, value & 4095); + value >>= 12; + int shift = 12; + while (value > 0 && shift < 60) { + // Indicate varint not done + writer->Write(1, 1); + writer->Write(8, value & 255); + value >>= 8; + shift += 8; + } + if (value > 0) { + // This only could happen if shift == N - 4. + writer->Write(1, 1); + writer->Write(4, value & 15); + // Implicitly closed sequence, no extra stop bit is required. + } else { + // Indicate end of varint + writer->Write(1, 0); + } + } + + return true; +} + +Status F16Coder::Write(float value, BitWriter* JXL_RESTRICT writer) { + uint32_t bits32; + memcpy(&bits32, &value, sizeof(bits32)); + const uint32_t sign = bits32 >> 31; + const uint32_t biased_exp32 = (bits32 >> 23) & 0xFF; + const uint32_t mantissa32 = bits32 & 0x7FFFFF; + + const int32_t exp = static_cast(biased_exp32) - 127; + if (JXL_UNLIKELY(exp > 15)) { + return JXL_FAILURE("Too big to encode, CanEncode should return false"); + } + + // Tiny or zero => zero. 
+ if (exp < -24) { + writer->Write(16, 0); + return true; + } + + uint32_t biased_exp16, mantissa16; + + // exp = [-24, -15] => subnormal + if (JXL_UNLIKELY(exp < -14)) { + biased_exp16 = 0; + const uint32_t sub_exp = static_cast(-14 - exp); + JXL_ASSERT(1 <= sub_exp && sub_exp < 11); + mantissa16 = (1 << (10 - sub_exp)) + (mantissa32 >> (13 + sub_exp)); + } else { + // exp = [-14, 15] + biased_exp16 = static_cast(exp + 15); + JXL_ASSERT(1 <= biased_exp16 && biased_exp16 < 31); + mantissa16 = mantissa32 >> 13; + } + + JXL_ASSERT(mantissa16 < 1024); + const uint32_t bits16 = (sign << 15) | (biased_exp16 << 10) | mantissa16; + JXL_ASSERT(bits16 < 0x10000); + writer->Write(16, bits16); + return true; +} + +Status WriteCodestreamHeaders(CodecMetadata* metadata, BitWriter* writer, + AuxOut* aux_out) { + // Marker/signature + BitWriter::Allotment allotment(writer, 16); + writer->Write(8, 0xFF); + writer->Write(8, kCodestreamMarker); + allotment.ReclaimAndCharge(writer, kLayerHeader, aux_out); + + JXL_RETURN_IF_ERROR( + WriteSizeHeader(metadata->size, writer, kLayerHeader, aux_out)); + + JXL_RETURN_IF_ERROR( + WriteImageMetadata(metadata->m, writer, kLayerHeader, aux_out)); + + metadata->transform_data.nonserialized_xyb_encoded = metadata->m.xyb_encoded; + JXL_RETURN_IF_ERROR( + Bundle::Write(metadata->transform_data, writer, kLayerHeader, aux_out)); + + return true; +} + +Status WriteFrameHeader(const FrameHeader& frame, + BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) { + return Bundle::Write(frame, writer, kLayerHeader, aux_out); +} + +Status WriteImageMetadata(const ImageMetadata& metadata, + BitWriter* JXL_RESTRICT writer, size_t layer, + AuxOut* aux_out) { + return Bundle::Write(metadata, writer, layer, aux_out); +} + +Status WriteQuantizerParams(const QuantizerParams& params, + BitWriter* JXL_RESTRICT writer, size_t layer, + AuxOut* aux_out) { + return Bundle::Write(params, writer, layer, aux_out); +} + +Status WriteSizeHeader(const SizeHeader& size, BitWriter* 
JXL_RESTRICT writer, + size_t layer, AuxOut* aux_out) { + return Bundle::Write(size, writer, layer, aux_out); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_fields.h b/third-party/libjxl/libjxl/lib/jxl/enc_fields.h new file mode 100644 index 0000000000..5bb179a719 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_fields.h @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_FIELDS_H_ +#define LIB_JXL_ENC_FIELDS_H_ + +#include "lib/jxl/base/status.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/image_metadata.h" +#include "lib/jxl/quantizer.h" + +namespace jxl { + +struct AuxOut; + +// Write headers from the CodecMetadata. Also may modify nonserialized_... +// fields of the metadata. +Status WriteCodestreamHeaders(CodecMetadata* metadata, BitWriter* writer, + AuxOut* aux_out); + +Status WriteFrameHeader(const FrameHeader& frame, + BitWriter* JXL_RESTRICT writer, AuxOut* aux_out); + +Status WriteQuantizerParams(const QuantizerParams& params, + BitWriter* JXL_RESTRICT writer, size_t layer, + AuxOut* aux_out); + +Status WriteSizeHeader(const SizeHeader& size, BitWriter* JXL_RESTRICT writer, + size_t layer, AuxOut* aux_out); + +} // namespace jxl + +#endif // LIB_JXL_ENC_FIELDS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_file.cc b/third-party/libjxl/libjxl/lib/jxl/enc_file.cc new file mode 100644 index 0000000000..ef5e21414e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_file.cc @@ -0,0 +1,141 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_file.h" + +#include + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_frame.h" +#include "lib/jxl/enc_icc_codec.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +namespace { + +Status PrepareCodecMetadataFromIO(const CompressParams& cparams, + const CodecInOut* io, + CodecMetadata* metadata) { + *metadata = io->metadata; + size_t ups = 1; + if (cparams.already_downsampled) ups = cparams.resampling; + + JXL_RETURN_IF_ERROR(metadata->size.Set(io->xsize() * ups, io->ysize() * ups)); + + // Keep ICC profile in lossless modes because a reconstructed profile may be + // slightly different (quantization). + // Also keep ICC in JPEG reconstruction mode as we need byte-exact profiles. + if (!cparams.IsLossless() && !io->Main().IsJPEG() && cparams.cms_set) { + metadata->m.color_encoding.DecideIfWantICC(cparams.cms); + } + + metadata->m.xyb_encoded = + cparams.color_transform == ColorTransform::kXYB ? 
true : false; + + // TODO(firsching): move this EncodeFile to test_utils / re-implement this + // using API functions + return true; +} + +} // namespace + +// Encodes `ib` as a preview frame into a temporary BitWriter and, if any bits +// were produced, appends them byte-aligned to `writer`. Nothing is encoded +// when `ib.HasColor()` is false. +Status EncodePreview(const CompressParams& cparams, const ImageBundle& ib, + const CodecMetadata* metadata, const JxlCmsInterface& cms, + ThreadPool* pool, BitWriter* JXL_RESTRICT writer) { + BitWriter preview_writer; + // TODO(janwas): also support generating preview by downsampling + if (ib.HasColor()) { + AuxOut aux_out; + PassesEncoderState passes_enc_state; + // TODO(lode): check if we want all extra channels and matching xyb_encoded + // for the preview, such that using the main ImageMetadata object for + // encoding this frame is warranted. + FrameInfo frame_info; + frame_info.is_preview = true; + JXL_RETURN_IF_ERROR(EncodeFrame(cparams, frame_info, metadata, ib, + &passes_enc_state, cms, pool, + &preview_writer, &aux_out)); + preview_writer.ZeroPadToByte(); + } + + if (preview_writer.BitsWritten() != 0) { + writer->ZeroPadToByte(); + writer->AppendByteAligned(preview_writer); + } + + return true; +} + +Status EncodeFile(const CompressParams& params, const CodecInOut* io, + PassesEncoderState* passes_enc_state, PaddedBytes* compressed, + const JxlCmsInterface& cms, AuxOut* aux_out, + ThreadPool* pool) { + io->CheckMetadata(); + BitWriter writer; + + CompressParams cparams = params; + if (io->Main().color_transform != ColorTransform::kNone) { + // Set the color transform to YCbCr or XYB if the original image is such. + cparams.color_transform = io->Main().color_transform; + } + + JXL_RETURN_IF_ERROR(ParamsPostInit(&cparams)); + + std::unique_ptr metadata = jxl::make_unique(); + JXL_RETURN_IF_ERROR(PrepareCodecMetadataFromIO(cparams, io, metadata.get())); + JXL_RETURN_IF_ERROR(WriteCodestreamHeaders(metadata.get(), &writer, aux_out)); + + // Only send ICC (at least several hundred bytes) if fields aren't enough. 
+ if (metadata->m.color_encoding.WantICC()) { + JXL_RETURN_IF_ERROR(WriteICC(metadata->m.color_encoding.ICC(), &writer, + kLayerHeader, aux_out)); + } + + if (metadata->m.have_preview) { + JXL_RETURN_IF_ERROR(EncodePreview(cparams, io->preview_frame, + metadata.get(), cms, pool, &writer)); + } + + // Each frame should start on byte boundaries. + BitWriter::Allotment allotment(&writer, 8); + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, kLayerHeader, aux_out); + + for (size_t i = 0; i < io->frames.size(); i++) { + FrameInfo info; + info.is_last = i == io->frames.size() - 1; + if (io->frames[i].use_for_next_frame) { + info.save_as_reference = 1; + } + JXL_RETURN_IF_ERROR(EncodeFrame(cparams, info, metadata.get(), + io->frames[i], passes_enc_state, cms, pool, + &writer, aux_out)); + } + + // Clean up passes_enc_state in case it gets reused. + for (size_t i = 0; i < 4; i++) { + passes_enc_state->shared.dc_frames[i] = Image3F(); + passes_enc_state->shared.reference_frames[i].frame = ImageBundle(); + } + + *compressed = std::move(writer).TakeBytes(); + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_file.h b/third-party/libjxl/libjxl/lib/jxl/enc_file.h new file mode 100644 index 0000000000..ff3ad1233d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_file.h @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_FILE_H_ +#define LIB_JXL_ENC_FILE_H_ + +// Facade for JXL encoding. + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_params.h" + +namespace jxl { + +struct AuxOut; +class CodecInOut; + +// Compresses pixels from `io` (given in any ColorEncoding). +// `io->metadata.m.original` must be set. 
+Status EncodeFile(const CompressParams& params, const CodecInOut* io, + PassesEncoderState* passes_enc_state, PaddedBytes* compressed, + const JxlCmsInterface& cms, AuxOut* aux_out = nullptr, + ThreadPool* pool = nullptr); + +} // namespace jxl + +#endif // LIB_JXL_ENC_FILE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_frame.cc b/third-party/libjxl/libjxl/lib/jxl/enc_frame.cc new file mode 100644 index 0000000000..5fe2b6a712 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_frame.cc @@ -0,0 +1,1558 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_frame.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/compressed_dc.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/enc_adaptive_quantization.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_chroma_from_luma.h" +#include "lib/jxl/enc_coeff_order.h" +#include "lib/jxl/enc_context_map.h" +#include "lib/jxl/enc_entropy_coder.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_gaborish.h" +#include "lib/jxl/enc_group.h" +#include "lib/jxl/enc_modular.h" +#include "lib/jxl/enc_noise.h" +#include "lib/jxl/enc_params.h" 
+#include "lib/jxl/enc_patch_dictionary.h" +#include "lib/jxl/enc_photon_noise.h" +#include "lib/jxl/enc_quant_weights.h" +#include "lib/jxl/enc_splines.h" +#include "lib/jxl/enc_toc.h" +#include "lib/jxl/enc_xyb.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/modular/options.h" +#include "lib/jxl/quant_weights.h" +#include "lib/jxl/quantizer.h" +#include "lib/jxl/splines.h" +#include "lib/jxl/toc.h" + +namespace jxl { +namespace { + +PassDefinition progressive_passes_dc_vlf_lf_full_ac[] = { + {/*num_coefficients=*/2, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/4}, + {/*num_coefficients=*/3, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/2}, + {/*num_coefficients=*/8, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/0}, +}; + +PassDefinition progressive_passes_dc_quant_ac_full_ac[] = { + {/*num_coefficients=*/8, /*shift=*/1, + /*suitable_for_downsampling_of_at_least=*/2}, + {/*num_coefficients=*/8, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/0}, +}; + +uint64_t FrameFlagsFromParams(const CompressParams& cparams) { + uint64_t flags = 0; + + const float dist = cparams.butteraugli_distance; + + // We don't add noise at low butteraugli distances because the original + // noise is stored within the compressed image and adding noise makes things + // worse. 
+ if (ApplyOverride(cparams.noise, dist >= kMinButteraugliForNoise) || + cparams.photon_noise_iso > 0 || + cparams.manual_noise.size() == NoiseParams::kNumNoisePoints) { + flags |= FrameHeader::kNoise; + } + + if (cparams.progressive_dc > 0 && cparams.modular_mode == false) { + flags |= FrameHeader::kUseDcFrame; + } + + return flags; +} + +Status LoopFilterFromParams(const CompressParams& cparams, + FrameHeader* JXL_RESTRICT frame_header) { + LoopFilter* loop_filter = &frame_header->loop_filter; + + // Gaborish defaults to enabled in Hare or slower. + loop_filter->gab = ApplyOverride( + cparams.gaborish, cparams.speed_tier <= SpeedTier::kHare && + frame_header->encoding == FrameEncoding::kVarDCT && + cparams.decoding_speed_tier < 4); + + if (cparams.epf != -1) { + loop_filter->epf_iters = cparams.epf; + } else { + if (frame_header->encoding == FrameEncoding::kModular) { + loop_filter->epf_iters = 0; + } else { + constexpr float kThresholds[3] = {0.7, 1.5, 4.0}; + loop_filter->epf_iters = 0; + if (cparams.decoding_speed_tier < 3) { + for (size_t i = cparams.decoding_speed_tier == 2 ? 1 : 0; i < 3; i++) { + if (cparams.butteraugli_distance >= kThresholds[i]) { + loop_filter->epf_iters++; + } + } + } + } + } + // Strength of EPF in modular mode. + if (frame_header->encoding == FrameEncoding::kModular && + !cparams.IsLossless()) { + // TODO(veluca): this formula is nonsense. 
+ loop_filter->epf_sigma_for_modular = cparams.butteraugli_distance; + } + if (frame_header->encoding == FrameEncoding::kModular && + cparams.lossy_palette) { + loop_filter->epf_sigma_for_modular = 1.0f; + } + + return true; +} + +Status MakeFrameHeader(const CompressParams& cparams, + const ProgressiveSplitter& progressive_splitter, + const FrameInfo& frame_info, const ImageBundle& ib, + FrameHeader* JXL_RESTRICT frame_header) { + frame_header->nonserialized_is_preview = frame_info.is_preview; + frame_header->is_last = frame_info.is_last; + frame_header->save_before_color_transform = + frame_info.save_before_color_transform; + frame_header->frame_type = frame_info.frame_type; + frame_header->name = ib.name; + + progressive_splitter.InitPasses(&frame_header->passes); + + if (cparams.modular_mode) { + frame_header->encoding = FrameEncoding::kModular; + if (cparams.modular_group_size_shift == -1) { + frame_header->group_size_shift = 1; + // no point using groups when only one group is full and the others are + // less than half full: multithreading will not really help much, while + // compression does suffer + if (ib.xsize() <= 400 && ib.ysize() <= 400) { + frame_header->group_size_shift = 2; + } + } else { + frame_header->group_size_shift = cparams.modular_group_size_shift; + } + } + + frame_header->chroma_subsampling = ib.chroma_subsampling; + if (ib.IsJPEG()) { + // we are transcoding a JPEG, so we don't get to choose + frame_header->encoding = FrameEncoding::kVarDCT; + frame_header->color_transform = ib.color_transform; + } else { + frame_header->color_transform = cparams.color_transform; + if (!cparams.modular_mode && + (frame_header->chroma_subsampling.MaxHShift() != 0 || + frame_header->chroma_subsampling.MaxVShift() != 0)) { + return JXL_FAILURE( + "Chroma subsampling is not supported in VarDCT mode when not " + "recompressing JPEGs"); + } + } + if (frame_header->color_transform != ColorTransform::kYCbCr && + (frame_header->chroma_subsampling.MaxHShift() != 
0 || + frame_header->chroma_subsampling.MaxVShift() != 0)) { + return JXL_FAILURE( + "Chroma subsampling is not supported when color transform is not " + "YCbCr"); + } + + frame_header->flags = FrameFlagsFromParams(cparams); + // Non-photon noise is not supported in the Modular encoder for now. + if (frame_header->encoding != FrameEncoding::kVarDCT && + cparams.photon_noise_iso == 0 && cparams.manual_noise.empty()) { + frame_header->UpdateFlag(false, FrameHeader::Flags::kNoise); + } + + JXL_RETURN_IF_ERROR(LoopFilterFromParams(cparams, frame_header)); + + frame_header->dc_level = frame_info.dc_level; + if (frame_header->dc_level > 2) { + // With 3 or more progressive_dc frames, the implementation does not yet + // work, see enc_cache.cc. + return JXL_FAILURE("progressive_dc > 2 is not yet supported"); + } + if (cparams.progressive_dc > 0 && + (cparams.ec_resampling != 1 || cparams.resampling != 1)) { + return JXL_FAILURE("Resampling not supported with DC frames"); + } + if (cparams.resampling != 1 && cparams.resampling != 2 && + cparams.resampling != 4 && cparams.resampling != 8) { + return JXL_FAILURE("Invalid resampling factor"); + } + if (cparams.ec_resampling != 1 && cparams.ec_resampling != 2 && + cparams.ec_resampling != 4 && cparams.ec_resampling != 8) { + return JXL_FAILURE("Invalid ec_resampling factor"); + } + // Resized frames. + if (frame_info.frame_type != FrameType::kDCFrame) { + frame_header->frame_origin = ib.origin; + size_t ups = 1; + if (cparams.already_downsampled) ups = cparams.resampling; + + // TODO(lode): this is not correct in case of odd original image sizes in + // combination with cparams.already_downsampled. Likely these values should + // be set to respectively frame_header->default_xsize() and + // frame_header->default_ysize() instead, the original (non downsampled) + // intended decoded image dimensions. But it may be more subtle than that + // if combined with crop. 
This issue causes custom_size_or_origin to be + // incorrectly set to true in case of already_downsampled with odd output + // image size when no cropping is used. + frame_header->frame_size.xsize = ib.xsize() * ups; + frame_header->frame_size.ysize = ib.ysize() * ups; + if (ib.origin.x0 != 0 || ib.origin.y0 != 0 || + frame_header->frame_size.xsize != frame_header->default_xsize() || + frame_header->frame_size.ysize != frame_header->default_ysize()) { + frame_header->custom_size_or_origin = true; + } + } + // Upsampling. + frame_header->upsampling = cparams.resampling; + const std::vector& extra_channels = + frame_header->nonserialized_metadata->m.extra_channel_info; + frame_header->extra_channel_upsampling.clear(); + frame_header->extra_channel_upsampling.resize(extra_channels.size(), + cparams.ec_resampling); + frame_header->save_as_reference = frame_info.save_as_reference; + + // Set blending-related information. + if (ib.blend || frame_header->custom_size_or_origin) { + // Set blend_channel to the first alpha channel. These values are only + // encoded in case a blend mode involving alpha is used and there are more + // than one extra channels. + size_t index = 0; + if (frame_info.alpha_channel == -1) { + if (extra_channels.size() > 1) { + for (size_t i = 0; i < extra_channels.size(); i++) { + if (extra_channels[i].type == ExtraChannel::kAlpha) { + index = i; + break; + } + } + } + } else { + index = static_cast(frame_info.alpha_channel); + JXL_ASSERT(index == 0 || index < extra_channels.size()); + } + frame_header->blending_info.alpha_channel = index; + frame_header->blending_info.mode = + ib.blend ? 
ib.blendmode : BlendMode::kReplace; + frame_header->blending_info.source = frame_info.source; + frame_header->blending_info.clamp = frame_info.clamp; + const auto& extra_channel_info = frame_info.extra_channel_blending_info; + for (size_t i = 0; i < extra_channels.size(); i++) { + if (i < extra_channel_info.size()) { + frame_header->extra_channel_blending_info[i] = extra_channel_info[i]; + } else { + frame_header->extra_channel_blending_info[i].alpha_channel = index; + BlendMode default_blend = ib.blendmode; + if (extra_channels[i].type != ExtraChannel::kBlack && i != index) { + // K needs to be blended, spot colors and other stuff gets added + default_blend = BlendMode::kAdd; + } + frame_header->extra_channel_blending_info[i].mode = + ib.blend ? default_blend : BlendMode::kReplace; + frame_header->extra_channel_blending_info[i].source = 1; + } + } + } + + frame_header->animation_frame.duration = ib.duration; + frame_header->animation_frame.timecode = ib.timecode; + + return true; +} + +// Invisible (alpha = 0) pixels tend to be a mess in optimized PNGs. +// Since they have no visual impact whatsoever, we can replace them with +// something that compresses better and reduces artifacts near the edges. This +// does some kind of smooth stuff that seems to work. +// Replace invisible pixels with a weighted average of the pixel to the left, +// the pixel to the topright, and non-invisible neighbours. +// Produces downward-blurry smears, with in the upwards direction only a 1px +// edge duplication but not more. It would probably be better to smear in all +// directions. That requires an alpha-weighed convolution with a large enough +// kernel though, which might be overkill... +void SimplifyInvisible(Image3F* image, const ImageF& alpha, bool lossless) { + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < image->ysize(); ++y) { + float* JXL_RESTRICT row = image->PlaneRow(c, y); + const float* JXL_RESTRICT prow = + (y > 0 ? 
image->PlaneRow(c, y - 1) : nullptr); + const float* JXL_RESTRICT nrow = + (y + 1 < image->ysize() ? image->PlaneRow(c, y + 1) : nullptr); + const float* JXL_RESTRICT a = alpha.Row(y); + const float* JXL_RESTRICT pa = (y > 0 ? alpha.Row(y - 1) : nullptr); + const float* JXL_RESTRICT na = + (y + 1 < image->ysize() ? alpha.Row(y + 1) : nullptr); + for (size_t x = 0; x < image->xsize(); ++x) { + if (a[x] == 0) { + if (lossless) { + row[x] = 0; + continue; + } + float d = 0.f; + row[x] = 0; + if (x > 0) { + row[x] += row[x - 1]; + d++; + if (a[x - 1] > 0.f) { + row[x] += row[x - 1]; + d++; + } + } + if (x + 1 < image->xsize()) { + if (y > 0) { + row[x] += prow[x + 1]; + d++; + } + if (a[x + 1] > 0.f) { + row[x] += 2.f * row[x + 1]; + d += 2.f; + } + if (y > 0 && pa[x + 1] > 0.f) { + row[x] += 2.f * prow[x + 1]; + d += 2.f; + } + if (y + 1 < image->ysize() && na[x + 1] > 0.f) { + row[x] += 2.f * nrow[x + 1]; + d += 2.f; + } + } + if (y > 0 && pa[x] > 0.f) { + row[x] += 2.f * prow[x]; + d += 2.f; + } + if (y + 1 < image->ysize() && na[x] > 0.f) { + row[x] += 2.f * nrow[x]; + d += 2.f; + } + if (d > 1.f) row[x] /= d; + } + } + } + } +} + +struct PixelStatsForChromacityAdjustment { + float dx = 0; + float db = 0; + float exposed_blue = 0; + float CalcPlane(const ImageF* JXL_RESTRICT plane) const { + float xmax = 0; + float ymax = 0; + for (size_t ty = 1; ty < plane->ysize(); ++ty) { + for (size_t tx = 1; tx < plane->xsize(); ++tx) { + float cur = plane->Row(ty)[tx]; + float prev_row = plane->Row(ty - 1)[tx]; + float prev = plane->Row(ty)[tx - 1]; + xmax = std::max(xmax, std::abs(cur - prev)); + ymax = std::max(ymax, std::abs(cur - prev_row)); + } + } + return std::max(xmax, ymax); + } + void CalcExposedBlue(const ImageF* JXL_RESTRICT plane_y, + const ImageF* JXL_RESTRICT plane_b) { + float eb = 0; + float xmax = 0; + float ymax = 0; + for (size_t ty = 1; ty < plane_y->ysize(); ++ty) { + for (size_t tx = 1; tx < plane_y->xsize(); ++tx) { + float cur_y = 
plane_y->Row(ty)[tx]; + float cur_b = plane_b->Row(ty)[tx]; + float exposed_b = cur_b - cur_y * 1.2; + float diff_b = cur_b - cur_y; + float prev_row = plane_b->Row(ty - 1)[tx]; + float prev = plane_b->Row(ty)[tx - 1]; + float diff_prev_row = prev_row - plane_y->Row(ty - 1)[tx]; + float diff_prev = prev - plane_y->Row(ty)[tx - 1]; + xmax = std::max(xmax, std::abs(diff_b - diff_prev)); + ymax = std::max(ymax, std::abs(diff_b - diff_prev_row)); + if (exposed_b >= 0) { + exposed_b *= fabs(cur_b - prev) + fabs(cur_b - prev_row); + eb = std::max(eb, exposed_b); + } + } + } + exposed_blue = eb; + db = std::max(xmax, ymax); + } + void Calc(const Image3F* JXL_RESTRICT opsin) { + dx = CalcPlane(&opsin->Plane(0)); + CalcExposedBlue(&opsin->Plane(1), &opsin->Plane(2)); + } + int HowMuchIsXChannelPixelized() { + if (dx >= 0.03) { + return 2; + } + if (dx >= 0.017) { + return 1; + } + return 0; + } + int HowMuchIsBChannelPixelized() { + int add = exposed_blue >= 0.13 ? 1 : 0; + if (db > 0.38) { + return 2 + add; + } + if (db > 0.33) { + return 1 + add; + } + if (db > 0.28) { + return add; + } + return 0; + } +}; + +} // namespace + +class LossyFrameEncoder { + public: + LossyFrameEncoder(const CompressParams& cparams, + const FrameHeader& frame_header, + PassesEncoderState* JXL_RESTRICT enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out) + : enc_state_(enc_state), cms_(cms), pool_(pool), aux_out_(aux_out) { + JXL_CHECK(InitializePassesSharedState(frame_header, &enc_state_->shared, + /*encoder=*/true)); + enc_state_->cparams = cparams; + enc_state_->passes.clear(); + } + + Status ComputeEncodingData(const ImageBundle* linear, + Image3F* JXL_RESTRICT opsin, + const JxlCmsInterface& cms, ThreadPool* pool, + ModularFrameEncoder* modular_frame_encoder, + FrameHeader* frame_header) { + JXL_ASSERT((opsin->xsize() % kBlockDim) == 0 && + (opsin->ysize() % kBlockDim) == 0); + PassesSharedState& shared = enc_state_->shared; + + if 
(!enc_state_->cparams.max_error_mode) { + // Compute chromacity adjustments using two approaches. + // 1) Distance based approach for chromacity adjustment: + float x_qm_scale_steps[4] = {1.25f, 7.0f, 15.0f, 24.0f}; + shared.frame_header.x_qm_scale = 2; + for (float x_qm_scale_step : x_qm_scale_steps) { + if (enc_state_->cparams.original_butteraugli_distance > + x_qm_scale_step) { + shared.frame_header.x_qm_scale++; + } + } + if (enc_state_->cparams.butteraugli_distance < 0.299f) { + // Favor chromacity preservation for making images appear more + // faithful to original even with extreme (5-10x) zooming. + shared.frame_header.x_qm_scale++; + } + // 2) Pixel-based approach for chromacity adjustment: + // look at the individual pixels and make a guess how difficult + // the image would be based on the worst case pixel. + PixelStatsForChromacityAdjustment pixel_stats; + if (enc_state_->cparams.speed_tier <= SpeedTier::kWombat) { + pixel_stats.Calc(opsin); + } + // For X take the most severe adjustment. + shared.frame_header.x_qm_scale = + std::max(shared.frame_header.x_qm_scale, + 2 + pixel_stats.HowMuchIsXChannelPixelized()); + // B only ajudsted by pixel-based approach. 
+ shared.frame_header.b_qm_scale = + 2 + pixel_stats.HowMuchIsBChannelPixelized(); + } + + JXL_RETURN_IF_ERROR(enc_state_->heuristics->LossyFrameHeuristics( + enc_state_, modular_frame_encoder, linear, opsin, cms_, pool_, + aux_out_)); + + JXL_RETURN_IF_ERROR(InitializePassesEncoder( + *opsin, cms, pool_, enc_state_, modular_frame_encoder, aux_out_)); + + enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses()); + for (PassesEncoderState::PassData& pass : enc_state_->passes) { + pass.ac_tokens.resize(shared.frame_dim.num_groups); + } + + ComputeAllCoeffOrders(shared.frame_dim); + shared.num_histograms = 1; + + const auto tokenize_group_init = [&](const size_t num_threads) { + group_caches_.resize(num_threads); + return true; + }; + const auto tokenize_group = [&](const uint32_t group_index, + const size_t thread) { + // Tokenize coefficients. + const Rect rect = shared.BlockGroupRect(group_index); + for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size(); + idx_pass++) { + JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32); + const int32_t* JXL_RESTRICT ac_rows[3] = { + enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32, + enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32, + enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32, + }; + // Ensure group cache is initialized. 
+ group_caches_[thread].InitOnce(); + TokenizeCoefficients( + &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect, + ac_rows, shared.ac_strategy, frame_header->chroma_subsampling, + &group_caches_[thread].num_nzeroes, + &enc_state_->passes[idx_pass].ac_tokens[group_index], + enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field, + enc_state_->shared.block_ctx_map); + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_groups, + tokenize_group_init, tokenize_group, + "TokenizeGroup")); + + *frame_header = shared.frame_header; + return true; + } + + Status ComputeJPEGTranscodingData(const jpeg::JPEGData& jpeg_data, + ModularFrameEncoder* modular_frame_encoder, + FrameHeader* frame_header) { + PassesSharedState& shared = enc_state_->shared; + + frame_header->x_qm_scale = 2; + frame_header->b_qm_scale = 2; + + FrameDimensions frame_dim = frame_header->ToFrameDimensions(); + + const size_t xsize = frame_dim.xsize_padded; + const size_t ysize = frame_dim.ysize_padded; + const size_t xsize_blocks = frame_dim.xsize_blocks; + const size_t ysize_blocks = frame_dim.ysize_blocks; + + // no-op chroma from luma + shared.cmap = ColorCorrelationMap(xsize, ysize, false); + shared.ac_strategy.FillDCT8(); + FillImage(uint8_t(0), &shared.epf_sharpness); + + enc_state_->passes.resize(enc_state_->progressive_splitter.GetNumPasses()); + for (PassesEncoderState::PassData& pass : enc_state_->passes) { + pass.ac_tokens.resize(shared.frame_dim.num_groups); + } + + enc_state_->coeffs.clear(); + while (enc_state_->coeffs.size() < enc_state_->passes.size()) { + enc_state_->coeffs.emplace_back(make_unique>( + kGroupDim * kGroupDim, frame_dim.num_groups)); + } + + // convert JPEG quantization table to a Quantizer object + float dcquantization[3]; + std::vector qe(DequantMatrices::kNum, + QuantEncoding::Library(0)); + + auto jpeg_c_map = JpegOrder(frame_header->color_transform, + jpeg_data.components.size() == 1); + + std::vector qt(192); + for (size_t c = 0; 
c < 3; c++) { + size_t jpeg_c = jpeg_c_map[c]; + const int32_t* quant = + jpeg_data.quant[jpeg_data.components[jpeg_c].quant_idx].values.data(); + + dcquantization[c] = 255 * 8.0f / quant[0]; + for (size_t y = 0; y < 8; y++) { + for (size_t x = 0; x < 8; x++) { + // JPEG XL transposes the DCT, JPEG doesn't. + qt[c * 64 + 8 * x + y] = quant[8 * y + x]; + } + } + } + DequantMatricesSetCustomDC(&shared.matrices, dcquantization); + float dcquantization_r[3] = {1.0f / dcquantization[0], + 1.0f / dcquantization[1], + 1.0f / dcquantization[2]}; + + qe[AcStrategy::Type::DCT] = QuantEncoding::RAW(qt); + DequantMatricesSetCustom(&shared.matrices, qe, modular_frame_encoder); + + // Ensure that InvGlobalScale() is 1. + shared.quantizer = Quantizer(&shared.matrices, 1, kGlobalScaleDenom); + // Recompute MulDC() and InvMulDC(). + shared.quantizer.RecomputeFromGlobalScale(); + + // Per-block dequant scaling should be 1. + FillImage(static_cast(shared.quantizer.InvGlobalScale()), + &shared.raw_quant_field); + + std::vector scaled_qtable(192); + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 64; i++) { + scaled_qtable[64 * c + i] = + (1 << kCFLFixedPointPrecision) * qt[64 + i] / qt[64 * c + i]; + } + } + + auto jpeg_row = [&](size_t c, size_t y) { + return jpeg_data.components[jpeg_c_map[c]].coeffs.data() + + jpeg_data.components[jpeg_c_map[c]].width_in_blocks * + kDCTBlockSize * y; + }; + + Image3F dc = Image3F(xsize_blocks, ysize_blocks); + bool DCzero = + (shared.frame_header.color_transform == ColorTransform::kYCbCr); + // Compute chroma-from-luma for AC (doesn't seem to be useful for DC) + if (frame_header->chroma_subsampling.Is444() && + enc_state_->cparams.force_cfl_jpeg_recompression && + jpeg_data.components.size() == 3) { + for (size_t c : {0, 2}) { + ImageSB* map = (c == 0 ? &shared.cmap.ytox_map : &shared.cmap.ytob_map); + const float kScale = kDefaultColorFactor; + const int kOffset = 127; + const float kBase = + c == 0 ? 
shared.cmap.YtoXRatio(0) : shared.cmap.YtoBRatio(0); + const float kZeroThresh = + kScale * kZeroBiasDefault[c] * + 0.9999f; // just epsilon less for better rounding + + auto process_row = [&](const uint32_t task, const size_t thread) { + size_t ty = task; + int8_t* JXL_RESTRICT row_out = map->Row(ty); + for (size_t tx = 0; tx < map->xsize(); ++tx) { + const size_t y0 = ty * kColorTileDimInBlocks; + const size_t x0 = tx * kColorTileDimInBlocks; + const size_t y1 = std::min(frame_dim.ysize_blocks, + (ty + 1) * kColorTileDimInBlocks); + const size_t x1 = std::min(frame_dim.xsize_blocks, + (tx + 1) * kColorTileDimInBlocks); + int32_t d_num_zeros[257] = {0}; + // TODO(veluca): this needs SIMD + fixed point adaptation, and/or + // conversion to the new CfL algorithm. + for (size_t y = y0; y < y1; ++y) { + const int16_t* JXL_RESTRICT row_m = jpeg_row(1, y); + const int16_t* JXL_RESTRICT row_s = jpeg_row(c, y); + for (size_t x = x0; x < x1; ++x) { + for (size_t coeffpos = 1; coeffpos < kDCTBlockSize; + coeffpos++) { + const float scaled_m = + row_m[x * kDCTBlockSize + coeffpos] * + scaled_qtable[64 * c + coeffpos] * + (1.0f / (1 << kCFLFixedPointPrecision)); + const float scaled_s = + kScale * row_s[x * kDCTBlockSize + coeffpos] + + (kOffset - kBase * kScale) * scaled_m; + if (std::abs(scaled_m) > 1e-8f) { + float from, to; + if (scaled_m > 0) { + from = (scaled_s - kZeroThresh) / scaled_m; + to = (scaled_s + kZeroThresh) / scaled_m; + } else { + from = (scaled_s + kZeroThresh) / scaled_m; + to = (scaled_s - kZeroThresh) / scaled_m; + } + if (from < 0.0f) { + from = 0.0f; + } + if (to > 255.0f) { + to = 255.0f; + } + // Instead of clamping the both values + // we just check that range is sane. 
+ if (from <= to) { + d_num_zeros[static_cast(std::ceil(from))]++; + d_num_zeros[static_cast(std::floor(to + 1))]--; + } + } + } + } + } + int best = 0; + int32_t best_sum = 0; + FindIndexOfSumMaximum(d_num_zeros, 256, &best, &best_sum); + int32_t offset_sum = 0; + for (int i = 0; i < 256; ++i) { + if (i <= kOffset) { + offset_sum += d_num_zeros[i]; + } + } + row_out[tx] = 0; + if (best_sum > offset_sum + 1) { + row_out[tx] = best - kOffset; + } + } + }; + + JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, map->ysize(), + ThreadPool::NoInit, process_row, + "FindCorrelation")); + } + } + + if (!frame_header->chroma_subsampling.Is444()) { + ZeroFillImage(&dc); + for (auto& coeff : enc_state_->coeffs) { + coeff->ZeroFill(); + } + } + // JPEG DC is from -1024 to 1023. + std::vector dc_counts[3] = {}; + dc_counts[0].resize(2048); + dc_counts[1].resize(2048); + dc_counts[2].resize(2048); + size_t total_dc[3] = {}; + for (size_t c : {1, 0, 2}) { + if (jpeg_data.components.size() == 1 && c != 1) { + for (auto& coeff : enc_state_->coeffs) { + coeff->ZeroFillPlane(c); + } + ZeroFillImage(&dc.Plane(c)); + // Ensure no division by 0. + dc_counts[c][1024] = 1; + total_dc[c] = 1; + continue; + } + size_t hshift = frame_header->chroma_subsampling.HShift(c); + size_t vshift = frame_header->chroma_subsampling.VShift(c); + ImageSB& map = (c == 0 ? 
shared.cmap.ytox_map : shared.cmap.ytob_map); + for (size_t group_index = 0; group_index < frame_dim.num_groups; + group_index++) { + const size_t gx = group_index % frame_dim.xsize_groups; + const size_t gy = group_index / frame_dim.xsize_groups; + int32_t* coeffs[kMaxNumPasses]; + for (size_t i = 0; i < enc_state_->coeffs.size(); i++) { + coeffs[i] = enc_state_->coeffs[i]->PlaneRow(c, group_index, 0).ptr32; + } + int32_t block[64]; + for (size_t by = gy * kGroupDimInBlocks; + by < ysize_blocks && by < (gy + 1) * kGroupDimInBlocks; ++by) { + if ((by >> vshift) << vshift != by) continue; + const int16_t* JXL_RESTRICT inputjpeg = jpeg_row(c, by >> vshift); + const int16_t* JXL_RESTRICT inputjpegY = jpeg_row(1, by); + float* JXL_RESTRICT fdc = dc.PlaneRow(c, by >> vshift); + const int8_t* JXL_RESTRICT cm = + map.ConstRow(by / kColorTileDimInBlocks); + for (size_t bx = gx * kGroupDimInBlocks; + bx < xsize_blocks && bx < (gx + 1) * kGroupDimInBlocks; ++bx) { + if ((bx >> hshift) << hshift != bx) continue; + size_t base = (bx >> hshift) * kDCTBlockSize; + int idc; + if (DCzero) { + idc = inputjpeg[base]; + } else { + idc = inputjpeg[base] + 1024 / qt[c * 64]; + } + dc_counts[c][std::min(static_cast(idc + 1024), + uint32_t(2047))]++; + total_dc[c]++; + fdc[bx >> hshift] = idc * dcquantization_r[c]; + if (c == 1 || !enc_state_->cparams.force_cfl_jpeg_recompression || + !frame_header->chroma_subsampling.Is444()) { + for (size_t y = 0; y < 8; y++) { + for (size_t x = 0; x < 8; x++) { + block[y * 8 + x] = inputjpeg[base + x * 8 + y]; + } + } + } else { + const int32_t scale = + shared.cmap.RatioJPEG(cm[bx / kColorTileDimInBlocks]); + + for (size_t y = 0; y < 8; y++) { + for (size_t x = 0; x < 8; x++) { + int Y = inputjpegY[kDCTBlockSize * bx + x * 8 + y]; + int QChroma = inputjpeg[kDCTBlockSize * bx + x * 8 + y]; + // Fixed-point multiply of CfL scale with quant table ratio + // first, and Y value second. 
+ int coeff_scale = (scale * scaled_qtable[64 * c + y * 8 + x] + + (1 << (kCFLFixedPointPrecision - 1))) >> + kCFLFixedPointPrecision; + int cfl_factor = (Y * coeff_scale + + (1 << (kCFLFixedPointPrecision - 1))) >> + kCFLFixedPointPrecision; + int QCR = QChroma - cfl_factor; + block[y * 8 + x] = QCR; + } + } + } + enc_state_->progressive_splitter.SplitACCoefficients( + block, AcStrategy::FromRawStrategy(AcStrategy::Type::DCT), bx, + by, coeffs); + for (size_t i = 0; i < enc_state_->coeffs.size(); i++) { + coeffs[i] += kDCTBlockSize; + } + } + } + } + } + + auto& dct = enc_state_->shared.block_ctx_map.dc_thresholds; + auto& num_dc_ctxs = enc_state_->shared.block_ctx_map.num_dc_ctxs; + num_dc_ctxs = 1; + for (size_t i = 0; i < 3; i++) { + dct[i].clear(); + int num_thresholds = (CeilLog2Nonzero(total_dc[i]) - 12) / 2; + // up to 3 buckets per channel: + // dark/medium/bright, yellow/unsat/blue, green/unsat/red + num_thresholds = std::min(std::max(num_thresholds, 0), 2); + size_t cumsum = 0; + size_t cut = total_dc[i] / (num_thresholds + 1); + for (int j = 0; j < 2048; j++) { + cumsum += dc_counts[i][j]; + if (cumsum > cut) { + dct[i].push_back(j - 1025); + cut = total_dc[i] * (dct[i].size() + 1) / (num_thresholds + 1); + } + } + num_dc_ctxs *= dct[i].size() + 1; + } + + auto& ctx_map = enc_state_->shared.block_ctx_map.ctx_map; + ctx_map.clear(); + ctx_map.resize(3 * kNumOrders * num_dc_ctxs, 0); + + int lbuckets = (dct[1].size() + 1); + for (size_t i = 0; i < num_dc_ctxs; i++) { + // up to 9 contexts for luma + ctx_map[i] = i / lbuckets; + // up to 3 contexts for chroma + ctx_map[kNumOrders * num_dc_ctxs + i] = + ctx_map[2 * kNumOrders * num_dc_ctxs + i] = + num_dc_ctxs / lbuckets + (i % lbuckets); + } + enc_state_->shared.block_ctx_map.num_ctxs = + *std::max_element(ctx_map.begin(), ctx_map.end()) + 1; + + enc_state_->histogram_idx.resize(shared.frame_dim.num_groups); + + // disable DC frame for now + shared.frame_header.UpdateFlag(false, FrameHeader::kUseDcFrame); 
+ auto compute_dc_coeffs = [&](const uint32_t group_index, + size_t /* thread */) { + modular_frame_encoder->AddVarDCTDC(dc, group_index, /*nl_dc=*/false, + enc_state_, /*jpeg_transcode=*/true); + modular_frame_encoder->AddACMetadata(group_index, /*jpeg_transcode=*/true, + enc_state_); + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_dc_groups, + ThreadPool::NoInit, compute_dc_coeffs, + "Compute DC coeffs")); + + // Must happen before WriteFrameHeader! + shared.frame_header.UpdateFlag(true, FrameHeader::kSkipAdaptiveDCSmoothing); + + ComputeAllCoeffOrders(frame_dim); + shared.num_histograms = 1; + + const auto tokenize_group_init = [&](const size_t num_threads) { + group_caches_.resize(num_threads); + return true; + }; + const auto tokenize_group = [&](const uint32_t group_index, + const size_t thread) { + // Tokenize coefficients. + const Rect rect = shared.BlockGroupRect(group_index); + for (size_t idx_pass = 0; idx_pass < enc_state_->passes.size(); + idx_pass++) { + JXL_ASSERT(enc_state_->coeffs[idx_pass]->Type() == ACType::k32); + const int32_t* JXL_RESTRICT ac_rows[3] = { + enc_state_->coeffs[idx_pass]->PlaneRow(0, group_index, 0).ptr32, + enc_state_->coeffs[idx_pass]->PlaneRow(1, group_index, 0).ptr32, + enc_state_->coeffs[idx_pass]->PlaneRow(2, group_index, 0).ptr32, + }; + // Ensure group cache is initialized. 
+ group_caches_[thread].InitOnce(); + TokenizeCoefficients( + &shared.coeff_orders[idx_pass * shared.coeff_order_size], rect, + ac_rows, shared.ac_strategy, frame_header->chroma_subsampling, + &group_caches_[thread].num_nzeroes, + &enc_state_->passes[idx_pass].ac_tokens[group_index], + enc_state_->shared.quant_dc, enc_state_->shared.raw_quant_field, + enc_state_->shared.block_ctx_map); + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool_, 0, shared.frame_dim.num_groups, + tokenize_group_init, tokenize_group, + "TokenizeGroup")); + *frame_header = shared.frame_header; + doing_jpeg_recompression = true; + return true; + } + + Status EncodeGlobalDCInfo(const FrameHeader& frame_header, + BitWriter* writer) const { + // Encode quantizer DC and global scale. + QuantizerParams params = enc_state_->shared.quantizer.GetParams(); + JXL_RETURN_IF_ERROR( + WriteQuantizerParams(params, writer, kLayerQuant, aux_out_)); + EncodeBlockCtxMap(enc_state_->shared.block_ctx_map, writer, aux_out_); + ColorCorrelationMapEncodeDC(&enc_state_->shared.cmap, writer, kLayerDC, + aux_out_); + return true; + } + + Status EncodeGlobalACInfo(BitWriter* writer, + ModularFrameEncoder* modular_frame_encoder) { + JXL_RETURN_IF_ERROR(DequantMatricesEncode(&enc_state_->shared.matrices, + writer, kLayerQuant, aux_out_, + modular_frame_encoder)); + size_t num_histo_bits = + CeilLog2Nonzero(enc_state_->shared.frame_dim.num_groups); + if (num_histo_bits != 0) { + BitWriter::Allotment allotment(writer, num_histo_bits); + writer->Write(num_histo_bits, enc_state_->shared.num_histograms - 1); + allotment.ReclaimAndCharge(writer, kLayerAC, aux_out_); + } + + for (size_t i = 0; i < enc_state_->progressive_splitter.GetNumPasses(); + i++) { + // Encode coefficient orders. 
+ size_t order_bits = 0; + JXL_RETURN_IF_ERROR(U32Coder::CanEncode( + kOrderEnc, enc_state_->used_orders[i], &order_bits)); + BitWriter::Allotment allotment(writer, order_bits); + JXL_CHECK(U32Coder::Write(kOrderEnc, enc_state_->used_orders[i], writer)); + allotment.ReclaimAndCharge(writer, kLayerOrder, aux_out_); + EncodeCoeffOrders( + enc_state_->used_orders[i], + &enc_state_->shared + .coeff_orders[i * enc_state_->shared.coeff_order_size], + writer, kLayerOrder, aux_out_); + + // Encode histograms. + HistogramParams hist_params( + enc_state_->cparams.speed_tier, + enc_state_->shared.block_ctx_map.NumACContexts()); + if (enc_state_->cparams.speed_tier > SpeedTier::kTortoise) { + hist_params.lz77_method = HistogramParams::LZ77Method::kNone; + } + if (enc_state_->cparams.decoding_speed_tier >= 1) { + hist_params.max_histograms = 6; + } + BuildAndEncodeHistograms( + hist_params, + enc_state_->shared.num_histograms * + enc_state_->shared.block_ctx_map.NumACContexts(), + enc_state_->passes[i].ac_tokens, &enc_state_->passes[i].codes, + &enc_state_->passes[i].context_map, writer, kLayerAC, aux_out_); + } + + return true; + } + + Status EncodeACGroup(size_t pass, size_t group_index, BitWriter* group_code, + AuxOut* local_aux_out) { + return EncodeGroupTokenizedCoefficients( + group_index, pass, enc_state_->histogram_idx[group_index], *enc_state_, + group_code, local_aux_out); + } + + PassesEncoderState* State() { return enc_state_; } + + private: + void ComputeAllCoeffOrders(const FrameDimensions& frame_dim) { + // No coefficient reordering in Falcon or faster. 
+ auto used_orders_info = ComputeUsedOrders( + enc_state_->cparams.speed_tier, enc_state_->shared.ac_strategy, + Rect(enc_state_->shared.raw_quant_field)); + enc_state_->used_orders.clear(); + enc_state_->used_orders.resize( + enc_state_->progressive_splitter.GetNumPasses(), + used_orders_info.second); + for (size_t i = 0; i < enc_state_->progressive_splitter.GetNumPasses(); + i++) { + ComputeCoeffOrder( + enc_state_->cparams.speed_tier, *enc_state_->coeffs[i], + enc_state_->shared.ac_strategy, frame_dim, enc_state_->used_orders[i], + used_orders_info.first, + &enc_state_->shared + .coeff_orders[i * enc_state_->shared.coeff_order_size]); + } + } + + template + static inline void FindIndexOfSumMaximum(const V* array, const size_t len, + R* idx, V* sum) { + JXL_ASSERT(len > 0); + V maxval = 0; + V val = 0; + R maxidx = 0; + for (size_t i = 0; i < len; ++i) { + val += array[i]; + if (val > maxval) { + maxval = val; + maxidx = i; + } + } + *idx = maxidx; + *sum = maxval; + } + + PassesEncoderState* JXL_RESTRICT enc_state_; + JxlCmsInterface cms_; + ThreadPool* pool_; + AuxOut* aux_out_; + std::vector group_caches_; + bool doing_jpeg_recompression = false; +}; + +Status ParamsPostInit(CompressParams* p) { + if (!p->manual_noise.empty() && + p->manual_noise.size() != NoiseParams::kNumNoisePoints) { + return JXL_FAILURE("Invalid number of noise lut entries"); + } + if (!p->manual_xyb_factors.empty() && p->manual_xyb_factors.size() != 3) { + return JXL_FAILURE("Invalid number of XYB quantization factors"); + } + if (!p->modular_mode && p->butteraugli_distance == 0.0) { + p->butteraugli_distance = kMinButteraugliDistance; + } + if (p->original_butteraugli_distance == -1.0) { + p->original_butteraugli_distance = p->butteraugli_distance; + } + if (p->resampling <= 0) { + p->resampling = 1; + // For very low bit rates, using 2x2 resampling gives better results on + // most photographic images, with an adjusted butteraugli score chosen to + // give roughly the same amount of 
bits per pixel. + if (!p->already_downsampled && p->butteraugli_distance >= 20) { + p->resampling = 2; + p->butteraugli_distance = 6 + ((p->butteraugli_distance - 20) * 0.25); + } + } + if (p->ec_resampling <= 0) { + p->ec_resampling = p->resampling; + } + return true; +} + +Status EncodeFrame(const CompressParams& cparams_orig, + const FrameInfo& frame_info, const CodecMetadata* metadata, + const ImageBundle& ib, PassesEncoderState* passes_enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + BitWriter* writer, AuxOut* aux_out) { + CompressParams cparams = cparams_orig; + if (cparams.speed_tier == SpeedTier::kGlacier && !cparams.IsLossless()) { + cparams.speed_tier = SpeedTier::kTortoise; + } + if (cparams.speed_tier == SpeedTier::kGlacier) { + std::vector all_params; + std::vector size; + + CompressParams cparams_attempt = cparams_orig; + cparams_attempt.speed_tier = SpeedTier::kTortoise; + cparams_attempt.options.max_properties = 4; + + for (float x : {0.0f, 80.f}) { + cparams_attempt.channel_colors_percent = x; + for (float y : {0.0f, 95.0f}) { + cparams_attempt.channel_colors_pre_transform_percent = y; + // 70000 ensures that the number of palette colors is representable in + // modular headers. 
+ for (int K : {0, 1 << 10, 70000}) { + cparams_attempt.palette_colors = K; + for (int tree_mode : {-1, (int)ModularOptions::TreeMode::kNoWP, + (int)ModularOptions::TreeMode::kDefault}) { + if (tree_mode == -1) { + // LZ77 only + cparams_attempt.options.nb_repeats = 0; + } else { + cparams_attempt.options.nb_repeats = 1; + cparams_attempt.options.wp_tree_mode = + static_cast(tree_mode); + } + for (Predictor pred : {Predictor::Zero, Predictor::Variable}) { + cparams_attempt.options.predictor = pred; + for (int g : {0, -1, 3}) { + cparams_attempt.modular_group_size_shift = g; + for (Override patches : {Override::kDefault, Override::kOff}) { + cparams_attempt.patches = patches; + all_params.push_back(cparams_attempt); + } + } + } + } + } + } + } + // Trial-encode every candidate (each nested EncodeFrame call gets a null pool) and record its size in bits. + size.resize(all_params.size()); + + std::atomic num_errors{0}; + // NOTE(review): every task passes the same aux_out to EncodeFrame; if it is non-null and the pool runs tasks concurrently this looks like unsynchronized shared mutation - confirm. + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, all_params.size(), ThreadPool::NoInit, + [&](size_t task, size_t) { + BitWriter w; + PassesEncoderState state; + if (!EncodeFrame(all_params[task], frame_info, metadata, ib, &state, + cms, nullptr, &w, aux_out)) { + num_errors.fetch_add(1, std::memory_order_relaxed); + return; + } + size[task] = w.BitsWritten(); + }, + "Compress kGlacier")); + JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0); + // Keep the candidate with the fewest bits written; on a tie the earliest candidate wins. + size_t best_idx = 0; + for (size_t i = 1; i < all_params.size(); i++) { + if (size[best_idx] > size[i]) { + best_idx = i; + } + } + cparams = all_params[best_idx]; + } + + ib.VerifyMetadata(); + + passes_enc_state->special_frames.clear(); + + if (cparams.qprogressive_mode) { + passes_enc_state->progressive_splitter.SetProgressiveMode( + ProgressiveMode{progressive_passes_dc_quant_ac_full_ac}); + } else if (cparams.progressive_mode) { + passes_enc_state->progressive_splitter.SetProgressiveMode( + ProgressiveMode{progressive_passes_dc_vlf_lf_full_ac}); + } + + JXL_RETURN_IF_ERROR(ParamsPostInit(&cparams)); + + if (cparams.progressive_dc < 0) { + if (cparams.progressive_dc != -1) { + return
JXL_FAILURE("Invalid progressive DC setting value (%d)", + cparams.progressive_dc); + } + cparams.progressive_dc = 0; + } + if (cparams.ec_resampling < cparams.resampling) { + cparams.ec_resampling = cparams.resampling; + } + if (cparams.resampling > 1 || frame_info.is_preview) { + cparams.progressive_dc = 0; + } + + if (frame_info.dc_level + cparams.progressive_dc > 4) { + return JXL_FAILURE("Too many levels of progressive DC"); + } + + if (cparams.butteraugli_distance != 0 && + cparams.butteraugli_distance < kMinButteraugliDistance) { + return JXL_FAILURE("Butteraugli distance is too low (%f)", + cparams.butteraugli_distance); + } + + if (ib.IsJPEG()) { + cparams.gaborish = Override::kOff; + cparams.epf = 0; + cparams.modular_mode = false; + } + + if (ib.xsize() == 0 || ib.ysize() == 0) return JXL_FAILURE("Empty image"); + + // Assert that this metadata is correctly set up for the compression params, + // this should have been done by enc_file.cc + JXL_ASSERT(metadata->m.xyb_encoded == + (cparams.color_transform == ColorTransform::kXYB)); + std::unique_ptr frame_header = + jxl::make_unique(metadata); + JXL_RETURN_IF_ERROR(MakeFrameHeader(cparams, + passes_enc_state->progressive_splitter, + frame_info, ib, frame_header.get())); + // Check that if the codestream header says xyb_encoded, the color_transform + // matches the requirement. This is checked from the cparams here, even though + // optimally we'd be able to check this against what has actually been written + // in the main codestream header, but since ib is a const object and the data + // written to the main codestream header is (in modified form) in ib, the + // encoder cannot indicate this fact in the ib's metadata. 
+ if (cparams_orig.color_transform == ColorTransform::kXYB) { + if (frame_header->color_transform != ColorTransform::kXYB) { + return JXL_FAILURE( + "The color transform of frames must be xyb if the codestream is xyb " + "encoded"); + } + } else { + if (frame_header->color_transform == ColorTransform::kXYB) { + return JXL_FAILURE( + "The color transform of frames cannot be xyb if the codestream is " + "not xyb encoded"); + } + } + + FrameDimensions frame_dim = frame_header->ToFrameDimensions(); + + const size_t num_groups = frame_dim.num_groups; + + Image3F opsin; + const ColorEncoding& c_linear = ColorEncoding::LinearSRGB(ib.IsGray()); + std::unique_ptr metadata_linear = + jxl::make_unique(); + metadata_linear->xyb_encoded = + (cparams.color_transform == ColorTransform::kXYB); + metadata_linear->color_encoding = c_linear; + ImageBundle linear_storage(metadata_linear.get()); + + std::vector aux_outs; + // LossyFrameEncoder stores a reference to a std::function + // so we need to keep the std::function being referenced + // alive while lossy_frame_encoder is used. We could make resize_aux_outs a + // lambda type by making LossyFrameEncoder a template instead, but this is + // simpler. 
+ const std::function resize_aux_outs = + [&aux_outs, aux_out](const size_t num_threads) -> Status { + if (aux_out != nullptr) { + size_t old_size = aux_outs.size(); + for (size_t i = num_threads; i < old_size; i++) { + aux_out->Assimilate(aux_outs[i]); + } + aux_outs.resize(num_threads); + } + return true; + }; + + LossyFrameEncoder lossy_frame_encoder(cparams, *frame_header, + passes_enc_state, cms, pool, aux_out); + std::unique_ptr modular_frame_encoder = + jxl::make_unique(*frame_header, cparams); + + const std::vector* extra_channels = &ib.extra_channels(); + std::vector extra_channels_storage; + // Clear patches + passes_enc_state->shared.image_features.patches = PatchDictionary(); + passes_enc_state->shared.image_features.patches.SetPassesSharedState( + &passes_enc_state->shared); + + if (ib.IsJPEG()) { + JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeJPEGTranscodingData( + *ib.jpeg_data, modular_frame_encoder.get(), frame_header.get())); + } else if (!lossy_frame_encoder.State()->heuristics->HandlesColorConversion( + cparams, ib) || + frame_header->encoding != FrameEncoding::kVarDCT) { + // Allocating a large enough image avoids a copy when padding. + opsin = + Image3F(RoundUpToBlockDim(ib.xsize()), RoundUpToBlockDim(ib.ysize())); + opsin.ShrinkTo(ib.xsize(), ib.ysize()); + + const bool want_linear = frame_header->encoding == FrameEncoding::kVarDCT && + cparams.speed_tier <= SpeedTier::kKitten; + const ImageBundle* JXL_RESTRICT ib_or_linear = &ib; + + if (frame_header->color_transform == ColorTransform::kXYB && + frame_info.ib_needs_color_transform) { + // linear_storage would only be used by the Butteraugli loop (passing + // linear sRGB avoids a color conversion there). Otherwise, don't + // fill it to reduce memory usage. + ib_or_linear = + ToXYB(ib, pool, &opsin, cms, want_linear ? 
&linear_storage : nullptr); + } else { // RGB or YCbCr: don't do anything (forward YCbCr is not + // implemented, this is only used when the input is already in + // YCbCr) + // If encoding a special DC or reference frame, don't do anything: + // input is already in XYB. + CopyImageTo(ib.color(), &opsin); + } + bool lossless = cparams.IsLossless(); + if (ib.HasAlpha() && !ib.AlphaIsPremultiplied() && + frame_header->frame_type == FrameType::kRegularFrame && + !ApplyOverride(cparams.keep_invisible, lossless) && + cparams.ec_resampling == cparams.resampling) { + // simplify invisible pixels + SimplifyInvisible(&opsin, ib.alpha(), lossless); + if (want_linear) { + SimplifyInvisible(const_cast(&ib_or_linear->color()), + ib.alpha(), lossless); + } + } + if (frame_header->encoding == FrameEncoding::kVarDCT) { + PadImageToBlockMultipleInPlace(&opsin); + JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData( + ib_or_linear, &opsin, cms, pool, modular_frame_encoder.get(), + frame_header.get())); + } else if (frame_header->upsampling != 1 && !cparams.already_downsampled) { + // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling + // after noise, if necessary. + DownsampleImage(&opsin, frame_header->upsampling); + } + } else { + JXL_RETURN_IF_ERROR(lossy_frame_encoder.ComputeEncodingData( + &ib, &opsin, cms, pool, modular_frame_encoder.get(), + frame_header.get())); + } + if (cparams.ec_resampling != 1 && !cparams.already_downsampled) { + extra_channels = &extra_channels_storage; + for (const ImageF& ec : ib.extra_channels()) { + ImageF d_ec(ec.xsize(), ec.ysize()); + CopyImageTo(ec, &d_ec); + DownsampleImage(&d_ec, cparams.ec_resampling); + extra_channels_storage.emplace_back(std::move(d_ec)); + } + } + // needs to happen *AFTER* VarDCT-ComputeEncodingData. 
+ JXL_RETURN_IF_ERROR(modular_frame_encoder->ComputeEncodingData( + *frame_header, *ib.metadata(), &opsin, *extra_channels, + lossy_frame_encoder.State(), cms, pool, aux_out, + /* do_color=*/frame_header->encoding == FrameEncoding::kModular)); + + writer->AppendByteAligned(lossy_frame_encoder.State()->special_frames); + frame_header->UpdateFlag( + lossy_frame_encoder.State()->shared.image_features.patches.HasAny(), + FrameHeader::kPatches); + frame_header->UpdateFlag( + lossy_frame_encoder.State()->shared.image_features.splines.HasAny(), + FrameHeader::kSplines); + JXL_RETURN_IF_ERROR(WriteFrameHeader(*frame_header, writer, aux_out)); + + const size_t num_passes = + passes_enc_state->progressive_splitter.GetNumPasses(); + + // DC global info + DC groups + AC global info + AC groups * + // num_passes. + const bool has_ac_global = true; + std::vector group_codes(NumTocEntries(frame_dim.num_groups, + frame_dim.num_dc_groups, + num_passes, has_ac_global)); + const size_t global_ac_index = frame_dim.num_dc_groups + 1; + const bool is_small_image = frame_dim.num_groups == 1 && num_passes == 1; + const auto get_output = [&](const size_t index) { + return &group_codes[is_small_image ? 
0 : index]; + }; + auto ac_group_code = [&](size_t pass, size_t group) { + return get_output(AcGroupIndex(pass, group, frame_dim.num_groups, + frame_dim.num_dc_groups, has_ac_global)); + }; + + if (frame_header->flags & FrameHeader::kPatches) { + PatchDictionaryEncoder::Encode( + lossy_frame_encoder.State()->shared.image_features.patches, + get_output(0), kLayerDictionary, aux_out); + } + + if (frame_header->flags & FrameHeader::kSplines) { + EncodeSplines(lossy_frame_encoder.State()->shared.image_features.splines, + get_output(0), kLayerSplines, HistogramParams(), aux_out); + } + + if (cparams.photon_noise_iso > 0) { + lossy_frame_encoder.State()->shared.image_features.noise_params = + SimulatePhotonNoise(ib.xsize(), ib.ysize(), cparams.photon_noise_iso); + } + if (cparams.manual_noise.size() == NoiseParams::kNumNoisePoints) { + for (size_t i = 0; i < NoiseParams::kNumNoisePoints; i++) { + lossy_frame_encoder.State()->shared.image_features.noise_params.lut[i] = + cparams.manual_noise[i]; + } + } + if (frame_header->flags & FrameHeader::kNoise) { + EncodeNoise(lossy_frame_encoder.State()->shared.image_features.noise_params, + get_output(0), kLayerNoise, aux_out); + } + + JXL_RETURN_IF_ERROR( + DequantMatricesEncodeDC(&lossy_frame_encoder.State()->shared.matrices, + get_output(0), kLayerQuant, aux_out)); + if (frame_header->encoding == FrameEncoding::kVarDCT) { + JXL_RETURN_IF_ERROR( + lossy_frame_encoder.EncodeGlobalDCInfo(*frame_header, get_output(0))); + } + JXL_RETURN_IF_ERROR( + modular_frame_encoder->EncodeGlobalInfo(get_output(0), aux_out)); + JXL_RETURN_IF_ERROR(modular_frame_encoder->EncodeStream( + get_output(0), aux_out, kLayerModularGlobal, ModularStreamId::Global())); + + const auto process_dc_group = [&](const uint32_t group_index, + const size_t thread) { + AuxOut* my_aux_out = aux_out ? 
&aux_outs[thread] : nullptr; + BitWriter* output = get_output(group_index + 1); + if (frame_header->encoding == FrameEncoding::kVarDCT && + !(frame_header->flags & FrameHeader::kUseDcFrame)) { + BitWriter::Allotment allotment(output, 2); + output->Write(2, modular_frame_encoder->extra_dc_precision[group_index]); + allotment.ReclaimAndCharge(output, kLayerDC, my_aux_out); + JXL_CHECK(modular_frame_encoder->EncodeStream( + output, my_aux_out, kLayerDC, + ModularStreamId::VarDCTDC(group_index))); + } + JXL_CHECK(modular_frame_encoder->EncodeStream( + output, my_aux_out, kLayerModularDcGroup, + ModularStreamId::ModularDC(group_index))); + if (frame_header->encoding == FrameEncoding::kVarDCT) { + const Rect& rect = + lossy_frame_encoder.State()->shared.DCGroupRect(group_index); + size_t nb_bits = CeilLog2Nonzero(rect.xsize() * rect.ysize()); + if (nb_bits != 0) { + BitWriter::Allotment allotment(output, nb_bits); + output->Write(nb_bits, + modular_frame_encoder->ac_metadata_size[group_index] - 1); + allotment.ReclaimAndCharge(output, kLayerControlFields, my_aux_out); + } + JXL_CHECK(modular_frame_encoder->EncodeStream( + output, my_aux_out, kLayerControlFields, + ModularStreamId::ACMetadata(group_index))); + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, frame_dim.num_dc_groups, + resize_aux_outs, process_dc_group, + "EncodeDCGroup")); + + if (frame_header->encoding == FrameEncoding::kVarDCT) { + JXL_RETURN_IF_ERROR(lossy_frame_encoder.EncodeGlobalACInfo( + get_output(global_ac_index), modular_frame_encoder.get())); + } + + std::atomic num_errors{0}; + const auto process_group = [&](const uint32_t group_index, + const size_t thread) { + AuxOut* my_aux_out = aux_out ? 
&aux_outs[thread] : nullptr; + + for (size_t i = 0; i < num_passes; i++) { + if (frame_header->encoding == FrameEncoding::kVarDCT) { + if (!lossy_frame_encoder.EncodeACGroup( + i, group_index, ac_group_code(i, group_index), my_aux_out)) { + num_errors.fetch_add(1, std::memory_order_relaxed); + return; + } + } + // Write all modular encoded data (color?, alpha, depth, extra channels) + if (!modular_frame_encoder->EncodeStream( + ac_group_code(i, group_index), my_aux_out, kLayerModularAcGroup, + ModularStreamId::ModularAC(group_index, i))) { + num_errors.fetch_add(1, std::memory_order_relaxed); + return; + } + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, num_groups, resize_aux_outs, + process_group, "EncodeGroupCoefficients")); + + // Resizing aux_outs to 0 also Assimilates the array. + static_cast(resize_aux_outs(0)); + JXL_RETURN_IF_ERROR(num_errors.load(std::memory_order_relaxed) == 0); + + for (BitWriter& bw : group_codes) { + BitWriter::Allotment allotment(&bw, 8); + bw.ZeroPadToByte(); // end of group. + allotment.ReclaimAndCharge(&bw, kLayerAC, aux_out); + } + + std::vector* permutation_ptr = nullptr; + std::vector permutation; + if (cparams.centerfirst && !(num_passes == 1 && num_groups == 1)) { + permutation_ptr = &permutation; + // Don't permute global DC/AC or DC. + permutation.resize(global_ac_index + 1); + std::iota(permutation.begin(), permutation.end(), 0); + std::vector ac_group_order(num_groups); + std::iota(ac_group_order.begin(), ac_group_order.end(), 0); + size_t group_dim = frame_dim.group_dim; + + // The center of the image is either given by parameters or chosen + // to be the middle of the image by default if center_x, center_y resp. + // are not provided. 
+ + int64_t imag_cx; + if (cparams.center_x != static_cast(-1)) { + JXL_RETURN_IF_ERROR(cparams.center_x < ib.xsize()); + imag_cx = cparams.center_x; + } else { + imag_cx = ib.xsize() / 2; + } + + int64_t imag_cy; + if (cparams.center_y != static_cast(-1)) { + JXL_RETURN_IF_ERROR(cparams.center_y < ib.ysize()); + imag_cy = cparams.center_y; + } else { + imag_cy = ib.ysize() / 2; + } + + // The center of the group containing the center of the image. + int64_t cx = (imag_cx / group_dim) * group_dim + group_dim / 2; + int64_t cy = (imag_cy / group_dim) * group_dim + group_dim / 2; + // This identifies in what area of the central group the center of the image + // lies in. + double direction = -std::atan2(imag_cy - cy, imag_cx - cx); + // This identifies the side of the central group the center of the image + // lies closest to. This can take values 0, 1, 2, 3 corresponding to left, + // bottom, right, top. + int64_t side = std::fmod((direction + 5 * kPi / 4), 2 * kPi) * 2 / kPi; + auto get_distance_from_center = [&](size_t gid) { + Rect r = passes_enc_state->shared.GroupRect(gid); + int64_t gcx = r.x0() + group_dim / 2; + int64_t gcy = r.y0() + group_dim / 2; + int64_t dx = gcx - cx; + int64_t dy = gcy - cy; + // The angle is determined by taking atan2 and adding an appropriate + // starting point depending on the side we want to start on. + double angle = std::remainder( + std::atan2(dy, dx) + kPi / 4 + side * (kPi / 2), 2 * kPi); + // Concentric squares in clockwise order. 
+ return std::make_pair(std::max(std::abs(dx), std::abs(dy)), angle); + }; + std::sort(ac_group_order.begin(), ac_group_order.end(), + [&](coeff_order_t a, coeff_order_t b) { + return get_distance_from_center(a) < + get_distance_from_center(b); + }); + std::vector inv_ac_group_order(ac_group_order.size(), 0); + for (size_t i = 0; i < ac_group_order.size(); i++) { + inv_ac_group_order[ac_group_order[i]] = i; + } + for (size_t i = 0; i < num_passes; i++) { + size_t pass_start = permutation.size(); + for (coeff_order_t v : inv_ac_group_order) { + permutation.push_back(pass_start + v); + } + } + std::vector new_group_codes(group_codes.size()); + for (size_t i = 0; i < permutation.size(); i++) { + new_group_codes[permutation[i]] = std::move(group_codes[i]); + } + group_codes = std::move(new_group_codes); + } + + JXL_RETURN_IF_ERROR( + WriteGroupOffsets(group_codes, permutation_ptr, writer, aux_out)); + writer->AppendByteAligned(group_codes); + + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_frame.h b/third-party/libjxl/libjxl/lib/jxl/enc_frame.h new file mode 100644 index 0000000000..b1dc637eb0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_frame.h @@ -0,0 +1,78 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_FRAME_H_ +#define LIB_JXL_ENC_FRAME_H_ + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +struct AuxOut; + +// Information needed for encoding a frame that is not contained elsewhere and +// does not belong to `cparams`. 
+// TODO(lode): if possible, it might be better to replace FrameInfo and several +// fields from ImageBundle (such as frame name and duration) by direct usage of +// jxl::FrameHeader itself. +struct FrameInfo { + // TODO(veluca): consider adding more parameters, such as custom patches. + bool save_before_color_transform = false; + // Whether or not the input image bundle is already in the codestream + // colorspace (as deduced by cparams). + // TODO(veluca): this is a hack - ImageBundle doesn't have a simple way to say + // "this is already in XYB". + bool ib_needs_color_transform = true; + FrameType frame_type = FrameType::kRegularFrame; + size_t dc_level = 0; + // Only used for kRegularFrame. + bool is_last = true; + bool is_preview = false; + // Information for storing this frame for future use (only for non-DC frames). + size_t save_as_reference = 0; + // The source frame for blending of a next frame, matching the + // save_as_reference value of a previous frame. Animated frames can use + // save_as_reference values 1, 2 and 3, while composite still frames can use + // save_as_reference values 0, 1, 2 and 3. The current C++ encoder + // implementation is assuming and using 1 for all frames of animations, so + // using that as the default value here. + // Corresponds to BlendingInfo::source from the FrameHeader. + size_t source = 1; + // Corresponds to BlendingInfo::clamp from the FrameHeader. + size_t clamp = 1; + // Corresponds to BlendingInfo::alpha_channel from the FrameHeader, or set to + // -1 to automatically choose it as the index of the first extra channel of + // type alpha. + int alpha_channel = -1; + + // If non-empty, uses this blending info for the extra channels, otherwise + // automatically chooses it. The encoder API will fill this vector with the + // extra channel info and allows more options. The non-API cjxl leaves it + // empty and relies on the default behavior. 
+ std::vector extra_channel_blending_info; +}; + +// Checks and adjusts CompressParams when they are all initialized. +Status ParamsPostInit(CompressParams* p); + +// Encodes a single frame (including its header) into a byte stream. Groups may +// be processed in parallel by `pool`. metadata is the ImageMetadata encoded in +// the codestream, and must be used for the FrameHeaders, do not use +// ib.metadata. +Status EncodeFrame(const CompressParams& cparams_orig, + const FrameInfo& frame_info, const CodecMetadata* metadata, + const ImageBundle& ib, PassesEncoderState* passes_enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + BitWriter* writer, AuxOut* aux_out); + +} // namespace jxl + +#endif // LIB_JXL_ENC_FRAME_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.cc b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.cc new file mode 100644 index 0000000000..32914a0dff --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.cc @@ -0,0 +1,62 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_gaborish.h" + +#include + +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { + +void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool) { + WeightsSymmetric5 weights[3]; + // Only an approximation. One or even two 3x3, and rank-1 (separable) 5x5 + // are insufficient. The numbers here have been obtained by butteraugli + // based optimizing the whole system and the errors produced are likely + // more favorable for good rate-distortion compromises rather than + // just using mathematical optimization to find the inverse. 
+ static const float kGaborish[5] = { + -0.090881924078487886f, -0.043663953593472138f, 0.01392497846646211f, + 0.0036189602184591141f, 0.0030557936884763499f}; + for (int i = 0; i < 3; ++i) { + double sum = 1.0 + mul[i] * 4 * + (kGaborish[0] + kGaborish[1] + kGaborish[2] + + kGaborish[4] + 2 * kGaborish[3]); + if (sum < 1e-5) { + sum = 1e-5; + } + const float normalize = static_cast(1.0 / sum); + const float normalize_mul = mul[i] * normalize; + weights[i] = WeightsSymmetric5{{HWY_REP4(normalize)}, + {HWY_REP4(normalize_mul * kGaborish[0])}, + {HWY_REP4(normalize_mul * kGaborish[2])}, + {HWY_REP4(normalize_mul * kGaborish[1])}, + {HWY_REP4(normalize_mul * kGaborish[4])}, + {HWY_REP4(normalize_mul * kGaborish[3])}}; + } + // Reduce memory footprint by only allocating a single plane and swapping it + // into the output Image3F. Better still would be tiling. + // Note that we cannot *allocate* a plane, as doing so might cause Image3F to + // have planes of different stride. Instead, we copy one plane in a temporary + // image and reuse the existing planes of the in/out image. + ImageF temp(in_out->Plane(2).xsize(), in_out->Plane(2).ysize()); + CopyImageTo(in_out->Plane(2), &temp); + Symmetric5(in_out->Plane(0), Rect(*in_out), weights[0], pool, + &in_out->Plane(2)); + Symmetric5(in_out->Plane(1), Rect(*in_out), weights[1], pool, + &in_out->Plane(0)); + Symmetric5(temp, Rect(*in_out), weights[2], pool, &in_out->Plane(1)); + // Now planes 0, 1, 2 hold the filtered channels 1, 2, 0. + in_out->Plane(0).Swap(in_out->Plane(1)); + // Now 2, 1, 0; the final swap below restores the order 0, 1, 2. + in_out->Plane(0).Swap(in_out->Plane(2)); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.h b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.h new file mode 100644 index 0000000000..102064f9a2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish.h @@ -0,0 +1,26 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_GABORISH_H_ +#define LIB_JXL_GABORISH_H_ +// NOTE(review): this guard omits the ENC_ prefix used by sibling enc_*.h headers (e.g. LIB_JXL_ENC_FRAME_H_) - confirm no other header defines LIB_JXL_GABORISH_H_. +// Linear smoothing (3x3 convolution) for deblocking without too much blur. + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/image.h" + +namespace jxl { + +// Used in encoder to reduce the impact of the decoder's smoothing. +// This is not exact. Works in-place to reduce memory use. +// The input is typically in XYB space. mul[c] scales the kernel weights used for plane c. +void GaborishInverse(Image3F* in_out, float mul[3], ThreadPool* pool); + +} // namespace jxl + +#endif // LIB_JXL_GABORISH_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gaborish_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish_test.cc new file mode 100644 index 0000000000..57a18e3338 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_gaborish_test.cc @@ -0,0 +1,77 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_gaborish.h" + +#include + +#include "lib/jxl/convolve.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +// Builds normalized WeightsSymmetric3 around a center weight of 1; weight1,2 need not be normalized.
+WeightsSymmetric3 GaborishKernel(float weight1, float weight2) { + constexpr float weight0 = 1.0f; + + // Normalize so that w0 + 4 * (w1 + w2) == 1. + const float mul = 1.0f / (weight0 + 4 * (weight1 + weight2)); + const float w0 = weight0 * mul; + const float w1 = weight1 * mul; + const float w2 = weight2 * mul; + + const WeightsSymmetric3 w = {{HWY_REP4(w0)}, {HWY_REP4(w1)}, {HWY_REP4(w2)}}; + return w; +} +// Convolves `in` into `out` (which must have the same size) using GaborishKernel(weight1, weight2). +void ConvolveGaborish(const ImageF& in, float weight1, float weight2, + ThreadPool* pool, ImageF* JXL_RESTRICT out) { + JXL_CHECK(SameSize(in, *out)); + Symmetric3(in, Rect(in), GaborishKernel(weight1, weight2), pool, out); +} +// Runs ConvolveGaborish on each plane, applies GaborishInverse, and compares the result with `in` using max_l1. NOTE(review): the forward pass is called with weights (0, 0), i.e. w0 = 1 and w1 = w2 = 0 - confirm that is intended. +void TestRoundTrip(const Image3F& in, float max_l1) { + Image3F fwd(in.xsize(), in.ysize()); + ThreadPool* null_pool = nullptr; + ConvolveGaborish(in.Plane(0), 0, 0, null_pool, &fwd.Plane(0)); + ConvolveGaborish(in.Plane(1), 0, 0, null_pool, &fwd.Plane(1)); + ConvolveGaborish(in.Plane(2), 0, 0, null_pool, &fwd.Plane(2)); + float w = 0.92718927264540152f; + float weights[3] = { + w, + w, + w, + }; + GaborishInverse(&fwd, weights, null_pool); + JXL_ASSERT_OK(VerifyRelativeError(in, fwd, max_l1, 1E-4f, _)); +} + +TEST(GaborishTest, TestZero) { + Image3F in(20, 20); + ZeroFillImage(&in); + TestRoundTrip(in, 0.0f); +} + +// Disabled: large difference. +#if 0 +TEST(GaborishTest, TestDirac) { + Image3F in(20, 20); + ZeroFillImage(&in); + in.PlaneRow(1, 10)[10] = 10.0f; + TestRoundTrip(in, 0.26f); +} +#endif + +TEST(GaborishTest, TestFlat) { + Image3F in(20, 20); + FillImage(1.0f, &in); + TestRoundTrip(in, 1E-5f); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_gamma_correct.h b/third-party/libjxl/libjxl/lib/jxl/enc_gamma_correct.h new file mode 100644 index 0000000000..0db7012bbe --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_gamma_correct.h @@ -0,0 +1,36 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_GAMMA_CORRECT_H_ +#define LIB_JXL_ENC_GAMMA_CORRECT_H_ + +// Deprecated: sRGB transfer function. Use color_management.h instead. + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/transfer_functions-inl.h" + +namespace jxl { + +// Converts an sRGB-encoded value to linear. Values are in [0, 1]; inputs outside that range return 0 or 1. +static JXL_INLINE double Srgb8ToLinearDirect(double srgb) { + if (srgb <= 0.0) return 0.0; + if (srgb <= 0.04045) return srgb / 12.92; + if (srgb >= 1.0) return 1.0; + return std::pow((srgb + 0.055) / 1.055, 2.4); +} + +// Converts a linear value to its sRGB encoding. Values are in [0, 1]; inputs outside that range return 0 or 1. +static JXL_INLINE double LinearToSrgb8Direct(double linear) { + if (linear <= 0.0) return 0.0; + if (linear >= 1.0) return 1.0; + if (linear <= 0.0031308) return linear * 12.92; + return std::pow(linear, 1.0 / 2.4) * 1.055 - 0.055; +} + +} // namespace jxl + +#endif // LIB_JXL_ENC_GAMMA_CORRECT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_group.cc b/third-party/libjxl/libjxl/lib/jxl/enc_group.cc new file mode 100644 index 0000000000..9ff5f5526f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_group.cc @@ -0,0 +1,515 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file.
+ +#include "lib/jxl/enc_group.h" + +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_group.cc" +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/dec_transforms-inl.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_transforms-inl.h" +#include "lib/jxl/image.h" +#include "lib/jxl/quantizer-inl.h" +#include "lib/jxl/quantizer.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Ge; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::MaskFromVec; +using hwy::HWY_NAMESPACE::Round; + +// NOTE: caller takes care of extracting quant from rect of RawQuantField. +void QuantizeBlockAC(const Quantizer& quantizer, const bool error_diffusion, + size_t c, float qm_multiplier, size_t quant_kind, + size_t xsize, size_t ysize, float* thresholds, + const float* JXL_RESTRICT block_in, int32_t* quant, + int32_t* JXL_RESTRICT block_out) { + const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c); + float qac = quantizer.Scale() * (*quant); + // Not SIMD-ified for now. 
+ if (c != 1 && xsize * ysize >= 4) { + for (int i = 0; i < 4; ++i) { + thresholds[i] -= 0.00744f * xsize * ysize; + if (thresholds[i] < 0.5) { + thresholds[i] = 0.5; + } + } + } + HWY_CAPPED(float, kBlockDim) df; + HWY_CAPPED(int32_t, kBlockDim) di; + HWY_CAPPED(uint32_t, kBlockDim) du; + const auto quantv = Set(df, qac * qm_multiplier); + for (size_t y = 0; y < ysize * kBlockDim; y++) { + size_t yfix = static_cast(y >= ysize * kBlockDim / 2) * 2; + const size_t off = y * kBlockDim * xsize; + for (size_t x = 0; x < xsize * kBlockDim; x += Lanes(df)) { + auto thr = Zero(df); + if (xsize == 1) { + HWY_ALIGN uint32_t kMask[kBlockDim] = {0, 0, 0, 0, ~0u, ~0u, ~0u, ~0u}; + const auto mask = MaskFromVec(BitCast(df, Load(du, kMask + x))); + thr = IfThenElse(mask, Set(df, thresholds[yfix + 1]), + Set(df, thresholds[yfix])); + } else { + // Same for all lanes in the vector. + thr = Set( + df, + thresholds[yfix + static_cast(x >= xsize * kBlockDim / 2)]); + } + const auto q = Mul(Load(df, qm + off + x), quantv); + const auto in = Load(df, block_in + off + x); + const auto val = Mul(q, in); + const auto nzero_mask = Ge(Abs(val), thr); + const auto v = ConvertTo(di, IfThenElseZero(nzero_mask, Round(val))); + Store(v, di, block_out + off + x); + } + } +} + +void AdjustQuantBlockAC(const Quantizer& quantizer, size_t c, + float qm_multiplier, size_t quant_kind, size_t xsize, + size_t ysize, float* thresholds, + const float* JXL_RESTRICT block_in, int32_t* quant) { + // No quantization adjusting for these small blocks. + // Quantization adjusting attempts to fix some known issues + // with larger blocks and on the 8x8 dct's emerging 8x8 blockiness + // when there are not many non-zeros. 
+ constexpr size_t kPartialBlockKinds = + (1 << AcStrategy::Type::IDENTITY) | (1 << AcStrategy::Type::DCT2X2) | + (1 << AcStrategy::Type::DCT4X4) | (1 << AcStrategy::Type::DCT4X8) | + (1 << AcStrategy::Type::DCT8X4) | (1 << AcStrategy::Type::AFV0) | + (1 << AcStrategy::Type::AFV1) | (1 << AcStrategy::Type::AFV2) | + (1 << AcStrategy::Type::AFV3); + if ((1 << quant_kind) & kPartialBlockKinds) { + return; + } + + const float* JXL_RESTRICT qm = quantizer.InvDequantMatrix(quant_kind, c); + float qac = quantizer.Scale() * (*quant); + if (xsize > 1 || ysize > 1) { + for (int i = 0; i < 4; ++i) { + thresholds[i] -= Clamp1(0.003f * xsize * ysize, 0.f, 0.08f); + if (thresholds[i] < 0.54) { + thresholds[i] = 0.54; + } + } + } + float sum_of_highest_freq_row_and_column = 0; + float sum_of_error = 0; + float sum_of_vals = 0; + float hfNonZeros[4] = {}; + float hfMaxError[4] = {}; + + for (size_t y = 0; y < ysize * kBlockDim; y++) { + for (size_t x = 0; x < xsize * kBlockDim; x++) { + const size_t pos = y * kBlockDim * xsize + x; + if (x < xsize && y < ysize) { + continue; + } + const size_t hfix = (static_cast(y >= ysize * kBlockDim / 2) * 2 + + static_cast(x >= xsize * kBlockDim / 2)); + const float val = block_in[pos] * (qm[pos] * qac * qm_multiplier); + const float v = (std::abs(val) < thresholds[hfix]) ? 
0 : rintf(val); + const float error = std::abs(val - v); + sum_of_error += error; + sum_of_vals += std::abs(v); + if (c == 1 && v == 0) { + if (hfMaxError[hfix] < error) { + hfMaxError[hfix] = error; + } + } + if (v != 0.0f) { + hfNonZeros[hfix] += std::abs(v); + bool in_corner = y >= 7 * ysize && x >= 7 * xsize; + bool on_border = + y == ysize * kBlockDim - 1 || x == xsize * kBlockDim - 1; + bool in_larger_corner = x >= 4 * xsize && y >= 4 * ysize; + if (in_corner || (on_border && in_larger_corner)) { + sum_of_highest_freq_row_and_column += std::abs(val); + } + } + } + } + if (c == 1 && sum_of_vals < std::max(xsize, ysize)) { + static const double kLimit[4] = { + 0.46, + 0.46, + 0.46, + 0.46, + }; + static const double kMul[4] = { + 0.9999, + 0.9999, + 0.9999, + 0.9999, + }; + const int32_t orig_quant = *quant; + int32_t new_quant = *quant; + for (int i = 1; i < 4; ++i) { + if (hfNonZeros[i] == 0.0 && hfMaxError[i] > kLimit[i]) { + new_quant = orig_quant + 1; + break; + } + } + *quant = new_quant; + if (hfNonZeros[3] == 0.0 && hfMaxError[3] > kLimit[3]) { + thresholds[3] = kMul[3] * hfMaxError[3] * new_quant / orig_quant; + } else if ((hfNonZeros[1] == 0.0 && hfMaxError[1] > kLimit[1]) || + (hfNonZeros[2] == 0.0 && hfMaxError[2] > kLimit[2])) { + thresholds[1] = kMul[1] * std::max(hfMaxError[1], hfMaxError[2]) * + new_quant / orig_quant; + thresholds[2] = thresholds[1]; + } else if (hfNonZeros[0] == 0.0 && hfMaxError[0] > kLimit[0]) { + thresholds[0] = kMul[0] * hfMaxError[0] * new_quant / orig_quant; + } + } + // Heuristic for improving accuracy of high-frequency patterns + // occurring in an environment with no medium-frequency masking + // patterns. 
+ { + float all = + hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] + 1; + float mul[3] = {70, 30, 60}; + if (mul[c] * sum_of_highest_freq_row_and_column >= all) { + *quant += mul[c] * sum_of_highest_freq_row_and_column / all; + if (*quant >= Quantizer::kQuantMax) { + *quant = Quantizer::kQuantMax - 1; + } + } + } + if (quant_kind == AcStrategy::Type::DCT) { + // If this 8x8 block is too flat, increase the adaptive quantization level + // a bit to reduce visible block boundaries and requantize the block. + if (hfNonZeros[0] + hfNonZeros[1] + hfNonZeros[2] + hfNonZeros[3] < 11) { + *quant += 1; + if (*quant >= Quantizer::kQuantMax) { + *quant = Quantizer::kQuantMax - 1; + } + } + } + { + static const double kMul1[3][3] = { + { + 0.13289977307244785, + 0.13991489841351781, + 0.083900681804010419, + }, + { + 0.69938583107168562, + 0.19612117586770869, + 0.15307492924107463, + }, + { + 0.099160801461836312, + 0.16684944507307059, + 0.16608517854968413, + }, + }; + static const double kMul2[3][3] = { + { + 0.24773711435293466, + 0.65189637683223112, + 1.0, + }, + { + 0.46465181913392556, + 0.3142440606068525, + 0.30128806880068809, + }, + { + 0.45203398366713637, + 0.15063329382779103, + 0.067846407329923752, + }, + }; + const float kQuantNormalizer = 2.8261379721245263; + sum_of_error *= kQuantNormalizer; + sum_of_vals *= kQuantNormalizer; + if (quant_kind >= AcStrategy::Type::DCT16X16) { + int ix = 2; + if (quant_kind == AcStrategy::Type::DCT32X16 || + quant_kind == AcStrategy::Type::DCT16X32) { + ix = 1; + } else if (quant_kind == AcStrategy::Type::DCT16X16) { + ix = 0; + } + int step = + sum_of_error / (kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim + + kMul2[ix][c] * sum_of_vals); + if (step >= 2) { + step = 2; + } + if (sum_of_error > kMul1[ix][c] * xsize * ysize * kBlockDim * kBlockDim + + kMul2[ix][c] * sum_of_vals) { + *quant += step; + if (*quant >= Quantizer::kQuantMax) { + *quant = Quantizer::kQuantMax - 1; + } + } + } + } + { + // Reduce 
quant in highly active areas. + int32_t div = (xsize + ysize) / 2; + int32_t activity = (hfNonZeros[0] + div / 2) / div; + int32_t orig_qp_limit = std::max(4, *quant / 2); + for (int i = 1; i < 4; ++i) { + activity = std::min(activity, (hfNonZeros[i] + div / 2) / div); + } + if (activity >= 15) { + activity = 15; + } + int32_t qp = *quant - activity; + if (c == 1) { + for (int i = 1; i < 4; ++i) { + thresholds[i] += 0.01 * activity; + } + } + if (qp < orig_qp_limit) { + qp = orig_qp_limit; + } + *quant = qp; + } +} + +// NOTE: caller takes care of extracting quant from rect of RawQuantField. +void QuantizeRoundtripYBlockAC(PassesEncoderState* enc_state, const size_t size, + const Quantizer& quantizer, + const bool error_diffusion, size_t quant_kind, + size_t xsize, size_t ysize, + const float* JXL_RESTRICT biases, int32_t* quant, + float* JXL_RESTRICT inout, + int32_t* JXL_RESTRICT quantized) { + float thres_y[4] = {0.58f, 0.64f, 0.64f, 0.64f}; + { + int32_t max_quant = 0; + int quant_orig = *quant; + float val[3] = {enc_state->x_qm_multiplier, 1.0f, + enc_state->b_qm_multiplier}; + int clut[3] = {1, 0, 2}; + for (int ii = 0; ii < 3; ++ii) { + float thres[4] = {0.58f, 0.64f, 0.64f, 0.64f}; + int c = clut[ii]; + *quant = quant_orig; + AdjustQuantBlockAC(quantizer, c, val[c], quant_kind, xsize, ysize, + &thres[0], inout + c * size, quant); + // Dead zone adjustment + if (c == 1) { + for (int k = 0; k < 4; ++k) { + thres_y[k] = thres[k]; + } + } + max_quant = std::max(*quant, max_quant); + } + *quant = max_quant; + } + + QuantizeBlockAC(quantizer, error_diffusion, 1, 1.0f, quant_kind, xsize, ysize, + &thres_y[0], inout + size, quant, quantized + size); + + const float* JXL_RESTRICT dequant_matrix = + quantizer.DequantMatrix(quant_kind, 1); + + HWY_CAPPED(float, kDCTBlockSize) df; + HWY_CAPPED(int32_t, kDCTBlockSize) di; + const auto inv_qac = Set(df, quantizer.inv_quant_ac(*quant)); + for (size_t k = 0; k < kDCTBlockSize * xsize * ysize; k += Lanes(df)) { + const auto 
quant = Load(di, quantized + size + k); + const auto adj_quant = AdjustQuantBias(di, 1, quant, biases); + const auto dequantm = Load(df, dequant_matrix + k); + Store(Mul(Mul(adj_quant, dequantm), inv_qac), df, inout + size + k); + } +} + +void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state, + const Image3F& opsin, Image3F* dc) { + const Rect block_group_rect = enc_state->shared.BlockGroupRect(group_idx); + const Rect group_rect = enc_state->shared.GroupRect(group_idx); + const Rect cmap_rect( + block_group_rect.x0() / kColorTileDimInBlocks, + block_group_rect.y0() / kColorTileDimInBlocks, + DivCeil(block_group_rect.xsize(), kColorTileDimInBlocks), + DivCeil(block_group_rect.ysize(), kColorTileDimInBlocks)); + + const size_t xsize_blocks = block_group_rect.xsize(); + const size_t ysize_blocks = block_group_rect.ysize(); + + const size_t dc_stride = static_cast(dc->PixelsPerRow()); + const size_t opsin_stride = static_cast(opsin.PixelsPerRow()); + + ImageI& full_quant_field = enc_state->shared.raw_quant_field; + const CompressParams& cparams = enc_state->cparams; + + // TODO(veluca): consider strategies to reduce this memory. + auto mem = hwy::AllocateAligned(3 * AcStrategy::kMaxCoeffArea); + auto fmem = hwy::AllocateAligned(5 * AcStrategy::kMaxCoeffArea); + float* JXL_RESTRICT scratch_space = + fmem.get() + 3 * AcStrategy::kMaxCoeffArea; + { + // Only use error diffusion in Squirrel mode or slower. + const bool error_diffusion = cparams.speed_tier <= SpeedTier::kSquirrel; + constexpr HWY_CAPPED(float, kDCTBlockSize) d; + + int32_t* JXL_RESTRICT coeffs[3][kMaxNumPasses] = {}; + size_t num_passes = enc_state->progressive_splitter.GetNumPasses(); + JXL_DASSERT(num_passes > 0); + for (size_t i = 0; i < num_passes; i++) { + // TODO(veluca): 16-bit quantized coeffs are not implemented yet. 
+ JXL_ASSERT(enc_state->coeffs[i]->Type() == ACType::k32); + for (size_t c = 0; c < 3; c++) { + coeffs[c][i] = enc_state->coeffs[i]->PlaneRow(c, group_idx, 0).ptr32; + } + } + + HWY_ALIGN float* coeffs_in = fmem.get(); + HWY_ALIGN int32_t* quantized = mem.get(); + + for (size_t by = 0; by < ysize_blocks; ++by) { + int32_t* JXL_RESTRICT row_quant_ac = + block_group_rect.Row(&full_quant_field, by); + size_t ty = by / kColorTileDimInBlocks; + const int8_t* JXL_RESTRICT row_cmap[3] = { + cmap_rect.ConstRow(enc_state->shared.cmap.ytox_map, ty), + nullptr, + cmap_rect.ConstRow(enc_state->shared.cmap.ytob_map, ty), + }; + const float* JXL_RESTRICT opsin_rows[3] = { + group_rect.ConstPlaneRow(opsin, 0, by * kBlockDim), + group_rect.ConstPlaneRow(opsin, 1, by * kBlockDim), + group_rect.ConstPlaneRow(opsin, 2, by * kBlockDim), + }; + float* JXL_RESTRICT dc_rows[3] = { + block_group_rect.PlaneRow(dc, 0, by), + block_group_rect.PlaneRow(dc, 1, by), + block_group_rect.PlaneRow(dc, 2, by), + }; + AcStrategyRow ac_strategy_row = + enc_state->shared.ac_strategy.ConstRow(block_group_rect, by); + for (size_t tx = 0; tx < DivCeil(xsize_blocks, kColorTileDimInBlocks); + tx++) { + const auto x_factor = + Set(d, enc_state->shared.cmap.YtoXRatio(row_cmap[0][tx])); + const auto b_factor = + Set(d, enc_state->shared.cmap.YtoBRatio(row_cmap[2][tx])); + for (size_t bx = tx * kColorTileDimInBlocks; + bx < xsize_blocks && bx < (tx + 1) * kColorTileDimInBlocks; ++bx) { + const AcStrategy acs = ac_strategy_row[bx]; + if (!acs.IsFirstBlock()) continue; + + size_t xblocks = acs.covered_blocks_x(); + size_t yblocks = acs.covered_blocks_y(); + + CoefficientLayout(&yblocks, &xblocks); + + size_t size = kDCTBlockSize * xblocks * yblocks; + + // DCT Y channel, roundtrip-quantize it and set DC. 
+ int32_t quant_ac = row_quant_ac[bx]; + for (size_t c : {0, 1, 2}) { + TransformFromPixels(acs.Strategy(), opsin_rows[c] + bx * kBlockDim, + opsin_stride, coeffs_in + c * size, + scratch_space); + } + DCFromLowestFrequencies(acs.Strategy(), coeffs_in + size, + dc_rows[1] + bx, dc_stride); + + QuantizeRoundtripYBlockAC( + enc_state, size, enc_state->shared.quantizer, error_diffusion, + acs.RawStrategy(), xblocks, yblocks, kDefaultQuantBias, &quant_ac, + coeffs_in, quantized); + + // Unapply color correlation + for (size_t k = 0; k < size; k += Lanes(d)) { + const auto in_x = Load(d, coeffs_in + k); + const auto in_y = Load(d, coeffs_in + size + k); + const auto in_b = Load(d, coeffs_in + 2 * size + k); + const auto out_x = NegMulAdd(x_factor, in_y, in_x); + const auto out_b = NegMulAdd(b_factor, in_y, in_b); + Store(out_x, d, coeffs_in + k); + Store(out_b, d, coeffs_in + 2 * size + k); + } + + // Quantize X and B channels and set DC. + for (size_t c : {0, 2}) { + float thres[4] = {0.58f, 0.62f, 0.62f, 0.62f}; + QuantizeBlockAC(enc_state->shared.quantizer, error_diffusion, c, + c == 0 ? 
enc_state->x_qm_multiplier + : enc_state->b_qm_multiplier, + acs.RawStrategy(), xblocks, yblocks, &thres[0], + coeffs_in + c * size, &quant_ac, + quantized + c * size); + DCFromLowestFrequencies(acs.Strategy(), coeffs_in + c * size, + dc_rows[c] + bx, dc_stride); + } + row_quant_ac[bx] = quant_ac; + for (size_t c = 0; c < 3; c++) { + enc_state->progressive_splitter.SplitACCoefficients( + quantized + c * size, acs, bx, by, coeffs[c]); + for (size_t p = 0; p < num_passes; p++) { + coeffs[c][p] += size; + } + } + } + } + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(ComputeCoefficients); +void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state, + const Image3F& opsin, Image3F* dc) { + return HWY_DYNAMIC_DISPATCH(ComputeCoefficients)(group_idx, enc_state, opsin, + dc); +} + +Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx, + size_t histogram_idx, + const PassesEncoderState& enc_state, + BitWriter* writer, AuxOut* aux_out) { + // Select which histogram to use among those of the current pass. + const size_t num_histograms = enc_state.shared.num_histograms; + // num_histograms is 0 only for lossless. 
+ JXL_ASSERT(num_histograms == 0 || histogram_idx < num_histograms); + size_t histo_selector_bits = CeilLog2Nonzero(num_histograms); + + if (histo_selector_bits != 0) { + BitWriter::Allotment allotment(writer, histo_selector_bits); + writer->Write(histo_selector_bits, histogram_idx); + allotment.ReclaimAndCharge(writer, kLayerAC, aux_out); + } + WriteTokens(enc_state.passes[pass_idx].ac_tokens[group_idx], + enc_state.passes[pass_idx].codes, + enc_state.passes[pass_idx].context_map, writer, kLayerACTokens, + aux_out); + + return true; +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_group.h b/third-party/libjxl/libjxl/lib/jxl/enc_group.h new file mode 100644 index 0000000000..0caf408a03 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_group.h @@ -0,0 +1,32 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_GROUP_H_ +#define LIB_JXL_ENC_GROUP_H_ + +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/image.h" + +namespace jxl { + +struct AuxOut; +struct PassesEncoderState; + +// Fills DC +void ComputeCoefficients(size_t group_idx, PassesEncoderState* enc_state, + const Image3F& opsin, Image3F* dc); + +Status EncodeGroupTokenizedCoefficients(size_t group_idx, size_t pass_idx, + size_t histogram_idx, + const PassesEncoderState& enc_state, + BitWriter* writer, AuxOut* aux_out); + +} // namespace jxl + +#endif // LIB_JXL_ENC_GROUP_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.cc b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.cc new file mode 100644 index 0000000000..b5a8cdfa73 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.cc @@ -0,0 +1,947 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_heuristics.h" + +#include +#include + +#include +#include +#include + +#include "lib/jxl/enc_ac_strategy.h" +#include "lib/jxl/enc_adaptive_quantization.h" +#include "lib/jxl/enc_ar_control_field.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_chroma_from_luma.h" +#include "lib/jxl/enc_gaborish.h" +#include "lib/jxl/enc_modular.h" +#include "lib/jxl/enc_noise.h" +#include "lib/jxl/enc_patch_dictionary.h" +#include "lib/jxl/enc_photon_noise.h" +#include "lib/jxl/enc_quant_weights.h" +#include "lib/jxl/enc_splines.h" +#include "lib/jxl/enc_xyb.h" + +namespace jxl { + +struct AuxOut; + +namespace { +void FindBestBlockEntropyModel(PassesEncoderState& enc_state) { + if (enc_state.cparams.decoding_speed_tier >= 1) { + static constexpr uint8_t kSimpleCtxMap[] = { + // Cluster all blocks together + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // + }; + static_assert( + 3 * kNumOrders == sizeof(kSimpleCtxMap) / sizeof *kSimpleCtxMap, + "Update simple context map"); + + auto bcm = enc_state.shared.block_ctx_map; + bcm.ctx_map.assign(std::begin(kSimpleCtxMap), std::end(kSimpleCtxMap)); + bcm.num_ctxs = 2; + bcm.num_dc_ctxs = 1; + return; + } + if (enc_state.cparams.speed_tier >= SpeedTier::kFalcon) { + return; + } + const ImageI& rqf = enc_state.shared.raw_quant_field; + // No need to change context modeling for small images. + size_t tot = rqf.xsize() * rqf.ysize(); + size_t size_for_ctx_model = + (1 << 10) * enc_state.cparams.butteraugli_distance; + if (tot < size_for_ctx_model) return; + + struct OccCounters { + // count the occurrences of each qf value and each strategy type. 
+ OccCounters(const ImageI& rqf, const AcStrategyImage& ac_strategy) { + for (size_t y = 0; y < rqf.ysize(); y++) { + const int32_t* qf_row = rqf.Row(y); + AcStrategyRow acs_row = ac_strategy.ConstRow(y); + for (size_t x = 0; x < rqf.xsize(); x++) { + int ord = kStrategyOrder[acs_row[x].RawStrategy()]; + int qf = qf_row[x] - 1; + qf_counts[qf]++; + qf_ord_counts[ord][qf]++; + ord_counts[ord]++; + } + } + } + + size_t qf_counts[256] = {}; + size_t qf_ord_counts[kNumOrders][256] = {}; + size_t ord_counts[kNumOrders] = {}; + }; + // The OccCounters struct is too big to allocate on the stack. + std::unique_ptr counters( + new OccCounters(rqf, enc_state.shared.ac_strategy)); + + // Splitting the context model according to the quantization field seems to + // mostly benefit only large images. + size_t size_for_qf_split = (1 << 13) * enc_state.cparams.butteraugli_distance; + size_t num_qf_segments = tot < size_for_qf_split ? 1 : 2; + std::vector& qft = enc_state.shared.block_ctx_map.qf_thresholds; + qft.clear(); + // Divide the quant field in up to num_qf_segments segments. + size_t cumsum = 0; + size_t next = 1; + size_t last_cut = 256; + size_t cut = tot * next / num_qf_segments; + for (uint32_t j = 0; j < 256; j++) { + cumsum += counters->qf_counts[j]; + if (cumsum > cut) { + if (j != 0) { + qft.push_back(j); + } + last_cut = j; + while (cumsum > cut) { + next++; + cut = tot * next / num_qf_segments; + } + } else if (next > qft.size() + 1) { + if (j - 1 == last_cut && j != 0) { + qft.push_back(j); + } + } + } + + // Count the occurrences of each segment. + std::vector counts(kNumOrders * (qft.size() + 1)); + size_t qft_pos = 0; + for (size_t j = 0; j < 256; j++) { + if (qft_pos < qft.size() && j == qft[qft_pos]) { + qft_pos++; + } + for (size_t i = 0; i < kNumOrders; i++) { + counts[qft_pos + i * (qft.size() + 1)] += counters->qf_ord_counts[i][j]; + } + } + + // Repeatedly merge the lowest-count pair. 
+ std::vector remap((qft.size() + 1) * kNumOrders); + std::iota(remap.begin(), remap.end(), 0); + std::vector clusters(remap); + size_t nb_clusters = Clamp1((int)(tot / size_for_ctx_model / 2), 2, 9); + size_t nb_clusters_chroma = Clamp1((int)(tot / size_for_ctx_model / 3), 1, 5); + // This is O(n^2 log n), but n is small. + while (clusters.size() > nb_clusters) { + std::sort(clusters.begin(), clusters.end(), + [&](int a, int b) { return counts[a] > counts[b]; }); + counts[clusters[clusters.size() - 2]] += counts[clusters.back()]; + counts[clusters.back()] = 0; + remap[clusters.back()] = clusters[clusters.size() - 2]; + clusters.pop_back(); + } + for (size_t i = 0; i < remap.size(); i++) { + while (remap[remap[i]] != remap[i]) { + remap[i] = remap[remap[i]]; + } + } + // Relabel starting from 0. + std::vector remap_remap(remap.size(), remap.size()); + size_t num = 0; + for (size_t i = 0; i < remap.size(); i++) { + if (remap_remap[remap[i]] == remap.size()) { + remap_remap[remap[i]] = num++; + } + remap[i] = remap_remap[remap[i]]; + } + // Write the block context map. + auto& ctx_map = enc_state.shared.block_ctx_map.ctx_map; + ctx_map = remap; + ctx_map.resize(remap.size() * 3); + // for chroma, only use up to nb_clusters_chroma separate block contexts + // (those for the biggest clusters) + for (size_t i = remap.size(); i < remap.size() * 3; i++) { + ctx_map[i] = num + Clamp1((int)remap[i % remap.size()], 0, + (int)nb_clusters_chroma - 1); + } + enc_state.shared.block_ctx_map.num_ctxs = + *std::max_element(ctx_map.begin(), ctx_map.end()) + 1; +} + +} // namespace + +void FindBestDequantMatrices(const CompressParams& cparams, + const Image3F& opsin, + ModularFrameEncoder* modular_frame_encoder, + DequantMatrices* dequant_matrices) { + // TODO(veluca): quant matrices for no-gaborish. + // TODO(veluca): heuristics for in-bitstream quant tables. 
// Folds `v` into a running pair holding the smallest (`min1`) and the second
// smallest (`min2`) value seen so far. Values that are not strictly below
// `min2` leave both unchanged.
void StoreMin2(const float v, float& min1, float& min2) {
  if (!(v < min2)) {
    return;  // Not among the two smallest.
  }
  if (v < min1) {
    // New overall minimum: the previous one becomes the runner-up.
    min2 = min1;
    min1 = v;
    return;
  }
  // Between the two: replaces the runner-up only.
  min2 = v;
}
row_in[x + 1] : row_in[x]; + float n = row_n[x]; + float s = row_s[x]; + float dw = std::abs(c - w); + float de = std::abs(c - e); + float dn = std::abs(c - n); + float ds = std::abs(c - s); + float min = std::numeric_limits::max(); + float min2 = std::numeric_limits::max(); + StoreMin2(dw, min, min2); + StoreMin2(de, min, min2); + StoreMin2(dn, min, min2); + StoreMin2(ds, min, min2); + row_out[x] = min2; + } + } +} + +// Downsamples the image by a factor of 2 with a kernel that's sharper than +// the standard 2x2 box kernel used by DownsampleImage. +// The kernel is optimized against the result of the 2x2 upsampling kernel used +// by the decoder. Ringing is slightly reduced by clamping the values of the +// resulting pixels within certain bounds of a small region in the original +// image. +void DownsampleImage2_Sharper(const ImageF& input, ImageF* output) { + const int64_t kernelx = 12; + const int64_t kernely = 12; + + static const float kernel[144] = { + -0.000314256996835, -0.000314256996835, -0.000897597057705, + -0.000562751488849, -0.000176807273646, 0.001864627368902, + 0.001864627368902, -0.000176807273646, -0.000562751488849, + -0.000897597057705, -0.000314256996835, -0.000314256996835, + -0.000314256996835, -0.001527942804748, -0.000121760530512, + 0.000191123989093, 0.010193185932466, 0.058637519197110, + 0.058637519197110, 0.010193185932466, 0.000191123989093, + -0.000121760530512, -0.001527942804748, -0.000314256996835, + -0.000897597057705, -0.000121760530512, 0.000946363683751, + 0.007113577630288, 0.000437956841058, -0.000372823835211, + -0.000372823835211, 0.000437956841058, 0.007113577630288, + 0.000946363683751, -0.000121760530512, -0.000897597057705, + -0.000562751488849, 0.000191123989093, 0.007113577630288, + 0.044592622228814, 0.000222278879007, -0.162864473015945, + -0.162864473015945, 0.000222278879007, 0.044592622228814, + 0.007113577630288, 0.000191123989093, -0.000562751488849, + -0.000176807273646, 0.010193185932466, 
0.000437956841058, + 0.000222278879007, -0.000913092543974, -0.017071696107902, + -0.017071696107902, -0.000913092543974, 0.000222278879007, + 0.000437956841058, 0.010193185932466, -0.000176807273646, + 0.001864627368902, 0.058637519197110, -0.000372823835211, + -0.162864473015945, -0.017071696107902, 0.414660099370354, + 0.414660099370354, -0.017071696107902, -0.162864473015945, + -0.000372823835211, 0.058637519197110, 0.001864627368902, + 0.001864627368902, 0.058637519197110, -0.000372823835211, + -0.162864473015945, -0.017071696107902, 0.414660099370354, + 0.414660099370354, -0.017071696107902, -0.162864473015945, + -0.000372823835211, 0.058637519197110, 0.001864627368902, + -0.000176807273646, 0.010193185932466, 0.000437956841058, + 0.000222278879007, -0.000913092543974, -0.017071696107902, + -0.017071696107902, -0.000913092543974, 0.000222278879007, + 0.000437956841058, 0.010193185932466, -0.000176807273646, + -0.000562751488849, 0.000191123989093, 0.007113577630288, + 0.044592622228814, 0.000222278879007, -0.162864473015945, + -0.162864473015945, 0.000222278879007, 0.044592622228814, + 0.007113577630288, 0.000191123989093, -0.000562751488849, + -0.000897597057705, -0.000121760530512, 0.000946363683751, + 0.007113577630288, 0.000437956841058, -0.000372823835211, + -0.000372823835211, 0.000437956841058, 0.007113577630288, + 0.000946363683751, -0.000121760530512, -0.000897597057705, + -0.000314256996835, -0.001527942804748, -0.000121760530512, + 0.000191123989093, 0.010193185932466, 0.058637519197110, + 0.058637519197110, 0.010193185932466, 0.000191123989093, + -0.000121760530512, -0.001527942804748, -0.000314256996835, + -0.000314256996835, -0.000314256996835, -0.000897597057705, + -0.000562751488849, -0.000176807273646, 0.001864627368902, + 0.001864627368902, -0.000176807273646, -0.000562751488849, + -0.000897597057705, -0.000314256996835, -0.000314256996835}; + + int64_t xsize = input.xsize(); + int64_t ysize = input.ysize(); + + ImageF box_downsample(xsize, 
ysize); + CopyImageTo(input, &box_downsample); + DownsampleImage(&box_downsample, 2); + + ImageF mask(box_downsample.xsize(), box_downsample.ysize()); + CreateMask(box_downsample, mask); + + for (size_t y = 0; y < output->ysize(); y++) { + float* row_out = output->Row(y); + const float* row_in[kernely]; + const float* row_mask = mask.Row(y); + // get the rows in the support + for (size_t ky = 0; ky < kernely; ky++) { + int64_t iy = y * 2 + ky - (kernely - 1) / 2; + if (iy < 0) iy = 0; + if (iy >= ysize) iy = ysize - 1; + row_in[ky] = input.Row(iy); + } + + for (size_t x = 0; x < output->xsize(); x++) { + // get min and max values of the original image in the support + float min = std::numeric_limits::max(); + float max = std::numeric_limits::min(); + // kernelx - R and kernely - R are the radius of a rectangular region in + // which the values of a pixel are bounded to reduce ringing. + static constexpr int64_t R = 5; + for (int64_t ky = R; ky + R < kernely; ky++) { + for (int64_t kx = R; kx + R < kernelx; kx++) { + int64_t ix = x * 2 + kx - (kernelx - 1) / 2; + if (ix < 0) ix = 0; + if (ix >= xsize) ix = xsize - 1; + min = std::min(min, row_in[ky][ix]); + max = std::max(max, row_in[ky][ix]); + } + } + + float sum = 0; + for (int64_t ky = 0; ky < kernely; ky++) { + for (int64_t kx = 0; kx < kernelx; kx++) { + int64_t ix = x * 2 + kx - (kernelx - 1) / 2; + if (ix < 0) ix = 0; + if (ix >= xsize) ix = xsize - 1; + sum += row_in[ky][ix] * kernel[ky * kernelx + kx]; + } + } + + row_out[x] = sum; + + // Clamp the pixel within the value of a small area to prevent ringning. + // The mask determines how much to clamp, clamp more to reduce more + // ringing in smooth areas, clamp less in noisy areas to get more + // sharpness. Higher mask_multiplier gives less clamping, so less + // ringing reduction. 
+ const constexpr float mask_multiplier = 1; + float a = row_mask[x] * mask_multiplier; + float clip_min = min - a; + float clip_max = max + a; + if (row_out[x] < clip_min) { + row_out[x] = clip_min; + } else if (row_out[x] > clip_max) { + row_out[x] = clip_max; + } + } + } +} + +void DownsampleImage2_Sharper(Image3F* opsin) { + // Allocate extra space to avoid a reallocation when padding. + Image3F downsampled(DivCeil(opsin->xsize(), 2) + kBlockDim, + DivCeil(opsin->ysize(), 2) + kBlockDim); + downsampled.ShrinkTo(downsampled.xsize() - kBlockDim, + downsampled.ysize() - kBlockDim); + + for (size_t c = 0; c < 3; c++) { + DownsampleImage2_Sharper(opsin->Plane(c), &downsampled.Plane(c)); + } + *opsin = std::move(downsampled); +} + +// The default upsampling kernels used by Upsampler in the decoder. +static const constexpr int64_t kSize = 5; + +static const float kernel00[25] = { + -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f, + -0.03452303f, 0.14111091f, 0.28896755f, 0.00278718f, -0.01610267f, + -0.04022174f, 0.28896755f, 0.56661550f, 0.03777607f, -0.01986694f, + -0.02921014f, 0.00278718f, 0.03777607f, -0.03144731f, -0.01185068f, + -0.00624645f, -0.01610267f, -0.01986694f, -0.01185068f, -0.00213539f, +}; +static const float kernel01[25] = { + -0.00624645f, -0.01610267f, -0.01986694f, -0.01185068f, -0.00213539f, + -0.02921014f, 0.00278718f, 0.03777607f, -0.03144731f, -0.01185068f, + -0.04022174f, 0.28896755f, 0.56661550f, 0.03777607f, -0.01986694f, + -0.03452303f, 0.14111091f, 0.28896755f, 0.00278718f, -0.01610267f, + -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f, +}; +static const float kernel10[25] = { + -0.00624645f, -0.02921014f, -0.04022174f, -0.03452303f, -0.01716200f, + -0.01610267f, 0.00278718f, 0.28896755f, 0.14111091f, -0.03452303f, + -0.01986694f, 0.03777607f, 0.56661550f, 0.28896755f, -0.04022174f, + -0.01185068f, -0.03144731f, 0.03777607f, 0.00278718f, -0.02921014f, + -0.00213539f, -0.01185068f, -0.01986694f, 
-0.01610267f, -0.00624645f, +}; +static const float kernel11[25] = { + -0.00213539f, -0.01185068f, -0.01986694f, -0.01610267f, -0.00624645f, + -0.01185068f, -0.03144731f, 0.03777607f, 0.00278718f, -0.02921014f, + -0.01986694f, 0.03777607f, 0.56661550f, 0.28896755f, -0.04022174f, + -0.01610267f, 0.00278718f, 0.28896755f, 0.14111091f, -0.03452303f, + -0.00624645f, -0.02921014f, -0.04022174f, -0.03452303f, -0.01716200f, +}; + +// Does exactly the same as the Upsampler in dec_upsampler for 2x2 pixels, with +// default CustomTransformData. +// TODO(lode): use Upsampler instead. However, it requires pre-initialization +// and padding on the left side of the image which requires refactoring the +// other code using this. +static void UpsampleImage(const ImageF& input, ImageF* output) { + int64_t xsize = input.xsize(); + int64_t ysize = input.ysize(); + int64_t xsize2 = output->xsize(); + int64_t ysize2 = output->ysize(); + for (int64_t y = 0; y < ysize2; y++) { + for (int64_t x = 0; x < xsize2; x++) { + auto kernel = kernel00; + if ((x & 1) && (y & 1)) { + kernel = kernel11; + } else if (x & 1) { + kernel = kernel10; + } else if (y & 1) { + kernel = kernel01; + } + float sum = 0; + int64_t x2 = x / 2; + int64_t y2 = y / 2; + + // get min and max values of the original image in the support + float min = std::numeric_limits::max(); + float max = std::numeric_limits::min(); + + for (int64_t ky = 0; ky < kSize; ky++) { + for (int64_t kx = 0; kx < kSize; kx++) { + int64_t xi = x2 - kSize / 2 + kx; + int64_t yi = y2 - kSize / 2 + ky; + if (xi < 0) xi = 0; + if (xi >= xsize) xi = input.xsize() - 1; + if (yi < 0) yi = 0; + if (yi >= ysize) yi = input.ysize() - 1; + min = std::min(min, input.Row(yi)[xi]); + max = std::max(max, input.Row(yi)[xi]); + } + } + + for (int64_t ky = 0; ky < kSize; ky++) { + for (int64_t kx = 0; kx < kSize; kx++) { + int64_t xi = x2 - kSize / 2 + kx; + int64_t yi = y2 - kSize / 2 + ky; + if (xi < 0) xi = 0; + if (xi >= xsize) xi = input.xsize() - 1; + if 
(yi < 0) yi = 0; + if (yi >= ysize) yi = input.ysize() - 1; + sum += input.Row(yi)[xi] * kernel[ky * kSize + kx]; + } + } + output->Row(y)[x] = sum; + if (output->Row(y)[x] < min) output->Row(y)[x] = min; + if (output->Row(y)[x] > max) output->Row(y)[x] = max; + } + } +} + +// Returns the derivative of Upsampler, with respect to input pixel x2, y2, to +// output pixel x, y (ignoring the clamping). +float UpsamplerDeriv(int64_t x2, int64_t y2, int64_t x, int64_t y) { + auto kernel = kernel00; + if ((x & 1) && (y & 1)) { + kernel = kernel11; + } else if (x & 1) { + kernel = kernel10; + } else if (y & 1) { + kernel = kernel01; + } + + int64_t ix = x / 2; + int64_t iy = y / 2; + int64_t kx = x2 - ix + kSize / 2; + int64_t ky = y2 - iy + kSize / 2; + + // This should not happen. + if (kx < 0 || kx >= kSize || ky < 0 || ky >= kSize) return 0; + + return kernel[ky * kSize + kx]; +} + +// Apply the derivative of the Upsampler to the input, reversing the effect of +// its coefficients. The output image is 2x2 times smaller than the input. +void AntiUpsample(const ImageF& input, ImageF* d) { + int64_t xsize = input.xsize(); + int64_t ysize = input.ysize(); + int64_t xsize2 = d->xsize(); + int64_t ysize2 = d->ysize(); + int64_t k0 = kSize - 1; + int64_t k1 = kSize; + for (int64_t y2 = 0; y2 < ysize2; ++y2) { + auto* row = d->Row(y2); + for (int64_t x2 = 0; x2 < xsize2; ++x2) { + int64_t x0 = x2 * 2 - k0; + if (x0 < 0) x0 = 0; + int64_t x1 = x2 * 2 + k1 + 1; + if (x1 > xsize) x1 = xsize; + int64_t y0 = y2 * 2 - k0; + if (y0 < 0) y0 = 0; + int64_t y1 = y2 * 2 + k1 + 1; + if (y1 > ysize) y1 = ysize; + + float sum = 0; + for (int64_t y = y0; y < y1; ++y) { + const auto* row_in = input.Row(y); + for (int64_t x = x0; x < x1; ++x) { + double deriv = UpsamplerDeriv(x2, y2, x, y); + sum += deriv * row_in[x]; + } + } + row[x2] = sum; + } + } +} + +// Element-wise multiplies two images. 
+template +void ElwiseMul(const Plane& image1, const Plane& image2, Plane* out) { + const size_t xsize = image1.xsize(); + const size_t ysize = image1.ysize(); + JXL_CHECK(xsize == image2.xsize()); + JXL_CHECK(ysize == image2.ysize()); + JXL_CHECK(xsize == out->xsize()); + JXL_CHECK(ysize == out->ysize()); + for (size_t y = 0; y < ysize; ++y) { + const T* const JXL_RESTRICT row1 = image1.Row(y); + const T* const JXL_RESTRICT row2 = image2.Row(y); + T* const JXL_RESTRICT row_out = out->Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row1[x] * row2[x]; + } + } +} + +// Element-wise divides two images. +template +void ElwiseDiv(const Plane& image1, const Plane& image2, Plane* out) { + const size_t xsize = image1.xsize(); + const size_t ysize = image1.ysize(); + JXL_CHECK(xsize == image2.xsize()); + JXL_CHECK(ysize == image2.ysize()); + JXL_CHECK(xsize == out->xsize()); + JXL_CHECK(ysize == out->ysize()); + for (size_t y = 0; y < ysize; ++y) { + const T* const JXL_RESTRICT row1 = image1.Row(y); + const T* const JXL_RESTRICT row2 = image2.Row(y); + T* const JXL_RESTRICT row_out = out->Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row1[x] / row2[x]; + } + } +} + +void ReduceRinging(const ImageF& initial, const ImageF& mask, ImageF& down) { + int64_t xsize2 = down.xsize(); + int64_t ysize2 = down.ysize(); + + for (size_t y = 0; y < down.ysize(); y++) { + const float* row_mask = mask.Row(y); + float* row_out = down.Row(y); + for (size_t x = 0; x < down.xsize(); x++) { + float v = down.Row(y)[x]; + float min = initial.Row(y)[x]; + float max = initial.Row(y)[x]; + for (int64_t yi = -1; yi < 2; yi++) { + for (int64_t xi = -1; xi < 2; xi++) { + int64_t x2 = (int64_t)x + xi; + int64_t y2 = (int64_t)y + yi; + if (x2 < 0 || y2 < 0 || x2 >= (int64_t)xsize2 || + y2 >= (int64_t)ysize2) + continue; + min = std::min(min, initial.Row(y2)[x2]); + max = std::max(max, initial.Row(y2)[x2]); + } + } + + row_out[x] = v; + + // Clamp the pixel within the value of 
a small area to prevent ringning. + // The mask determines how much to clamp, clamp more to reduce more + // ringing in smooth areas, clamp less in noisy areas to get more + // sharpness. Higher mask_multiplier gives less clamping, so less + // ringing reduction. + const constexpr float mask_multiplier = 2; + float a = row_mask[x] * mask_multiplier; + float clip_min = min - a; + float clip_max = max + a; + if (row_out[x] < clip_min) row_out[x] = clip_min; + if (row_out[x] > clip_max) row_out[x] = clip_max; + } + } +} + +// TODO(lode): move this to a separate file enc_downsample.cc +void DownsampleImage2_Iterative(const ImageF& orig, ImageF* output) { + int64_t xsize = orig.xsize(); + int64_t ysize = orig.ysize(); + int64_t xsize2 = DivCeil(orig.xsize(), 2); + int64_t ysize2 = DivCeil(orig.ysize(), 2); + + ImageF box_downsample(xsize, ysize); + CopyImageTo(orig, &box_downsample); + DownsampleImage(&box_downsample, 2); + ImageF mask(box_downsample.xsize(), box_downsample.ysize()); + CreateMask(box_downsample, mask); + + output->ShrinkTo(xsize2, ysize2); + + // Initial result image using the sharper downsampling. + // Allocate extra space to avoid a reallocation when padding. + ImageF initial(DivCeil(orig.xsize(), 2) + kBlockDim, + DivCeil(orig.ysize(), 2) + kBlockDim); + initial.ShrinkTo(initial.xsize() - kBlockDim, initial.ysize() - kBlockDim); + DownsampleImage2_Sharper(orig, &initial); + + ImageF down(initial.xsize(), initial.ysize()); + CopyImageTo(initial, &down); + ImageF up(xsize, ysize); + ImageF corr(xsize, ysize); + ImageF corr2(xsize2, ysize2); + + // In the weights map, relatively higher values will allow less ringing but + // also less sharpness. With all constant values, it optimizes equally + // everywhere. Even in this case, the weights2 computed from + // this is still used and differs at the borders of the image. 
+ // TODO(lode): Make use of the weights field for anti-ringing and clamping, + // the values are all set to 1 for now, but it is intended to be used for + // reducing ringing based on the mask, and taking clamping into account. + ImageF weights(xsize, ysize); + for (size_t y = 0; y < weights.ysize(); y++) { + auto* row = weights.Row(y); + for (size_t x = 0; x < weights.xsize(); x++) { + row[x] = 1; + } + } + ImageF weights2(xsize2, ysize2); + AntiUpsample(weights, &weights2); + + const size_t num_it = 3; + for (size_t it = 0; it < num_it; ++it) { + UpsampleImage(down, &up); + corr = LinComb(1, orig, -1, up); + ElwiseMul(corr, weights, &corr); + AntiUpsample(corr, &corr2); + ElwiseDiv(corr2, weights2, &corr2); + + down = LinComb(1, down, 1, corr2); + } + + ReduceRinging(initial, mask, down); + + // can't just use CopyImage, because the output image was prepared with + // padding. + for (size_t y = 0; y < down.ysize(); y++) { + for (size_t x = 0; x < down.xsize(); x++) { + float v = down.Row(y)[x]; + output->Row(y)[x] = v; + } + } +} + +void DownsampleImage2_Iterative(Image3F* opsin) { + // Allocate extra space to avoid a reallocation when padding. 
+ Image3F downsampled(DivCeil(opsin->xsize(), 2) + kBlockDim, + DivCeil(opsin->ysize(), 2) + kBlockDim); + downsampled.ShrinkTo(downsampled.xsize() - kBlockDim, + downsampled.ysize() - kBlockDim); + + Image3F rgb(opsin->xsize(), opsin->ysize()); + OpsinParams opsin_params; // TODO: use the ones that are actually used + opsin_params.Init(kDefaultIntensityTarget); + OpsinToLinear(*opsin, Rect(rgb), nullptr, &rgb, opsin_params); + + ImageF mask(opsin->xsize(), opsin->ysize()); + ButteraugliParams butter_params; + ButteraugliComparator butter(rgb, butter_params); + butter.Mask(&mask); + ImageF mask_fuzzy(opsin->xsize(), opsin->ysize()); + + for (size_t c = 0; c < 3; c++) { + DownsampleImage2_Iterative(opsin->Plane(c), &downsampled.Plane(c)); + } + *opsin = std::move(downsampled); +} +} // namespace + +Status DefaultEncoderHeuristics::LossyFrameHeuristics( + PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder, + const ImageBundle* original_pixels, Image3F* opsin, + const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out) { + CompressParams& cparams = enc_state->cparams; + PassesSharedState& shared = enc_state->shared; + + // Compute parameters for noise synthesis. + if (shared.frame_header.flags & FrameHeader::kNoise) { + if (cparams.photon_noise_iso == 0) { + // Don't start at zero amplitude since adding noise is expensive -- it + // significantly slows down decoding, and this is unlikely to + // completely go away even with advanced optimizations. After the + // kNoiseModelingRampUpDistanceRange we have reached the full level, + // i.e. noise is no longer represented by the compressed image, so we + // can add full noise by the noise modeling itself. 
+ static const float kNoiseModelingRampUpDistanceRange = 0.6; + static const float kNoiseLevelAtStartOfRampUp = 0.25; + static const float kNoiseRampupStart = 1.0; + // TODO(user) test and properly select quality_coef with smooth + // filter + float quality_coef = 1.0f; + const float rampup = (cparams.butteraugli_distance - kNoiseRampupStart) / + kNoiseModelingRampUpDistanceRange; + if (rampup < 1.0f) { + quality_coef = kNoiseLevelAtStartOfRampUp + + (1.0f - kNoiseLevelAtStartOfRampUp) * rampup; + } + if (rampup < 0.0f) { + quality_coef = kNoiseRampupStart; + } + if (!GetNoiseParameter(*opsin, &shared.image_features.noise_params, + quality_coef)) { + shared.frame_header.flags &= ~FrameHeader::kNoise; + } + } + } + if (enc_state->shared.frame_header.upsampling != 1 && + !cparams.already_downsampled) { + // In VarDCT mode, LossyFrameHeuristics takes care of running downsampling + // after noise, if necessary. + if (cparams.resampling == 2) { + // TODO(lode): use the regular DownsampleImage, or adapt to the custom + // coefficients, if there is are custom upscaling coefficients in + // CustomTransformData + if (cparams.speed_tier <= SpeedTier::kSquirrel) { + // TODO(lode): DownsampleImage2_Iterative is currently too slow to + // be used for squirrel, make it faster, and / or enable it only for + // kitten. + DownsampleImage2_Iterative(opsin); + } else { + DownsampleImage2_Sharper(opsin); + } + } else { + DownsampleImage(opsin, cparams.resampling); + } + PadImageToBlockMultipleInPlace(opsin); + } + + if (cparams.butteraugli_distance < 0) { + return JXL_FAILURE("Expected non-negative distance"); + } + + // Find and subtract splines. + if (cparams.speed_tier <= SpeedTier::kSquirrel) { + // If we do already have them, they were passed upstream to EncodeFile. 
+ if (!shared.image_features.splines.HasAny()) { + shared.image_features.splines = FindSplines(*opsin); + } + JXL_RETURN_IF_ERROR(shared.image_features.splines.InitializeDrawCache( + opsin->xsize(), opsin->ysize(), shared.cmap)); + shared.image_features.splines.SubtractFrom(opsin); + } + + // Find and subtract patches/dots. + if (ApplyOverride(cparams.patches, + cparams.speed_tier <= SpeedTier::kSquirrel)) { + FindBestPatchDictionary(*opsin, enc_state, cms, pool, aux_out); + PatchDictionaryEncoder::SubtractFrom(shared.image_features.patches, opsin); + } + + static const float kAcQuant = 0.79f; + const float quant_dc = InitialQuantDC(cparams.butteraugli_distance); + Quantizer& quantizer = enc_state->shared.quantizer; + // We don't know the quant field yet, but for computing the global scale + // assuming that it will be the same as for Falcon mode is good enough. + quantizer.ComputeGlobalScaleAndQuant( + quant_dc, kAcQuant / cparams.butteraugli_distance, 0); + + // TODO(veluca): we can now run all the code from here to FindBestQuantizer + // (excluded) one rect at a time. Do that. + + // Dependency graph: + // + // input: either XYB or input image + // + // input image -> XYB [optional] + // XYB -> initial quant field + // XYB -> Gaborished XYB + // Gaborished XYB -> CfL1 + // initial quant field, Gaborished XYB, CfL1 -> ACS + // initial quant field, ACS, Gaborished XYB -> EPF control field + // initial quant field -> adjusted initial quant field + // adjusted initial quant field, ACS -> raw quant field + // raw quant field, ACS, Gaborished XYB -> CfL2 + // + // output: Gaborished XYB, CfL, ACS, raw quant field, EPF control field. 
+ + ArControlFieldHeuristics ar_heuristics; + AcStrategyHeuristics acs_heuristics; + CfLHeuristics cfl_heuristics; + + if (!opsin->xsize()) { + JXL_ASSERT(HandlesColorConversion(cparams, *original_pixels)); + *opsin = Image3F(RoundUpToBlockDim(original_pixels->xsize()), + RoundUpToBlockDim(original_pixels->ysize())); + opsin->ShrinkTo(original_pixels->xsize(), original_pixels->ysize()); + ToXYB(*original_pixels, pool, opsin, cms, /*linear=*/nullptr); + PadImageToBlockMultipleInPlace(opsin); + } + + // Compute an initial estimate of the quantization field. + // Call InitialQuantField only in Hare mode or slower. Otherwise, rely + // on simple heuristics in FindBestAcStrategy, or set a constant for Falcon + // mode. + if (cparams.speed_tier > SpeedTier::kHare) { + enc_state->initial_quant_field = + ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks); + enc_state->initial_quant_masking = + ImageF(shared.frame_dim.xsize_blocks, shared.frame_dim.ysize_blocks); + float q = kAcQuant / cparams.butteraugli_distance; + FillImage(q, &enc_state->initial_quant_field); + FillImage(1.0f / (q + 0.001f), &enc_state->initial_quant_masking); + } else { + // Call this here, as it relies on pre-gaborish values. + float butteraugli_distance_for_iqf = cparams.butteraugli_distance; + if (!shared.frame_header.loop_filter.gab) { + butteraugli_distance_for_iqf *= 0.73f; + } + enc_state->initial_quant_field = InitialQuantField( + butteraugli_distance_for_iqf, *opsin, shared.frame_dim, pool, 1.0f, + &enc_state->initial_quant_masking); + quantizer.SetQuantField(quant_dc, enc_state->initial_quant_field, nullptr); + } + + // TODO(veluca): do something about animations. + + // Apply inverse-gaborish. + if (shared.frame_header.loop_filter.gab) { + // Unsure why better to do some more gaborish on X and B than Y. 
+ float weight[3] = { + 1.0036278514398933f, + 0.99406123118127299f, + 0.99719338015886894f, + }; + GaborishInverse(opsin, weight, pool); + } + + FindBestDequantMatrices(cparams, *opsin, modular_frame_encoder, + &enc_state->shared.matrices); + + cfl_heuristics.Init(*opsin); + acs_heuristics.Init(*opsin, enc_state); + + auto process_tile = [&](const uint32_t tid, const size_t thread) { + size_t n_enc_tiles = + DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks); + size_t tx = tid % n_enc_tiles; + size_t ty = tid / n_enc_tiles; + size_t by0 = ty * kEncTileDimInBlocks; + size_t by1 = std::min((ty + 1) * kEncTileDimInBlocks, + enc_state->shared.frame_dim.ysize_blocks); + size_t bx0 = tx * kEncTileDimInBlocks; + size_t bx1 = std::min((tx + 1) * kEncTileDimInBlocks, + enc_state->shared.frame_dim.xsize_blocks); + Rect r(bx0, by0, bx1 - bx0, by1 - by0); + + // For speeds up to Wombat, we only compute the color correlation map + // once we know the transform type and the quantization map. + if (cparams.speed_tier <= SpeedTier::kSquirrel) { + cfl_heuristics.ComputeTile(r, *opsin, enc_state->shared.matrices, + /*ac_strategy=*/nullptr, + /*raw_quant_field=*/nullptr, + /*quantizer=*/nullptr, /*fast=*/false, thread, + &enc_state->shared.cmap); + } + + // Choose block sizes. + acs_heuristics.ProcessRect(r); + + // Choose amount of post-processing smoothing. + // TODO(veluca): should this go *after* AdjustQuantField? + ar_heuristics.RunRect(r, *opsin, enc_state, thread); + + // Always set the initial quant field, so we can compute the CfL map with + // more accuracy. The initial quant field might change in slower modes, but + // adjusting the quant field with butteraugli when all the other encoding + // parameters are fixed is likely a more reliable choice anyway. 
+ AdjustQuantField(enc_state->shared.ac_strategy, r, + cparams.butteraugli_distance, + &enc_state->initial_quant_field); + quantizer.SetQuantFieldRect(enc_state->initial_quant_field, r, + &enc_state->shared.raw_quant_field); + + // Compute a non-default CfL map if we are at Hare speed, or slower. + if (cparams.speed_tier <= SpeedTier::kHare) { + cfl_heuristics.ComputeTile( + r, *opsin, enc_state->shared.matrices, &enc_state->shared.ac_strategy, + &enc_state->shared.raw_quant_field, &enc_state->shared.quantizer, + /*fast=*/cparams.speed_tier >= SpeedTier::kWombat, thread, + &enc_state->shared.cmap); + } + }; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, + DivCeil(enc_state->shared.frame_dim.xsize_blocks, kEncTileDimInBlocks) * + DivCeil(enc_state->shared.frame_dim.ysize_blocks, + kEncTileDimInBlocks), + [&](const size_t num_threads) { + ar_heuristics.PrepareForThreads(num_threads); + cfl_heuristics.PrepareForThreads(num_threads); + return true; + }, + process_tile, "Enc Heuristics")); + + acs_heuristics.Finalize(aux_out); + if (cparams.speed_tier <= SpeedTier::kHare) { + cfl_heuristics.ComputeDC(/*fast=*/cparams.speed_tier >= SpeedTier::kWombat, + &enc_state->shared.cmap); + } + + // Refine quantization levels. + FindBestQuantizer(original_pixels, *opsin, enc_state, cms, pool, aux_out); + + // Choose a context model that depends on the amount of quantization for AC. + if (cparams.speed_tier < SpeedTier::kFalcon) { + FindBestBlockEntropyModel(*enc_state); + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.h b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.h new file mode 100644 index 0000000000..3cb9b506a6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_heuristics.h @@ -0,0 +1,81 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_HEURISTICS_H_ +#define LIB_JXL_ENC_HEURISTICS_H_ + +// Hook for custom encoder heuristics (VarDCT only for now). + +#include +#include +#include + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/image.h" +#include "lib/jxl/modular/encoding/enc_ma.h" + +namespace jxl { + +struct AuxOut; +struct PassesEncoderState; +class DequantMatrices; +class ImageBundle; +class ModularFrameEncoder; + +class EncoderHeuristics { + public: + virtual ~EncoderHeuristics() = default; + // Initializes encoder structures in `enc_state` using the original image data + // in `original_pixels`, and the XYB image data in `opsin`. Also modifies the + // `opsin` image by applying Gaborish, and doing other modifications if + // necessary. `pool` is used for running the computations on multiple threads. + // `aux_out` collects statistics and can be used to print debug images. + virtual Status LossyFrameHeuristics( + PassesEncoderState* enc_state, ModularFrameEncoder* modular_frame_encoder, + const ImageBundle* original_pixels, Image3F* opsin, + const JxlCmsInterface& cms, ThreadPool* pool, AuxOut* aux_out) = 0; + + // Custom fixed tree for lossless mode. Must set `tree` to a valid tree if + // the function returns true. + virtual bool CustomFixedTreeLossless(const FrameDimensions& frame_dim, + Tree* tree) { + return false; + } + + // If this method returns `true`, the `opsin` parameter to + // LossyFrameHeuristics will not be initialized, and should be initialized + // during the call. Moreover, `original_pixels` may not be in a linear + // colorspace (but will be the same as the `ib` value passed to this + // function). 
+ virtual bool HandlesColorConversion(const CompressParams& cparams, + const ImageBundle& ib) { + return false; + } +}; + +class DefaultEncoderHeuristics : public EncoderHeuristics { + public: + Status LossyFrameHeuristics(PassesEncoderState* enc_state, + ModularFrameEncoder* modular_frame_encoder, + const ImageBundle* original_pixels, + Image3F* opsin, const JxlCmsInterface& cms, + ThreadPool* pool, AuxOut* aux_out) override; + bool HandlesColorConversion(const CompressParams& cparams, + const ImageBundle& ib) override; +}; + +// Exposed here since it may be used by other EncoderHeuristics implementations +// outside this project. +void FindBestDequantMatrices(const CompressParams& cparams, + const Image3F& opsin, + ModularFrameEncoder* modular_frame_encoder, + DequantMatrices* dequant_matrices); + +} // namespace jxl + +#endif // LIB_JXL_ENC_HEURISTICS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman.cc b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.cc new file mode 100644 index 0000000000..3eab2c218a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.cc @@ -0,0 +1,214 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_huffman.h" + +#include +#include + +#include "lib/jxl/enc_huffman_tree.h" + +namespace jxl { + +namespace { + +constexpr int kCodeLengthCodes = 18; + +void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes, + const uint8_t* code_length_bitdepth, + BitWriter* writer) { + static const uint8_t kStorageOrder[kCodeLengthCodes] = { + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + // The bit lengths of the Huffman code over the code length alphabet + // are compressed with the following static Huffman code: + // Symbol Code + // ------ ---- + // 0 00 + // 1 1110 + // 2 110 + // 3 01 + // 4 10 + // 5 1111 + static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3, + 2, 1, 15}; + static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3, + 2, 2, 4}; + + // Throw away trailing zeros: + size_t codes_to_store = kCodeLengthCodes; + if (num_codes > 1) { + for (; codes_to_store > 0; --codes_to_store) { + if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) { + break; + } + } + } + size_t skip_some = 0; // skips none. + if (code_length_bitdepth[kStorageOrder[0]] == 0 && + code_length_bitdepth[kStorageOrder[1]] == 0) { + skip_some = 2; // skips two. + if (code_length_bitdepth[kStorageOrder[2]] == 0) { + skip_some = 3; // skips three. 
+ } + } + writer->Write(2, skip_some); + for (size_t i = skip_some; i < codes_to_store; ++i) { + size_t l = code_length_bitdepth[kStorageOrder[i]]; + writer->Write(kHuffmanBitLengthHuffmanCodeBitLengths[l], + kHuffmanBitLengthHuffmanCodeSymbols[l]); + } +} + +void StoreHuffmanTreeToBitMask(const size_t huffman_tree_size, + const uint8_t* huffman_tree, + const uint8_t* huffman_tree_extra_bits, + const uint8_t* code_length_bitdepth, + const uint16_t* code_length_bitdepth_symbols, + BitWriter* writer) { + for (size_t i = 0; i < huffman_tree_size; ++i) { + size_t ix = huffman_tree[i]; + writer->Write(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix]); + // Extra bits + switch (ix) { + case 16: + writer->Write(2, huffman_tree_extra_bits[i]); + break; + case 17: + writer->Write(3, huffman_tree_extra_bits[i]); + break; + } + } +} + +void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4], + size_t num_symbols, size_t max_bits, + BitWriter* writer) { + // value of 1 indicates a simple Huffman code + writer->Write(2, 1); + writer->Write(2, num_symbols - 1); // NSYM - 1 + + // Sort + for (size_t i = 0; i < num_symbols; i++) { + for (size_t j = i + 1; j < num_symbols; j++) { + if (depths[symbols[j]] < depths[symbols[i]]) { + std::swap(symbols[j], symbols[i]); + } + } + } + + if (num_symbols == 2) { + writer->Write(max_bits, symbols[0]); + writer->Write(max_bits, symbols[1]); + } else if (num_symbols == 3) { + writer->Write(max_bits, symbols[0]); + writer->Write(max_bits, symbols[1]); + writer->Write(max_bits, symbols[2]); + } else { + writer->Write(max_bits, symbols[0]); + writer->Write(max_bits, symbols[1]); + writer->Write(max_bits, symbols[2]); + writer->Write(max_bits, symbols[3]); + // tree-select + writer->Write(1, depths[symbols[0]] == 1 ? 1 : 0); + } +} + +// num = alphabet size +// depths = symbol depths +void StoreHuffmanTree(const uint8_t* depths, size_t num, BitWriter* writer) { + // Write the Huffman tree into the compact representation. 
+ std::unique_ptr arena(new uint8_t[2 * num]); + uint8_t* huffman_tree = arena.get(); + uint8_t* huffman_tree_extra_bits = arena.get() + num; + size_t huffman_tree_size = 0; + WriteHuffmanTree(depths, num, &huffman_tree_size, huffman_tree, + huffman_tree_extra_bits); + + // Calculate the statistics of the Huffman tree in the compact representation. + uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0}; + for (size_t i = 0; i < huffman_tree_size; ++i) { + ++huffman_tree_histogram[huffman_tree[i]]; + } + + int num_codes = 0; + int code = 0; + for (int i = 0; i < kCodeLengthCodes; ++i) { + if (huffman_tree_histogram[i]) { + if (num_codes == 0) { + code = i; + num_codes = 1; + } else if (num_codes == 1) { + num_codes = 2; + break; + } + } + } + + // Calculate another Huffman tree to use for compressing both the + // earlier Huffman tree with. + uint8_t code_length_bitdepth[kCodeLengthCodes] = {0}; + uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0}; + CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5, + &code_length_bitdepth[0]); + ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, + &code_length_bitdepth_symbols[0]); + + // Now, we have all the data, let's start storing it + StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth, + writer); + + if (num_codes == 1) { + code_length_bitdepth[code] = 0; + } + + // Store the real huffman tree now. 
+ StoreHuffmanTreeToBitMask(huffman_tree_size, huffman_tree, + huffman_tree_extra_bits, &code_length_bitdepth[0], + code_length_bitdepth_symbols, writer); +} + +} // namespace + +void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length, + uint8_t* depth, uint16_t* bits, + BitWriter* writer) { + size_t count = 0; + size_t s4[4] = {0}; + for (size_t i = 0; i < length; i++) { + if (histogram[i]) { + if (count < 4) { + s4[count] = i; + } else if (count > 4) { + break; + } + count++; + } + } + + size_t max_bits_counter = length - 1; + size_t max_bits = 0; + while (max_bits_counter) { + max_bits_counter >>= 1; + ++max_bits; + } + + if (count <= 1) { + // Output symbol bits and depths are initialized with 0, nothing to do. + writer->Write(4, 1); + writer->Write(max_bits, s4[0]); + return; + } + + CreateHuffmanTree(histogram, length, 15, depth); + ConvertBitDepthsToSymbols(depth, length, bits); + + if (count <= 4) { + StoreSimpleHuffmanTree(depth, s4, count, max_bits, writer); + } else { + StoreHuffmanTree(depth, length, writer); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman.h b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.h new file mode 100644 index 0000000000..d7a66584e8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman.h @@ -0,0 +1,22 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_HUFFMAN_H_ +#define LIB_JXL_ENC_HUFFMAN_H_ + +#include "lib/jxl/enc_bit_writer.h" + +namespace jxl { + +// Builds a Huffman tree for the given histogram, and encodes it into writer +// in a format that can be read by HuffmanDecodingData::ReadFromBitstream. +// An allotment for `writer` must already have been created by the caller. 
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, size_t length, + uint8_t* depth, uint16_t* bits, + BitWriter* writer); + +} // namespace jxl + +#endif // LIB_JXL_ENC_HUFFMAN_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.cc b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.cc new file mode 100644 index 0000000000..5c40dea770 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.cc @@ -0,0 +1,328 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_huffman_tree.h" + +#include +#include +#include + +#include "lib/jxl/base/status.h" + +namespace jxl { + +void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, + uint8_t level) { + if (p.index_left >= 0) { + ++level; + SetDepth(pool[p.index_left], pool, depth, level); + SetDepth(pool[p.index_right_or_value], pool, depth, level); + } else { + depth[p.index_right_or_value] = level; + } +} + +// Sort the root nodes, least popular first. +static JXL_INLINE bool Compare(const HuffmanTree& v0, const HuffmanTree& v1) { + return v0.total_count < v1.total_count; +} + +// This function will create a Huffman tree. +// +// The catch here is that the tree cannot be arbitrarily deep. +// Brotli specifies a maximum depth of 15 bits for "code trees" +// and 7 bits for "code length code trees." +// +// count_limit is the value that is to be faked as the minimum value +// and this minimum value is raised until the tree matches the +// maximum length requirement. +// +// This algorithm is not of excellent performance for very long data blocks, +// especially when population counts are longer than 2**tree_limit, but +// we are not planning to use this with extremely long blocks. 
+// +// See http://en.wikipedia.org/wiki/Huffman_coding +void CreateHuffmanTree(const uint32_t* data, const size_t length, + const int tree_limit, uint8_t* depth) { + // For block sizes below 64 kB, we never need to do a second iteration + // of this loop. Probably all of our block sizes will be smaller than + // that, so this loop is mostly of academic interest. If we actually + // would need this, we would be better off with the Katajainen algorithm. + for (uint32_t count_limit = 1;; count_limit *= 2) { + std::vector tree; + tree.reserve(2 * length + 1); + + for (size_t i = length; i != 0;) { + --i; + if (data[i]) { + const uint32_t count = std::max(data[i], count_limit - 1); + tree.emplace_back(count, -1, static_cast(i)); + } + } + + const size_t n = tree.size(); + if (n == 1) { + // Fake value; will be fixed on upper level. + depth[tree[0].index_right_or_value] = 1; + break; + } + + std::stable_sort(tree.begin(), tree.end(), Compare); + + // The nodes are: + // [0, n): the sorted leaf nodes that we start with. + // [n]: we add a sentinel here. + // [n + 1, 2n): new parent nodes are added here, starting from + // (n+1). These are naturally in ascending order. + // [2n]: we add a sentinel at the end as well. + // There will be (2n+1) elements at the end. + const HuffmanTree sentinel(std::numeric_limits::max(), -1, -1); + tree.push_back(sentinel); + tree.push_back(sentinel); + + size_t i = 0; // Points to the next leaf node. + size_t j = n + 1; // Points to the next non-leaf node. + for (size_t k = n - 1; k != 0; --k) { + size_t left, right; + if (tree[i].total_count <= tree[j].total_count) { + left = i; + ++i; + } else { + left = j; + ++j; + } + if (tree[i].total_count <= tree[j].total_count) { + right = i; + ++i; + } else { + right = j; + ++j; + } + + // The sentinel node becomes the parent node. 
+ size_t j_end = tree.size() - 1; + tree[j_end].total_count = + tree[left].total_count + tree[right].total_count; + tree[j_end].index_left = static_cast(left); + tree[j_end].index_right_or_value = static_cast(right); + + // Add back the last sentinel node. + tree.push_back(sentinel); + } + JXL_DASSERT(tree.size() == 2 * n + 1); + SetDepth(tree[2 * n - 1], &tree[0], depth, 0); + + // We need to pack the Huffman tree in tree_limit bits. + // If this was not successful, add fake entities to the lowest values + // and retry. + if (*std::max_element(&depth[0], &depth[length]) <= tree_limit) { + break; + } + } +} + +void Reverse(uint8_t* v, size_t start, size_t end) { + --end; + while (start < end) { + uint8_t tmp = v[start]; + v[start] = v[end]; + v[end] = tmp; + ++start; + --end; + } +} + +void WriteHuffmanTreeRepetitions(const uint8_t previous_value, + const uint8_t value, size_t repetitions, + size_t* tree_size, uint8_t* tree, + uint8_t* extra_bits_data) { + JXL_DASSERT(repetitions > 0); + if (previous_value != value) { + tree[*tree_size] = value; + extra_bits_data[*tree_size] = 0; + ++(*tree_size); + --repetitions; + } + if (repetitions == 7) { + tree[*tree_size] = value; + extra_bits_data[*tree_size] = 0; + ++(*tree_size); + --repetitions; + } + if (repetitions < 3) { + for (size_t i = 0; i < repetitions; ++i) { + tree[*tree_size] = value; + extra_bits_data[*tree_size] = 0; + ++(*tree_size); + } + } else { + repetitions -= 3; + size_t start = *tree_size; + while (true) { + tree[*tree_size] = 16; + extra_bits_data[*tree_size] = repetitions & 0x3; + ++(*tree_size); + repetitions >>= 2; + if (repetitions == 0) { + break; + } + --repetitions; + } + Reverse(tree, start, *tree_size); + Reverse(extra_bits_data, start, *tree_size); + } +} + +void WriteHuffmanTreeRepetitionsZeros(size_t repetitions, size_t* tree_size, + uint8_t* tree, uint8_t* extra_bits_data) { + if (repetitions == 11) { + tree[*tree_size] = 0; + extra_bits_data[*tree_size] = 0; + ++(*tree_size); + 
--repetitions; + } + if (repetitions < 3) { + for (size_t i = 0; i < repetitions; ++i) { + tree[*tree_size] = 0; + extra_bits_data[*tree_size] = 0; + ++(*tree_size); + } + } else { + repetitions -= 3; + size_t start = *tree_size; + while (true) { + tree[*tree_size] = 17; + extra_bits_data[*tree_size] = repetitions & 0x7; + ++(*tree_size); + repetitions >>= 3; + if (repetitions == 0) { + break; + } + --repetitions; + } + Reverse(tree, start, *tree_size); + Reverse(extra_bits_data, start, *tree_size); + } +} + +static void DecideOverRleUse(const uint8_t* depth, const size_t length, + bool* use_rle_for_non_zero, + bool* use_rle_for_zero) { + size_t total_reps_zero = 0; + size_t total_reps_non_zero = 0; + size_t count_reps_zero = 1; + size_t count_reps_non_zero = 1; + for (size_t i = 0; i < length;) { + const uint8_t value = depth[i]; + size_t reps = 1; + for (size_t k = i + 1; k < length && depth[k] == value; ++k) { + ++reps; + } + if (reps >= 3 && value == 0) { + total_reps_zero += reps; + ++count_reps_zero; + } + if (reps >= 4 && value != 0) { + total_reps_non_zero += reps; + ++count_reps_non_zero; + } + i += reps; + } + *use_rle_for_non_zero = total_reps_non_zero > count_reps_non_zero * 2; + *use_rle_for_zero = total_reps_zero > count_reps_zero * 2; +} + +void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size, + uint8_t* tree, uint8_t* extra_bits_data) { + uint8_t previous_value = 8; + + // Throw away trailing zeros. + size_t new_length = length; + for (size_t i = 0; i < length; ++i) { + if (depth[length - i - 1] == 0) { + --new_length; + } else { + break; + } + } + + // First gather statistics on if it is a good idea to do rle. + bool use_rle_for_non_zero = false; + bool use_rle_for_zero = false; + if (length > 50) { + // Find rle coding for longer codes. + // Shorter codes seem not to benefit from rle. + DecideOverRleUse(depth, new_length, &use_rle_for_non_zero, + &use_rle_for_zero); + } + + // Actual rle coding. 
+ for (size_t i = 0; i < new_length;) { + const uint8_t value = depth[i]; + size_t reps = 1; + if ((value != 0 && use_rle_for_non_zero) || + (value == 0 && use_rle_for_zero)) { + for (size_t k = i + 1; k < new_length && depth[k] == value; ++k) { + ++reps; + } + } + if (value == 0) { + WriteHuffmanTreeRepetitionsZeros(reps, tree_size, tree, extra_bits_data); + } else { + WriteHuffmanTreeRepetitions(previous_value, value, reps, tree_size, tree, + extra_bits_data); + previous_value = value; + } + i += reps; + } +} + +namespace { + +uint16_t ReverseBits(int num_bits, uint16_t bits) { + static const size_t kLut[16] = {// Pre-reversed 4-bit values. + 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, + 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf}; + size_t retval = kLut[bits & 0xf]; + for (int i = 4; i < num_bits; i += 4) { + retval <<= 4; + bits = static_cast(bits >> 4); + retval |= kLut[bits & 0xf]; + } + retval >>= (-num_bits & 0x3); + return static_cast(retval); +} + +} // namespace + +void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, + uint16_t* bits) { + // In Brotli, all bit depths are [1..15] + // 0 bit depth means that the symbol does not exist. 
+ const int kMaxBits = 16; // 0..15 are values for bits + uint16_t bl_count[kMaxBits] = {0}; + { + for (size_t i = 0; i < len; ++i) { + ++bl_count[depth[i]]; + } + bl_count[0] = 0; + } + uint16_t next_code[kMaxBits]; + next_code[0] = 0; + { + int code = 0; + for (size_t i = 1; i < kMaxBits; ++i) { + code = (code + bl_count[i - 1]) << 1; + next_code[i] = static_cast(code); + } + } + for (size_t i = 0; i < len; ++i) { + if (depth[i]) { + bits[i] = ReverseBits(depth[i], next_code[depth[i]]++); + } + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.h b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.h new file mode 100644 index 0000000000..7d716cd3b5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_huffman_tree.h @@ -0,0 +1,52 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Library for creating Huffman codes from population counts. + +#ifndef LIB_JXL_HUFFMAN_TREE_H_ +#define LIB_JXL_HUFFMAN_TREE_H_ + +#include +#include + +namespace jxl { + +// A node of a Huffman tree. +struct HuffmanTree { + HuffmanTree(uint32_t count, int16_t left, int16_t right) + : total_count(count), index_left(left), index_right_or_value(right) {} + uint32_t total_count; + int16_t index_left; + int16_t index_right_or_value; +}; + +void SetDepth(const HuffmanTree& p, HuffmanTree* pool, uint8_t* depth, + uint8_t level); + +// This function will create a Huffman tree. +// +// The (data,length) contains the population counts. +// The tree_limit is the maximum bit depth of the Huffman codes. +// +// The depth contains the tree, i.e., how many bits are used for +// the symbol. 
+// +// See http://en.wikipedia.org/wiki/Huffman_coding +void CreateHuffmanTree(const uint32_t* data, size_t length, int tree_limit, + uint8_t* depth); + +// Write a Huffman tree from bit depths into the bitstream representation +// of a Huffman tree. The generated Huffman tree is to be compressed once +// more using a Huffman tree +void WriteHuffmanTree(const uint8_t* depth, size_t length, size_t* tree_size, + uint8_t* tree, uint8_t* extra_bits_data); + +// Get the actual bit values for a tree of bit depths. +void ConvertBitDepthsToSymbols(const uint8_t* depth, size_t len, + uint16_t* bits); + +} // namespace jxl + +#endif // LIB_JXL_HUFFMAN_TREE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.cc b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.cc new file mode 100644 index 0000000000..a6782f6a45 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.cc @@ -0,0 +1,406 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_icc_codec.h" + +#include + +#include +#include +#include + +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/common.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/icc_codec_common.h" + +namespace jxl { +namespace { + +// Unshuffles or de-interleaves bytes, for example with width 2, turns +// "AaBbCcDc" into "ABCDabcd", this for example de-interleaves UTF-16 bytes into +// first all the high order bytes, then all the low order bytes. +// Transposes a matrix of width columns and ceil(size / width) rows. There are +// size elements, size may be < width * height, if so the +// last elements of the bottom row are missing, the missing spots are +// transposed along with the filled spots, and the result has the missing +// elements at the bottom of the rightmost column. 
The input is the input matrix +// in scanline order, the output is the result matrix in scanline order, with +// missing elements skipped over (this may occur at multiple positions). +void Unshuffle(uint8_t* data, size_t size, size_t width) { + size_t height = (size + width - 1) / width; // amount of rows of input + PaddedBytes result(size); + // i = input index, j output index + size_t s = 0, j = 0; + for (size_t i = 0; i < size; i++) { + result[j] = data[i]; + j += height; + if (j >= size) j = ++s; + } + + for (size_t i = 0; i < size; i++) { + data[i] = result[i]; + } +} + +// This is performed by the encoder, the encoder must be able to encode any +// random byte stream (not just byte streams that are a valid ICC profile), so +// an error returned by this function is an implementation error. +Status PredictAndShuffle(size_t stride, size_t width, int order, size_t num, + const uint8_t* data, size_t size, size_t* pos, + PaddedBytes* result) { + JXL_RETURN_IF_ERROR(CheckOutOfBounds(*pos, num, size)); + // Required by the specification, see decoder. stride * 4 must be < *pos. + if (!*pos || ((*pos - 1u) >> 2u) < stride) { + return JXL_FAILURE("Invalid stride"); + } + if (*pos < stride * 4) return JXL_FAILURE("Too large stride"); + size_t start = result->size(); + for (size_t i = 0; i < num; i++) { + uint8_t predicted = + LinearPredictICCValue(data, *pos, i, stride, width, order); + result->push_back(data[*pos + i] - predicted); + } + *pos += num; + if (width > 1) Unshuffle(result->data() + start, num, width); + return true; +} +} // namespace + +// Outputs a transformed form of the given icc profile. The result itself is +// not particularly smaller than the input data in bytes, but it will be in a +// form that is easier to compress (more zeroes, ...) and will compress better +// with brotli. 
+Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result) { + PaddedBytes commands; + PaddedBytes data; + + EncodeVarInt(size, result); + + // Header + PaddedBytes header = ICCInitialHeaderPrediction(); + EncodeUint32(0, size, &header); + for (size_t i = 0; i < kICCHeaderSize && i < size; i++) { + ICCPredictHeader(icc, size, header.data(), i); + data.push_back(icc[i] - header[i]); + } + if (size <= kICCHeaderSize) { + EncodeVarInt(0, result); // 0 commands + for (size_t i = 0; i < data.size(); i++) { + result->push_back(data[i]); + } + return true; + } + + std::vector tags; + std::vector tagstarts; + std::vector tagsizes; + std::map tagmap; + + // Tag list + size_t pos = kICCHeaderSize; + if (pos + 4 <= size) { + uint64_t numtags = DecodeUint32(icc, size, pos); + pos += 4; + EncodeVarInt(numtags + 1, &commands); + uint64_t prevtagstart = kICCHeaderSize + numtags * 12; + uint32_t prevtagsize = 0; + for (size_t i = 0; i < numtags; i++) { + if (pos + 12 > size) break; + + Tag tag = DecodeKeyword(icc, size, pos + 0); + uint32_t tagstart = DecodeUint32(icc, size, pos + 4); + uint32_t tagsize = DecodeUint32(icc, size, pos + 8); + pos += 12; + + tags.push_back(tag); + tagstarts.push_back(tagstart); + tagsizes.push_back(tagsize); + tagmap[tagstart] = tags.size() - 1; + + uint8_t tagcode = kCommandTagUnknown; + for (size_t j = 0; j < kNumTagStrings; j++) { + if (tag == *kTagStrings[j]) { + tagcode = j + kCommandTagStringFirst; + break; + } + } + + if (tag == kRtrcTag && pos + 24 < size) { + bool ok = true; + ok &= DecodeKeyword(icc, size, pos + 0) == kGtrcTag; + ok &= DecodeKeyword(icc, size, pos + 12) == kBtrcTag; + if (ok) { + for (size_t kk = 0; kk < 8; kk++) { + if (icc[pos - 8 + kk] != icc[pos + 4 + kk]) ok = false; + if (icc[pos - 8 + kk] != icc[pos + 16 + kk]) ok = false; + } + } + if (ok) { + tagcode = kCommandTagTRC; + pos += 24; + i += 2; + } + } + + if (tag == kRxyzTag && pos + 24 < size) { + bool ok = true; + ok &= DecodeKeyword(icc, size, pos + 
0) == kGxyzTag; + ok &= DecodeKeyword(icc, size, pos + 12) == kBxyzTag; + uint32_t offsetr = tagstart; + uint32_t offsetg = DecodeUint32(icc, size, pos + 4); + uint32_t offsetb = DecodeUint32(icc, size, pos + 16); + uint32_t sizer = tagsize; + uint32_t sizeg = DecodeUint32(icc, size, pos + 8); + uint32_t sizeb = DecodeUint32(icc, size, pos + 20); + ok &= sizer == 20; + ok &= sizeg == 20; + ok &= sizeb == 20; + ok &= (offsetg == offsetr + 20); + ok &= (offsetb == offsetr + 40); + if (ok) { + tagcode = kCommandTagXYZ; + pos += 24; + i += 2; + } + } + + uint8_t command = tagcode; + uint64_t predicted_tagstart = prevtagstart + prevtagsize; + if (predicted_tagstart != tagstart) command |= kFlagBitOffset; + size_t predicted_tagsize = prevtagsize; + if (tag == kRxyzTag || tag == kGxyzTag || tag == kBxyzTag || + tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag || + tag == kLumiTag) { + predicted_tagsize = 20; + } + if (predicted_tagsize != tagsize) command |= kFlagBitSize; + commands.push_back(command); + if (tagcode == 1) { + AppendKeyword(tag, &data); + } + if (command & kFlagBitOffset) EncodeVarInt(tagstart, &commands); + if (command & kFlagBitSize) EncodeVarInt(tagsize, &commands); + + prevtagstart = tagstart; + prevtagsize = tagsize; + } + } + // Indicate end of tag list or varint indicating there's none + commands.push_back(0); + + // Main content + // The main content in a valid ICC profile contains tagged elements, with the + // tag types (4 letter names) given by the tag list above, and the tag list + // pointing to the start and indicating the size of each tagged element. It is + // allowed for tagged elements to overlap, e.g. the curve for R, G and B could + // all point to the same one. + Tag tag; + size_t tagstart = 0, tagsize = 0, clutstart = 0; + + size_t last0 = pos; + // This loop appends commands to the output, processing some sub-section of a + // current tagged element each time. 
We need to keep track of the tagtype of + // the current element, and update it when we encounter the boundary of a + // next one. + // It is not required that the input data is a valid ICC profile, if the + // encoder does not recognize the data it will still be able to output bytes + // but will not predict as well. + while (pos <= size) { + size_t last1 = pos; + PaddedBytes commands_add; + PaddedBytes data_add; + + // This means the loop brought the position beyond the tag end. + if (pos > tagstart + tagsize) { + tag = {{0, 0, 0, 0}}; // nonsensical value + } + + if (commands_add.empty() && data_add.empty() && tagmap.count(pos) && + pos + 4 <= size) { + size_t index = tagmap[pos]; + tag = DecodeKeyword(icc, size, pos); + tagstart = tagstarts[index]; + tagsize = tagsizes[index]; + + if (tag == kMlucTag && pos + tagsize <= size && tagsize > 8 && + icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 && + icc[pos + 7] == 0) { + size_t num = tagsize - 8; + commands_add.push_back(kCommandTypeStartFirst + 3); + pos += 8; + commands_add.push_back(kCommandShuffle2); + EncodeVarInt(num, &commands_add); + size_t start = data_add.size(); + for (size_t i = 0; i < num; i++) { + data_add.push_back(icc[pos]); + pos++; + } + Unshuffle(data_add.data() + start, num, 2); + } + + if (tag == kCurvTag && pos + tagsize <= size && tagsize > 8 && + icc[pos + 4] == 0 && icc[pos + 5] == 0 && icc[pos + 6] == 0 && + icc[pos + 7] == 0) { + size_t num = tagsize - 8; + if (num > 16 && num < (1 << 28) && pos + num <= size && pos > 0) { + commands_add.push_back(kCommandTypeStartFirst + 5); + pos += 8; + commands_add.push_back(kCommandPredict); + int order = 1, width = 2, stride = width; + commands_add.push_back((order << 2) | (width - 1)); + EncodeVarInt(num, &commands_add); + JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc, + size, &pos, &data_add)); + } + } + } + + if (tag == kMab_Tag || tag == kMba_Tag) { + Tag subTag = DecodeKeyword(icc, size, pos); + if (pos + 12 
< size && (subTag == kCurvTag || subTag == kVcgtTag) && + DecodeUint32(icc, size, pos + 4) == 0) { + uint32_t num = DecodeUint32(icc, size, pos + 8) * 2; + if (num > 16 && num < (1 << 28) && pos + 12 + num <= size) { + pos += 12; + last1 = pos; + commands_add.push_back(kCommandPredict); + int order = 1, width = 2, stride = width; + commands_add.push_back((order << 2) | (width - 1)); + EncodeVarInt(num, &commands_add); + JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc, + size, &pos, &data_add)); + } + } + + if (pos == tagstart + 24 && pos + 4 < size) { + // Note that this value can be remembered for next iterations of the + // loop, so the "pos == clutstart" if below can trigger during a later + // iteration. + clutstart = tagstart + DecodeUint32(icc, size, pos); + } + + if (pos == clutstart && clutstart + 16 < size) { + size_t numi = icc[tagstart + 8]; + size_t numo = icc[tagstart + 9]; + size_t width = icc[clutstart + 16]; + size_t stride = width * numo; + size_t num = width * numo; + for (size_t i = 0; i < numi && clutstart + i < size; i++) { + num *= icc[clutstart + i]; + } + if ((width == 1 || width == 2) && num > 64 && num < (1 << 28) && + pos + num <= size && pos > stride * 4) { + commands_add.push_back(kCommandPredict); + int order = 1; + uint8_t flags = + (order << 2) | (width - 1) | (stride == width ? 0 : 16); + commands_add.push_back(flags); + if (flags & 16) EncodeVarInt(stride, &commands_add); + EncodeVarInt(num, &commands_add); + JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc, + size, &pos, &data_add)); + } + } + } + + if (commands_add.empty() && data_add.empty() && tag == kGbd_Tag && + pos == tagstart + 8 && pos + tagsize - 8 <= size && pos > 16 && + tagsize > 8) { + size_t width = 4, order = 0, stride = width; + size_t num = tagsize - 8; + uint8_t flags = (order << 2) | (width - 1) | (stride == width ? 
0 : 16); + commands_add.push_back(kCommandPredict); + commands_add.push_back(flags); + if (flags & 16) EncodeVarInt(stride, &commands_add); + EncodeVarInt(num, &commands_add); + JXL_RETURN_IF_ERROR(PredictAndShuffle(stride, width, order, num, icc, + size, &pos, &data_add)); + } + + if (commands_add.empty() && data_add.empty() && pos + 20 <= size) { + Tag subTag = DecodeKeyword(icc, size, pos); + if (subTag == kXyz_Tag && DecodeUint32(icc, size, pos + 4) == 0) { + commands_add.push_back(kCommandXYZ); + pos += 8; + for (size_t j = 0; j < 12; j++) data_add.push_back(icc[pos++]); + } + } + + if (commands_add.empty() && data_add.empty() && pos + 8 <= size) { + if (DecodeUint32(icc, size, pos + 4) == 0) { + Tag subTag = DecodeKeyword(icc, size, pos); + for (size_t i = 0; i < kNumTypeStrings; i++) { + if (subTag == *kTypeStrings[i]) { + commands_add.push_back(kCommandTypeStartFirst + i); + pos += 8; + break; + } + } + } + } + + if (!(commands_add.empty() && data_add.empty()) || pos == size) { + if (last0 < last1) { + commands.push_back(kCommandInsert); + EncodeVarInt(last1 - last0, &commands); + while (last0 < last1) { + data.push_back(icc[last0++]); + } + } + for (size_t i = 0; i < commands_add.size(); i++) { + commands.push_back(commands_add[i]); + } + for (size_t i = 0; i < data_add.size(); i++) { + data.push_back(data_add[i]); + } + last0 = pos; + } + if (commands_add.empty() && data_add.empty()) { + pos++; + } + } + + EncodeVarInt(commands.size(), result); + for (size_t i = 0; i < commands.size(); i++) { + result->push_back(commands[i]); + } + for (size_t i = 0; i < data.size(); i++) { + result->push_back(data[i]); + } + + return true; +} + +Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer, + size_t layer, AuxOut* JXL_RESTRICT aux_out) { + if (icc.empty()) return JXL_FAILURE("ICC must be non-empty"); + PaddedBytes enc; + JXL_RETURN_IF_ERROR(PredictICC(icc.data(), icc.size(), &enc)); + std::vector> tokens(1); + BitWriter::Allotment 
allotment(writer, 128); + JXL_RETURN_IF_ERROR(U64Coder::Write(enc.size(), writer)); + allotment.ReclaimAndCharge(writer, layer, aux_out); + + for (size_t i = 0; i < enc.size(); i++) { + tokens[0].emplace_back( + ICCANSContext(i, i > 0 ? enc[i - 1] : 0, i > 1 ? enc[i - 2] : 0), + enc[i]); + } + HistogramParams params; + params.lz77_method = enc.size() < 4096 ? HistogramParams::LZ77Method::kOptimal + : HistogramParams::LZ77Method::kLZ77; + EntropyEncodingData code; + std::vector context_map; + params.force_huffman = true; + BuildAndEncodeHistograms(params, kNumICCContexts, tokens, &code, &context_map, + writer, layer, aux_out); + WriteTokens(tokens[0], code, context_map, writer, layer, aux_out); + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.h b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.h new file mode 100644 index 0000000000..c22cf5994e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_icc_codec.h @@ -0,0 +1,33 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_ICC_CODEC_H_ +#define LIB_JXL_ENC_ICC_CODEC_H_ + +// Compressed representation of ICC profiles. + +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_bit_writer.h" + +namespace jxl { + +struct AuxOut; + +// Should still be called if `icc.empty()` - if so, writes only 1 bit. 
+Status WriteICC(const PaddedBytes& icc, BitWriter* JXL_RESTRICT writer, + size_t layer, AuxOut* JXL_RESTRICT aux_out); + +// Exposed only for testing +Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result); + +} // namespace jxl + +#endif // LIB_JXL_ENC_ICC_CODEC_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.cc b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.cc new file mode 100644 index 0000000000..0eab1c3e08 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.cc @@ -0,0 +1,155 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_image_bundle.h" + +#include + +#include +#include +#include + +#include "lib/jxl/alpha.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +namespace { + +// Copies ib:rect, converts, and copies into out. +Status CopyToT(const ImageMetadata* metadata, const ImageBundle* ib, + const Rect& rect, const ColorEncoding& c_desired, + const JxlCmsInterface& cms, ThreadPool* pool, Image3F* out) { + ColorSpaceTransform c_transform(cms); + // Changing IsGray is probably a bug. 
+ JXL_CHECK(ib->IsGray() == c_desired.IsGray()); + bool is_gray = ib->IsGray(); + if (out->xsize() < rect.xsize() || out->ysize() < rect.ysize()) { + *out = Image3F(rect.xsize(), rect.ysize()); + } else { + out->ShrinkTo(rect.xsize(), rect.ysize()); + } + std::atomic ok{true}; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, rect.ysize(), + [&](const size_t num_threads) { + return c_transform.Init(ib->c_current(), c_desired, + metadata->IntensityTarget(), rect.xsize(), + num_threads); + }, + [&](const uint32_t y, const size_t thread) { + float* mutable_src_buf = c_transform.BufSrc(thread); + const float* src_buf = mutable_src_buf; + // Interleave input. + if (is_gray) { + src_buf = rect.ConstPlaneRow(ib->color(), 0, y); + } else if (ib->c_current().IsCMYK()) { + if (!ib->HasBlack()) { + ok.store(false); + return; + } + const float* JXL_RESTRICT row_in0 = + rect.ConstPlaneRow(ib->color(), 0, y); + const float* JXL_RESTRICT row_in1 = + rect.ConstPlaneRow(ib->color(), 1, y); + const float* JXL_RESTRICT row_in2 = + rect.ConstPlaneRow(ib->color(), 2, y); + const float* JXL_RESTRICT row_in3 = rect.ConstRow(ib->black(), y); + for (size_t x = 0; x < rect.xsize(); x++) { + // CMYK convention in JXL: 0 = max ink, 1 = white + mutable_src_buf[4 * x + 0] = row_in0[x]; + mutable_src_buf[4 * x + 1] = row_in1[x]; + mutable_src_buf[4 * x + 2] = row_in2[x]; + mutable_src_buf[4 * x + 3] = row_in3[x]; + } + } else { + const float* JXL_RESTRICT row_in0 = + rect.ConstPlaneRow(ib->color(), 0, y); + const float* JXL_RESTRICT row_in1 = + rect.ConstPlaneRow(ib->color(), 1, y); + const float* JXL_RESTRICT row_in2 = + rect.ConstPlaneRow(ib->color(), 2, y); + for (size_t x = 0; x < rect.xsize(); x++) { + mutable_src_buf[3 * x + 0] = row_in0[x]; + mutable_src_buf[3 * x + 1] = row_in1[x]; + mutable_src_buf[3 * x + 2] = row_in2[x]; + } + } + float* JXL_RESTRICT dst_buf = c_transform.BufDst(thread); + if (!c_transform.Run(thread, src_buf, dst_buf)) { + ok.store(false); + return; + } + float* 
JXL_RESTRICT row_out0 = out->PlaneRow(0, y); + float* JXL_RESTRICT row_out1 = out->PlaneRow(1, y); + float* JXL_RESTRICT row_out2 = out->PlaneRow(2, y); + // De-interleave output and convert type. + if (is_gray) { + for (size_t x = 0; x < rect.xsize(); x++) { + row_out0[x] = dst_buf[x]; + row_out1[x] = dst_buf[x]; + row_out2[x] = dst_buf[x]; + } + } else { + for (size_t x = 0; x < rect.xsize(); x++) { + row_out0[x] = dst_buf[3 * x + 0]; + row_out1[x] = dst_buf[3 * x + 1]; + row_out2[x] = dst_buf[3 * x + 2]; + } + } + }, + "Colorspace transform")); + return ok.load(); +} + +} // namespace + +Status ImageBundle::TransformTo(const ColorEncoding& c_desired, + const JxlCmsInterface& cms, ThreadPool* pool) { + JXL_RETURN_IF_ERROR(CopyTo(Rect(color_), c_desired, cms, &color_, pool)); + c_current_ = c_desired; + return true; +} +Status ImageBundle::CopyTo(const Rect& rect, const ColorEncoding& c_desired, + const JxlCmsInterface& cms, Image3F* out, + ThreadPool* pool) const { + return CopyToT(metadata_, this, rect, c_desired, cms, pool, out); +} +Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired, + const JxlCmsInterface& cms, ThreadPool* pool, + ImageBundle* store, const ImageBundle** out) { + if (in.c_current().SameColorEncoding(c_desired) && !in.HasBlack()) { + *out = ∈ + return true; + } + // TODO(janwas): avoid copying via createExternal+copyBackToIO + // instead of copy+createExternal+copyBackToIO + Image3F color(in.color().xsize(), in.color().ysize()); + CopyImageTo(in.color(), &color); + store->SetFromImage(std::move(color), in.c_current()); + + // Must at least copy the alpha channel for use by external_image. 
+ if (in.HasExtraChannels()) { + std::vector extra_channels; + for (const ImageF& extra_channel : in.extra_channels()) { + ImageF ec(extra_channel.xsize(), extra_channel.ysize()); + CopyImageTo(extra_channel, &ec); + extra_channels.emplace_back(std::move(ec)); + } + store->SetExtraChannels(std::move(extra_channels)); + } + + if (!store->TransformTo(c_desired, cms, pool)) { + return false; + } + *out = store; + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.h b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.h new file mode 100644 index 0000000000..85f8e14e1c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_image_bundle.h @@ -0,0 +1,25 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_IMAGE_BUNDLE_H_ +#define LIB_JXL_ENC_IMAGE_BUNDLE_H_ + +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +// Does color transformation from in.c_current() to c_desired if the color +// encodings are different, or nothing if they are already the same. +// If color transformation is done, stores the transformed values into store and +// sets the out pointer to store, else leaves store untouched and sets the out +// pointer to &in. +// Returns false if color transform fails. +Status TransformIfNeeded(const ImageBundle& in, const ColorEncoding& c_desired, + const JxlCmsInterface& cms, ThreadPool* pool, + ImageBundle* store, const ImageBundle** out); + +} // namespace jxl + +#endif // LIB_JXL_ENC_IMAGE_BUNDLE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_jxl_skcms.h b/third-party/libjxl/libjxl/lib/jxl/enc_jxl_skcms.h new file mode 100644 index 0000000000..3c364e883d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_jxl_skcms.h @@ -0,0 +1,54 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_JXL_SKCMS_H_ +#define LIB_JXL_ENC_JXL_SKCMS_H_ + +// skcms wrapper to rename the skcms symbols to avoid conflicting names with +// other projects using skcms as well. When using JPEGXL_BUNDLE_SKCMS the +// bundled functions will be renamed from skcms_ to jxl_skcms_ + +#ifdef SKCMS_API +#error "Must include enc_jxl_skcms.h and not skcms.h directly" +#endif // SKCMS_API + +#if JPEGXL_BUNDLE_SKCMS + +#define skcms_252_random_bytes jxl_skcms_252_random_bytes +#define skcms_AdaptToXYZD50 jxl_skcms_AdaptToXYZD50 +#define skcms_ApproximateCurve jxl_skcms_ApproximateCurve +#define skcms_ApproximatelyEqualProfiles jxl_skcms_ApproximatelyEqualProfiles +#define skcms_AreApproximateInverses jxl_skcms_AreApproximateInverses +#define skcms_GetCHAD jxl_skcms_GetCHAD +#define skcms_GetTagByIndex jxl_skcms_GetTagByIndex +#define skcms_GetTagBySignature jxl_skcms_GetTagBySignature +#define skcms_GetWTPT jxl_skcms_GetWTPT +#define skcms_Identity_TransferFunction jxl_skcms_Identity_TransferFunction +#define skcms_MakeUsableAsDestination jxl_skcms_MakeUsableAsDestination +#define skcms_MakeUsableAsDestinationWithSingleCurve \ + jxl_skcms_MakeUsableAsDestinationWithSingleCurve +#define skcms_Matrix3x3_concat jxl_skcms_Matrix3x3_concat +#define skcms_Matrix3x3_invert jxl_skcms_Matrix3x3_invert +#define skcms_MaxRoundtripError jxl_skcms_MaxRoundtripError +#define skcms_Parse jxl_skcms_Parse +#define skcms_PrimariesToXYZD50 jxl_skcms_PrimariesToXYZD50 +#define skcms_sRGB_Inverse_TransferFunction \ + jxl_skcms_sRGB_Inverse_TransferFunction +#define skcms_sRGB_profile jxl_skcms_sRGB_profile +#define skcms_sRGB_TransferFunction jxl_skcms_sRGB_TransferFunction +#define skcms_TransferFunction_eval jxl_skcms_TransferFunction_eval +#define skcms_TransferFunction_invert jxl_skcms_TransferFunction_invert +#define skcms_TransferFunction_makeHLGish 
jxl_skcms_TransferFunction_makeHLGish +#define skcms_TransferFunction_makePQish jxl_skcms_TransferFunction_makePQish +#define skcms_Transform jxl_skcms_Transform +#define skcms_TransformWithPalette jxl_skcms_TransformWithPalette +#define skcms_TRCs_AreApproximateInverse jxl_skcms_TRCs_AreApproximateInverse +#define skcms_XYZD50_profile jxl_skcms_XYZD50_profile + +#endif // JPEGXL_BUNDLE_SKCMS + +#include "skcms.h" + +#endif // LIB_JXL_ENC_JXL_SKCMS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_linalg.cc b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.cc new file mode 100644 index 0000000000..fe2090a909 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.cc @@ -0,0 +1,52 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_linalg.h" + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +void ConvertToDiagonal(const ImageD& A, ImageD* const JXL_RESTRICT diag, + ImageD* const JXL_RESTRICT U) { +#if JXL_ENABLE_ASSERT + JXL_ASSERT(A.xsize() == 2); + JXL_ASSERT(A.ysize() == 2); + JXL_ASSERT(std::abs(A.Row(0)[1] - A.Row(1)[0]) < 1e-15); +#endif + + if (std::abs(A.ConstRow(0)[1]) < 1e-15) { + // Already diagonal. 
+ diag->Row(0)[0] = A.ConstRow(0)[0]; + diag->Row(0)[1] = A.ConstRow(1)[1]; + U->Row(0)[0] = U->Row(1)[1] = 1.0; + U->Row(0)[1] = U->Row(1)[0] = 0.0; + return; + } + double b = -(A.Row(0)[0] + A.Row(1)[1]); + double c = A.Row(0)[0] * A.Row(1)[1] - A.Row(0)[1] * A.Row(0)[1]; + double d = b * b - 4.0 * c; + double sqd = std::sqrt(d); + double l1 = (-b - sqd) * 0.5; + double l2 = (-b + sqd) * 0.5; + + double v1[2] = {A.Row(0)[0] - l1, A.Row(1)[0]}; + double v1n = 1.0 / std::hypot(v1[0], v1[1]); + v1[0] = v1[0] * v1n; + v1[1] = v1[1] * v1n; + + diag->Row(0)[0] = l1; + diag->Row(0)[1] = l2; + + U->Row(0)[0] = v1[1]; + U->Row(0)[1] = -v1[0]; + U->Row(1)[0] = v1[0]; + U->Row(1)[1] = v1[1]; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_linalg.h b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.h new file mode 100644 index 0000000000..791770d5d4 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_linalg.h @@ -0,0 +1,24 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_LINALG_H_ +#define LIB_JXL_LINALG_H_ + +// Linear algebra. + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/image.h" + +namespace jxl { + +using ImageD = Plane; + +// A is symmetric, U is orthogonal, and A = U * Diagonal(diag) * Transpose(U). +void ConvertToDiagonal(const ImageD& A, ImageD* JXL_RESTRICT diag, + ImageD* JXL_RESTRICT U); + +} // namespace jxl + +#endif // LIB_JXL_LINALG_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_linalg_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_linalg_test.cc new file mode 100644 index 0000000000..967b9a3afb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_linalg_test.cc @@ -0,0 +1,118 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_linalg.h" + +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +ImageD Identity(const size_t N) { + ImageD out(N, N); + for (size_t i = 0; i < N; ++i) { + double* JXL_RESTRICT row = out.Row(i); + std::fill(row, row + N, 0); + row[i] = 1.0; + } + return out; +} + +ImageD Diagonal(const ImageD& d) { + JXL_ASSERT(d.ysize() == 1); + ImageD out(d.xsize(), d.xsize()); + const double* JXL_RESTRICT row_diag = d.Row(0); + for (size_t k = 0; k < d.xsize(); ++k) { + double* JXL_RESTRICT row_out = out.Row(k); + std::fill(row_out, row_out + d.xsize(), 0.0); + row_out[k] = row_diag[k]; + } + return out; +} + +ImageD MatMul(const ImageD& A, const ImageD& B) { + JXL_ASSERT(A.ysize() == B.xsize()); + ImageD out(A.xsize(), B.ysize()); + for (size_t y = 0; y < B.ysize(); ++y) { + const double* const JXL_RESTRICT row_b = B.Row(y); + double* const JXL_RESTRICT row_out = out.Row(y); + for (size_t x = 0; x < A.xsize(); ++x) { + row_out[x] = 0.0; + for (size_t k = 0; k < B.xsize(); ++k) { + row_out[x] += A.Row(k)[x] * row_b[k]; + } + } + } + return out; +} + +ImageD Transpose(const ImageD& A) { + ImageD out(A.ysize(), A.xsize()); + for (size_t x = 0; x < A.xsize(); ++x) { + double* const JXL_RESTRICT row_out = out.Row(x); + for (size_t y = 0; y < A.ysize(); ++y) { + row_out[y] = A.Row(y)[x]; + } + } + return out; +} + +ImageD RandomSymmetricMatrix(const size_t N, Rng& rng, const double vmin, + const double vmax) { + ImageD A(N, N); + GenerateImage(rng, &A, vmin, vmax); + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < i; ++j) { + A.Row(j)[i] = A.Row(i)[j]; + } + } + return A; +} + +void VerifyMatrixEqual(const ImageD& A, const ImageD& B, const double eps) { + ASSERT_EQ(A.xsize(), B.xsize()); + ASSERT_EQ(A.ysize(), B.ysize()); + for (size_t y = 0; y < A.ysize(); ++y) { + for (size_t x = 0; x < 
A.xsize(); ++x) { + ASSERT_NEAR(A.Row(y)[x], B.Row(y)[x], eps); + } + } +} + +void VerifyOrthogonal(const ImageD& A, const double eps) { + VerifyMatrixEqual(Identity(A.xsize()), MatMul(Transpose(A), A), eps); +} + +TEST(LinAlgTest, ConvertToDiagonal) { + { + ImageD I = Identity(2); + ImageD U(2, 2), d(2, 1); + ConvertToDiagonal(I, &d, &U); + VerifyMatrixEqual(I, U, 1e-15); + for (size_t k = 0; k < 2; ++k) { + ASSERT_NEAR(d.Row(0)[k], 1.0, 1e-15); + } + } + { + ImageD A = Identity(2); + A.Row(0)[1] = A.Row(1)[0] = 2.0; + ImageD U(2, 2), d(2, 1); + ConvertToDiagonal(A, &d, &U); + VerifyOrthogonal(U, 1e-12); + VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12); + } + Rng rng(0); + for (size_t i = 0; i < 100; ++i) { + ImageD A = RandomSymmetricMatrix(2, rng, -1.0, 1.0); + ImageD U(2, 2), d(2, 1); + ConvertToDiagonal(A, &d, &U); + VerifyOrthogonal(U, 1e-12); + VerifyMatrixEqual(A, MatMul(U, MatMul(Diagonal(d), Transpose(U))), 1e-12); + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_modular.cc b/third-party/libjxl/libjxl/lib/jxl/enc_modular.cc new file mode 100644 index 0000000000..3c323404b7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_modular.cc @@ -0,0 +1,1747 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_modular.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/compressed_dc.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_cluster.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_gaborish.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_patch_dictionary.h" +#include "lib/jxl/enc_quant_weights.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/encoding/enc_debug_tree.h" +#include "lib/jxl/modular/encoding/enc_encoding.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/encoding/ma_common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/options.h" +#include "lib/jxl/modular/transform/enc_transform.h" +#include "lib/jxl/toc.h" + +namespace jxl { + +namespace { +// constexpr bool kPrintTree = false; + +// Squeeze default quantization factors +// these quantization factors are for -Q 50 (other qualities simply scale the +// factors; things are rounded down and obviously cannot get below 1) +static const float squeeze_quality_factor = + 0.35; // for easy tweaking of the quality range (decrease this number for + // higher quality) +static const float squeeze_luma_factor = + 1.1; // for easy tweaking of the balance between luma (or anything + // non-chroma) and chroma (decrease this number for higher quality + // luma) +static const float squeeze_quality_factor_xyb = 2.4f; +static const float squeeze_xyb_qtable[3][16] = { + {163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, 0.64, 0.32, 0.16, + 0.08, 0.04, 0.02, 0.01, 0.005}, // Y + {1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, + 0.5}, // X + {2048, 
1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, + 0.5}, // B-Y +}; + +static const float squeeze_luma_qtable[16] = { + 163.84, 81.92, 40.96, 20.48, 10.24, 5.12, 2.56, 1.28, + 0.64, 0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005}; +// for 8-bit input, the range of YCoCg chroma is -255..255 so basically this +// does 4:2:0 subsampling (two most fine grained layers get quantized away) +static const float squeeze_chroma_qtable[16] = { + 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1, 0.5, 0.5, 0.5, 0.5, 0.5}; + +// `cutoffs` must be sorted. +Tree MakeFixedTree(int property, const std::vector& cutoffs, + Predictor pred, size_t num_pixels) { + size_t log_px = CeilLog2Nonzero(num_pixels); + size_t min_gap = 0; + // Reduce fixed tree height when encoding small images. + if (log_px < 14) { + min_gap = 8 * (14 - log_px); + } + Tree tree; + struct NodeInfo { + size_t begin, end, pos; + }; + std::queue q; + // Leaf IDs will be set by roundtrip decoding the tree. + tree.push_back(PropertyDecisionNode::Leaf(pred)); + q.push(NodeInfo{0, cutoffs.size(), 0}); + while (!q.empty()) { + NodeInfo info = q.front(); + q.pop(); + if (info.begin + min_gap >= info.end) continue; + uint32_t split = (info.begin + info.end) / 2; + tree[info.pos] = + PropertyDecisionNode::Split(property, cutoffs[split], tree.size()); + q.push(NodeInfo{split + 1, info.end, tree.size()}); + tree.push_back(PropertyDecisionNode::Leaf(pred)); + q.push(NodeInfo{info.begin, split, tree.size()}); + tree.push_back(PropertyDecisionNode::Leaf(pred)); + } + return tree; +} + +Tree PredefinedTree(ModularOptions::TreeKind tree_kind, size_t total_pixels) { + if (tree_kind == ModularOptions::TreeKind::kJpegTranscodeACMeta || + tree_kind == ModularOptions::TreeKind::kTrivialTreeNoPredictor) { + // All the data is 0, so no need for a fancy tree. + return {PropertyDecisionNode::Leaf(Predictor::Zero)}; + } + if (tree_kind == ModularOptions::TreeKind::kFalconACMeta) { + // All the data is 0 except the quant field. 
TODO(veluca): make that 0 too. + return {PropertyDecisionNode::Leaf(Predictor::Left)}; + } + if (tree_kind == ModularOptions::TreeKind::kACMeta) { + // Small image. + if (total_pixels < 1024) { + return {PropertyDecisionNode::Leaf(Predictor::Left)}; + } + Tree tree; + // 0: c > 1 + tree.push_back(PropertyDecisionNode::Split(0, 1, 1)); + // 1: c > 2 + tree.push_back(PropertyDecisionNode::Split(0, 2, 3)); + // 2: c > 0 + tree.push_back(PropertyDecisionNode::Split(0, 0, 5)); + // 3: EPF control field (all 0 or 4), top > 0 + tree.push_back(PropertyDecisionNode::Split(6, 0, 21)); + // 4: ACS+QF, y > 0 + tree.push_back(PropertyDecisionNode::Split(2, 0, 7)); + // 5: CfL x + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient)); + // 6: CfL b + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Gradient)); + // 7: QF: split according to the left quant value. + tree.push_back(PropertyDecisionNode::Split(7, 5, 9)); + // 8: ACS: split in 4 segments (8x8 from 0 to 3, large square 4-5, large + // rectangular 6-11, 8x8 12+), according to previous ACS value. 
+ tree.push_back(PropertyDecisionNode::Split(7, 5, 15)); + // QF + tree.push_back(PropertyDecisionNode::Split(7, 11, 11)); + tree.push_back(PropertyDecisionNode::Split(7, 3, 13)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Left)); + // ACS + tree.push_back(PropertyDecisionNode::Split(7, 11, 17)); + tree.push_back(PropertyDecisionNode::Split(7, 3, 19)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + // EPF, left > 0 + tree.push_back(PropertyDecisionNode::Split(7, 0, 23)); + tree.push_back(PropertyDecisionNode::Split(7, 0, 25)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + tree.push_back(PropertyDecisionNode::Leaf(Predictor::Zero)); + return tree; + } + if (tree_kind == ModularOptions::TreeKind::kWPFixedDC) { + std::vector cutoffs = { + -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15, + -11, -7, -4, -3, -1, 0, 1, 3, 5, 7, 11, + 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500}; + return MakeFixedTree(kWPProp, cutoffs, Predictor::Weighted, total_pixels); + } + if (tree_kind == ModularOptions::TreeKind::kGradientFixedDC) { + std::vector cutoffs = { + -500, -392, -255, -191, -127, -95, -63, -47, -31, -23, -15, + -11, -7, -4, -3, -1, 0, 1, 3, 5, 7, 11, + 15, 23, 31, 47, 63, 95, 127, 191, 255, 392, 500}; + return MakeFixedTree(kGradientProp, cutoffs, Predictor::Gradient, + total_pixels); + } + JXL_UNREACHABLE("Unreachable"); + return {}; +} + +// Merges the trees in `trees` using nodes that decide on stream_id, as 
defined +// by `tree_splits`. +void MergeTrees(const std::vector& trees, + const std::vector& tree_splits, size_t begin, + size_t end, Tree* tree) { + JXL_ASSERT(trees.size() + 1 == tree_splits.size()); + JXL_ASSERT(end > begin); + JXL_ASSERT(end <= trees.size()); + if (end == begin + 1) { + // Insert the tree, adding the opportune offset to all child nodes. + // This will make the leaf IDs wrong, but subsequent roundtripping will fix + // them. + size_t sz = tree->size(); + tree->insert(tree->end(), trees[begin].begin(), trees[begin].end()); + for (size_t i = sz; i < tree->size(); i++) { + (*tree)[i].lchild += sz; + (*tree)[i].rchild += sz; + } + return; + } + size_t mid = (begin + end) / 2; + size_t splitval = tree_splits[mid] - 1; + size_t cur = tree->size(); + tree->emplace_back(1 /*stream_id*/, splitval, 0, 0, Predictor::Zero, 0, 1); + (*tree)[cur].lchild = tree->size(); + MergeTrees(trees, tree_splits, mid, end, tree); + (*tree)[cur].rchild = tree->size(); + MergeTrees(trees, tree_splits, begin, mid, tree); +} + +void QuantizeChannel(Channel& ch, const int q) { + if (q == 1) return; + for (size_t y = 0; y < ch.plane.ysize(); y++) { + pixel_type* row = ch.plane.Row(y); + for (size_t x = 0; x < ch.plane.xsize(); x++) { + if (row[x] < 0) { + row[x] = -((-row[x] + q / 2) / q) * q; + } else { + row[x] = ((row[x] + q / 2) / q) * q; + } + } + } +} + +// convert binary32 float that corresponds to custom [bits]-bit float (with +// [exp_bits] exponent bits) to a [bits]-bit integer representation that should +// fit in pixel_type +Status float_to_int(const float* const row_in, pixel_type* const row_out, + size_t xsize, unsigned int bits, unsigned int exp_bits, + bool fp, double dfactor) { + JXL_ASSERT(sizeof(pixel_type) * 8 >= bits); + if (!fp) { + if (bits > 22) { + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row_in[x] * dfactor + (row_in[x] < 0 ? 
-0.5 : 0.5); + } + } else { + float factor = dfactor; + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row_in[x] * factor + (row_in[x] < 0 ? -0.5f : 0.5f); + } + } + return true; + } + if (bits == 32 && fp) { + JXL_ASSERT(exp_bits == 8); + memcpy((void*)row_out, (const void*)row_in, 4 * xsize); + return true; + } + + int exp_bias = (1 << (exp_bits - 1)) - 1; + int max_exp = (1 << exp_bits) - 1; + uint32_t sign = (1u << (bits - 1)); + int mant_bits = bits - exp_bits - 1; + int mant_shift = 23 - mant_bits; + for (size_t x = 0; x < xsize; ++x) { + uint32_t f; + memcpy(&f, &row_in[x], 4); + int signbit = (f >> 31); + f &= 0x7fffffff; + if (f == 0) { + row_out[x] = (signbit ? sign : 0); + continue; + } + int exp = (f >> 23) - 127; + if (exp == 128) return JXL_FAILURE("Inf/NaN not allowed"); + int mantissa = (f & 0x007fffff); + // broke up the binary32 into its parts, now reassemble into + // arbitrary float + exp += exp_bias; + if (exp < 0) { // will become a subnormal number + // add implicit leading 1 to mantissa + mantissa |= 0x00800000; + if (exp < -mant_bits) { + return JXL_FAILURE( + "Invalid float number: %g cannot be represented with %i " + "exp_bits and %i mant_bits (exp %i)", + row_in[x], exp_bits, mant_bits, exp); + } + mantissa >>= 1 - exp; + exp = 0; + } + // exp should be representable in exp_bits, otherwise input was + // invalid + if (exp > max_exp) return JXL_FAILURE("Invalid float exponent"); + if (mantissa & ((1 << mant_shift) - 1)) { + return JXL_FAILURE("%g is losing precision (mant: %x)", row_in[x], + mantissa); + } + mantissa >>= mant_shift; + f = (signbit ? 
sign : 0); + f |= (exp << mant_bits); + f |= mantissa; + row_out[x] = (pixel_type)f; + } + return true; +} +} // namespace + +ModularFrameEncoder::ModularFrameEncoder(const FrameHeader& frame_header, + const CompressParams& cparams_orig) + : frame_dim_(frame_header.ToFrameDimensions()), cparams_(cparams_orig) { + size_t num_streams = + ModularStreamId::Num(frame_dim_, frame_header.passes.num_passes); + if (cparams_.ModularPartIsLossless()) { + switch (cparams_.decoding_speed_tier) { + case 0: + break; + case 1: + cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kWPOnly; + break; + case 2: { + cparams_.options.wp_tree_mode = ModularOptions::TreeMode::kGradientOnly; + cparams_.options.predictor = Predictor::Gradient; + break; + } + case 3: { // LZ77, no Gradient. + cparams_.options.nb_repeats = 0; + cparams_.options.predictor = Predictor::Gradient; + break; + } + default: { // LZ77, no predictor. + cparams_.options.nb_repeats = 0; + cparams_.options.predictor = Predictor::Zero; + break; + } + } + } + if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive && + cparams_.ModularPartIsLossless()) { + cparams_.options.tree_kind = + ModularOptions::TreeKind::kTrivialTreeNoPredictor; + cparams_.options.nb_repeats = 0; + } + stream_images_.resize(num_streams); + + // use a sensible default if nothing explicit is specified: + // Squeeze for lossy, no squeeze for lossless + if (cparams_.responsive < 0) { + if (cparams_.ModularPartIsLossless()) { + cparams_.responsive = 0; + } else { + cparams_.responsive = 1; + } + } + + cparams_.options.splitting_heuristics_node_threshold = + 82 + 14 * static_cast(cparams_.speed_tier); + + { + // Set properties. + std::vector prop_order; + if (cparams_.responsive) { + // Properties in order of their likelihood of being useful for Squeeze + // residuals. + prop_order = {0, 1, 4, 5, 6, 7, 8, 15, 9, 10, 11, 12, 13, 14, 2, 3}; + } else { + // Same, but for the non-Squeeze case. 
+ prop_order = {0, 1, 15, 9, 10, 11, 12, 13, 14, 2, 3, 4, 5, 6, 7, 8}; + // if few groups, don't use group as a property + if (num_streams < 30 && cparams_.speed_tier > SpeedTier::kTortoise) { + prop_order.erase(prop_order.begin() + 1); + } + } + switch (cparams_.speed_tier) { + case SpeedTier::kHare: + cparams_.options.splitting_heuristics_properties.assign( + prop_order.begin(), prop_order.begin() + 4); + cparams_.options.max_property_values = 24; + break; + case SpeedTier::kWombat: + cparams_.options.splitting_heuristics_properties.assign( + prop_order.begin(), prop_order.begin() + 5); + cparams_.options.max_property_values = 32; + break; + case SpeedTier::kSquirrel: + cparams_.options.splitting_heuristics_properties.assign( + prop_order.begin(), prop_order.begin() + 7); + cparams_.options.max_property_values = 48; + break; + case SpeedTier::kKitten: + cparams_.options.splitting_heuristics_properties.assign( + prop_order.begin(), prop_order.begin() + 10); + cparams_.options.max_property_values = 96; + break; + case SpeedTier::kTortoise: + cparams_.options.splitting_heuristics_properties = prop_order; + cparams_.options.max_property_values = 256; + break; + default: + cparams_.options.splitting_heuristics_properties.assign( + prop_order.begin(), prop_order.begin() + 3); + cparams_.options.max_property_values = 16; + break; + } + if (cparams_.speed_tier > SpeedTier::kTortoise) { + // Gradient in previous channels. + for (int i = 0; i < cparams_.options.max_properties; i++) { + cparams_.options.splitting_heuristics_properties.push_back( + kNumNonrefProperties + i * 4 + 3); + } + } else { + // All the extra properties in Tortoise mode. 
+ for (int i = 0; i < cparams_.options.max_properties * 4; i++) { + cparams_.options.splitting_heuristics_properties.push_back( + kNumNonrefProperties + i); + } + } + } + + if (cparams_.options.predictor == static_cast(-1)) { + // no explicit predictor(s) given, set a good default + if ((cparams_.speed_tier <= SpeedTier::kTortoise || + cparams_.modular_mode == false) && + cparams_.IsLossless() && cparams_.responsive == false) { + // TODO(veluca): allow all predictors that don't break residual + // multipliers in lossy mode. + cparams_.options.predictor = Predictor::Variable; + } else if (cparams_.responsive || cparams_.lossy_palette) { + // zero predictor for Squeeze residues and lossy palette + cparams_.options.predictor = Predictor::Zero; + } else if (!cparams_.IsLossless()) { + // If not responsive and lossy. TODO(veluca): use near_lossless instead? + cparams_.options.predictor = Predictor::Gradient; + } else if (cparams_.speed_tier < SpeedTier::kFalcon) { + // try median and weighted predictor for anything else + cparams_.options.predictor = Predictor::Best; + } else if (cparams_.speed_tier == SpeedTier::kFalcon) { + // just weighted predictor in falcon mode + cparams_.options.predictor = Predictor::Weighted; + } else if (cparams_.speed_tier > SpeedTier::kFalcon) { + // just gradient predictor in thunder mode + cparams_.options.predictor = Predictor::Gradient; + } + } else { + delta_pred_ = cparams_.options.predictor; + if (cparams_.lossy_palette) cparams_.options.predictor = Predictor::Zero; + } + if (!cparams_.ModularPartIsLossless()) { + if (cparams_.options.predictor == Predictor::Weighted || + cparams_.options.predictor == Predictor::Variable || + cparams_.options.predictor == Predictor::Best) + cparams_.options.predictor = Predictor::Zero; + } + tree_splits_.push_back(0); + if (cparams_.modular_mode == false) { + cparams_.options.fast_decode_multiplier = 1.0f; + tree_splits_.push_back(ModularStreamId::VarDCTDC(0).ID(frame_dim_)); + 
tree_splits_.push_back(ModularStreamId::ModularDC(0).ID(frame_dim_)); + tree_splits_.push_back(ModularStreamId::ACMetadata(0).ID(frame_dim_)); + tree_splits_.push_back(ModularStreamId::QuantTable(0).ID(frame_dim_)); + tree_splits_.push_back(ModularStreamId::ModularAC(0, 0).ID(frame_dim_)); + ac_metadata_size.resize(frame_dim_.num_dc_groups); + extra_dc_precision.resize(frame_dim_.num_dc_groups); + } + tree_splits_.push_back(num_streams); + cparams_.options.max_chan_size = frame_dim_.group_dim; + cparams_.options.group_dim = frame_dim_.group_dim; + + // TODO(veluca): figure out how to use different predictor sets per channel. + stream_options_.resize(num_streams, cparams_.options); +} + +bool do_transform(Image& image, const Transform& tr, + const weighted::Header& wp_header, + jxl::ThreadPool* pool = nullptr, bool force_jxlart = false) { + Transform t = tr; + bool did_it = true; + if (force_jxlart) { + if (!t.MetaApply(image)) return false; + } else { + did_it = TransformForward(t, image, wp_header, pool); + } + if (did_it) image.transform.push_back(t); + return did_it; +} + +Status ModularFrameEncoder::ComputeEncodingData( + const FrameHeader& frame_header, const ImageMetadata& metadata, + Image3F* JXL_RESTRICT color, const std::vector& extra_channels, + PassesEncoderState* JXL_RESTRICT enc_state, const JxlCmsInterface& cms, + ThreadPool* pool, AuxOut* aux_out, bool do_color) { + JXL_DEBUG_V(6, "Computing modular encoding data for frame %s", + frame_header.DebugString().c_str()); + + if (do_color && frame_header.loop_filter.gab) { + float w = 0.9908511000000001f; + float weights[3] = {w, w, w}; + GaborishInverse(color, weights, pool); + } + + if (do_color && metadata.bit_depth.bits_per_sample <= 16 && + cparams_.speed_tier < SpeedTier::kCheetah && + cparams_.decoding_speed_tier < 2) { + FindBestPatchDictionary(*color, enc_state, cms, nullptr, aux_out, + cparams_.color_transform == ColorTransform::kXYB); + PatchDictionaryEncoder::SubtractFrom( + 
enc_state->shared.image_features.patches, color); + } + + // Convert ImageBundle to modular Image object + const size_t xsize = frame_dim_.xsize; + const size_t ysize = frame_dim_.ysize; + + int nb_chans = 3; + if (metadata.color_encoding.IsGray() && + cparams_.color_transform == ColorTransform::kNone) { + nb_chans = 1; + } + if (!do_color) nb_chans = 0; + + nb_chans += extra_channels.size(); + + bool fp = metadata.bit_depth.floating_point_sample && + cparams_.color_transform != ColorTransform::kXYB; + + // bits_per_sample is just metadata for XYB images. + if (metadata.bit_depth.bits_per_sample >= 32 && do_color && + cparams_.color_transform != ColorTransform::kXYB) { + if (metadata.bit_depth.bits_per_sample == 32 && fp == false) { + return JXL_FAILURE("uint32_t not supported in enc_modular"); + } else if (metadata.bit_depth.bits_per_sample > 32) { + return JXL_FAILURE("bits_per_sample > 32 not supported"); + } + } + + // in the non-float case, there is an implicit 0 sign bit + int max_bitdepth = + do_color ? metadata.bit_depth.bits_per_sample + (fp ? 
0 : 1) : 0; + Image& gi = stream_images_[0]; + gi = Image(xsize, ysize, metadata.bit_depth.bits_per_sample, nb_chans); + int c = 0; + if (cparams_.color_transform == ColorTransform::kXYB && + cparams_.modular_mode == true) { + float enc_factors[3] = {32768.0f, 2048.0f, 2048.0f}; + if (cparams_.butteraugli_distance > 0 && !cparams_.responsive) { + // quantize XYB here and then treat it as a lossless image + enc_factors[0] *= 1.f / (1.f + 23.f * cparams_.butteraugli_distance); + enc_factors[1] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); + enc_factors[2] *= 1.f / (1.f + 14.f * cparams_.butteraugli_distance); + cparams_.butteraugli_distance = 0; + } + if (cparams_.manual_xyb_factors.size() == 3) { + DequantMatricesSetCustomDC(&enc_state->shared.matrices, + cparams_.manual_xyb_factors.data()); + // TODO(jon): update max_bitdepth in this case + } else { + DequantMatricesSetCustomDC(&enc_state->shared.matrices, enc_factors); + max_bitdepth = 12; + } + } + pixel_type maxval = gi.bitdepth < 32 ? (1u << gi.bitdepth) - 1 : 0; + if (do_color) { + for (; c < 3; c++) { + if (metadata.color_encoding.IsGray() && + cparams_.color_transform == ColorTransform::kNone && + c != (cparams_.color_transform == ColorTransform::kXYB ? 
1 : 0)) + continue; + int c_out = c; + // XYB is encoded as YX(B-Y) + if (cparams_.color_transform == ColorTransform::kXYB && c < 2) + c_out = 1 - c_out; + double factor = maxval; + if (cparams_.color_transform == ColorTransform::kXYB) + factor = enc_state->shared.matrices.InvDCQuant(c); + if (c == 2 && cparams_.color_transform == ColorTransform::kXYB) { + JXL_ASSERT(!fp); + for (size_t y = 0; y < ysize; ++y) { + const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y); + pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); + pixel_type* const JXL_RESTRICT row_Y = gi.channel[0].Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row_in[x] * factor + 0.5f; + row_out[x] -= row_Y[x]; + // zero the lsb of B + row_out[x] = row_out[x] / 2 * 2; + } + } + } else { + int bits = metadata.bit_depth.bits_per_sample; + int exp_bits = metadata.bit_depth.exponent_bits_per_sample; + gi.channel[c_out].hshift = + enc_state->shared.frame_header.chroma_subsampling.HShift(c); + gi.channel[c_out].vshift = + enc_state->shared.frame_header.chroma_subsampling.VShift(c); + size_t xsize_shifted = DivCeil(xsize, 1 << gi.channel[c_out].hshift); + size_t ysize_shifted = DivCeil(ysize, 1 << gi.channel[c_out].vshift); + gi.channel[c_out].shrink(xsize_shifted, ysize_shifted); + std::atomic has_error{false}; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, ysize_shifted, ThreadPool::NoInit, + [&](const int task, const int thread) { + const size_t y = task; + const float* const JXL_RESTRICT row_in = color->PlaneRow(c, y); + pixel_type* const JXL_RESTRICT row_out = gi.channel[c_out].Row(y); + if (!float_to_int(row_in, row_out, xsize_shifted, bits, exp_bits, + fp, factor)) { + has_error = true; + }; + }, + "float2int")); + if (has_error) { + return JXL_FAILURE("Error in float to integer conversion"); + } + } + } + if (metadata.color_encoding.IsGray() && + cparams_.color_transform == ColorTransform::kNone) + c = 1; + } + + for (size_t ec = 0; ec < extra_channels.size(); ec++, 
c++) { + const ExtraChannelInfo& eci = metadata.extra_channel_info[ec]; + size_t ecups = frame_header.extra_channel_upsampling[ec]; + gi.channel[c].shrink(DivCeil(frame_dim_.xsize_upsampled, ecups), + DivCeil(frame_dim_.ysize_upsampled, ecups)); + gi.channel[c].hshift = gi.channel[c].vshift = + CeilLog2Nonzero(ecups) - CeilLog2Nonzero(frame_header.upsampling); + + int bits = eci.bit_depth.bits_per_sample; + int exp_bits = eci.bit_depth.exponent_bits_per_sample; + bool fp = eci.bit_depth.floating_point_sample; + double factor = (fp ? 1 : ((1u << eci.bit_depth.bits_per_sample) - 1)); + if (bits + (fp ? 0 : 1) > max_bitdepth) max_bitdepth = bits + (fp ? 0 : 1); + std::atomic has_error{false}; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, gi.channel[c].plane.ysize(), ThreadPool::NoInit, + [&](const int task, const int thread) { + const size_t y = task; + const float* const JXL_RESTRICT row_in = extra_channels[ec].Row(y); + pixel_type* const JXL_RESTRICT row_out = gi.channel[c].Row(y); + if (!float_to_int(row_in, row_out, gi.channel[c].plane.xsize(), bits, + exp_bits, fp, factor)) { + has_error = true; + }; + }, + "float2int")); + if (has_error) return JXL_FAILURE("Error in float to integer conversion"); + } + JXL_ASSERT(c == nb_chans); + + int level_max_bitdepth = (cparams_.level == 5 ? 
16 : 32); + if (max_bitdepth > level_max_bitdepth) + return JXL_FAILURE( + "Bitdepth too high for level %i (need %i bits, have only %i in this " + "level)", + cparams_.level, max_bitdepth, level_max_bitdepth); + + // Set options and apply transformations + if (!cparams_.ModularPartIsLossless()) { + if (cparams_.palette_colors != 0) { + JXL_DEBUG_V(3, "Lossy encode, not doing palette transforms"); + } + if (cparams_.color_transform == ColorTransform::kXYB) { + cparams_.channel_colors_pre_transform_percent = 0; + } + cparams_.channel_colors_percent = 0; + cparams_.palette_colors = 0; + cparams_.lossy_palette = false; + } + + // Global palette + if (cparams_.palette_colors != 0 || cparams_.lossy_palette) { + // all-channel palette (e.g. RGBA) + if (gi.channel.size() - gi.nb_meta_channels > 1) { + Transform maybe_palette(TransformId::kPalette); + maybe_palette.begin_c = gi.nb_meta_channels; + maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels; + maybe_palette.nb_colors = + std::min((int)(xsize * ysize / 2), std::abs(cparams_.palette_colors)); + maybe_palette.ordered_palette = cparams_.palette_colors >= 0; + maybe_palette.lossy_palette = + (cparams_.lossy_palette && maybe_palette.num_c == 3); + if (maybe_palette.lossy_palette) { + maybe_palette.predictor = delta_pred_; + } + // TODO(veluca): use a custom weighted header if using the weighted + // predictor. 
+ do_transform(gi, maybe_palette, weighted::Header(), pool, + cparams_.options.zero_tokens); + } + // all-minus-one-channel palette (RGB with separate alpha, or CMY with + // separate K) + if (gi.channel.size() - gi.nb_meta_channels > 3) { + Transform maybe_palette_3(TransformId::kPalette); + maybe_palette_3.begin_c = gi.nb_meta_channels; + maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1; + maybe_palette_3.nb_colors = + std::min((int)(xsize * ysize / 3), std::abs(cparams_.palette_colors)); + maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; + maybe_palette_3.lossy_palette = cparams_.lossy_palette; + if (maybe_palette_3.lossy_palette) { + maybe_palette_3.predictor = delta_pred_; + } + do_transform(gi, maybe_palette_3, weighted::Header(), pool, + cparams_.options.zero_tokens); + } + } + + // Global channel palette + if (cparams_.channel_colors_pre_transform_percent > 0 && + !cparams_.lossy_palette && + (cparams_.speed_tier <= SpeedTier::kThunder || + (do_color && metadata.bit_depth.bits_per_sample > 8))) { + // single channel palette (like FLIF's ChannelCompact) + size_t nb_channels = gi.channel.size() - gi.nb_meta_channels; + int orig_bitdepth = max_bitdepth; + max_bitdepth = 0; + for (size_t i = 0; i < nb_channels; i++) { + int32_t min, max; + compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); + int64_t colors = (int64_t)max - min + 1; + JXL_DEBUG_V(10, "Channel %" PRIuS ": range=%i..%i", i, min, max); + Transform maybe_palette_1(TransformId::kPalette); + maybe_palette_1.begin_c = i + gi.nb_meta_channels; + maybe_palette_1.num_c = 1; + // simple heuristic: if less than X percent of the values in the range + // actually occur, it is probably worth it to do a compaction + // (but only if the channel palette is less than 6% the size of the + // image itself) + maybe_palette_1.nb_colors = std::min( + (int)(xsize * ysize / 16), + (int)(cparams_.channel_colors_pre_transform_percent / 100. 
* colors)); + if (do_transform(gi, maybe_palette_1, weighted::Header(), pool)) { + // effective bit depth is lower, adjust quantization accordingly + compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); + if (max < maxval) maxval = max; + int ch_bitdepth = + (max > 0 ? CeilLog2Nonzero(static_cast(max)) : 0); + if (ch_bitdepth > max_bitdepth) max_bitdepth = ch_bitdepth; + } else + max_bitdepth = orig_bitdepth; + } + } + + // don't do an RCT if we're short on bits + if (cparams_.color_transform == ColorTransform::kNone && do_color && + gi.channel.size() - gi.nb_meta_channels >= 3 && + max_bitdepth + 1 < level_max_bitdepth) { + if (cparams_.colorspace < 0 && (!cparams_.ModularPartIsLossless() || + cparams_.speed_tier > SpeedTier::kHare)) { + Transform ycocg{TransformId::kRCT}; + ycocg.rct_type = 6; + ycocg.begin_c = gi.nb_meta_channels; + do_transform(gi, ycocg, weighted::Header(), pool); + max_bitdepth++; + } else if (cparams_.colorspace > 0) { + Transform sg(TransformId::kRCT); + sg.begin_c = gi.nb_meta_channels; + sg.rct_type = cparams_.colorspace; + do_transform(gi, sg, weighted::Header(), pool); + max_bitdepth++; + } + } + + // don't do squeeze if we don't have some spare bits + if (cparams_.responsive && !gi.channel.empty() && + max_bitdepth + 2 < level_max_bitdepth) { + Transform t(TransformId::kSqueeze); + t.squeezes = cparams_.squeezes; + do_transform(gi, t, weighted::Header(), pool); + max_bitdepth += 2; + } + + if (max_bitdepth + 1 > level_max_bitdepth) { + // force no group RCTs if we don't have a spare bit + cparams_.colorspace = 0; + } + JXL_ASSERT(max_bitdepth <= level_max_bitdepth); + + std::vector quants; + + if (!cparams_.ModularPartIsLossless()) { + quants.resize(gi.channel.size(), 1); + float quantizer = 0.25f; + if (!cparams_.responsive) { + JXL_DEBUG_V(1, + "Warning: lossy compression without Squeeze " + "transform is just color quantization."); + quantizer *= 0.1f; + } + float bitdepth_correction = 1.f; + if (cparams_.color_transform 
!= ColorTransform::kXYB) { + bitdepth_correction = maxval / 255.f; + } + std::vector quantizers; + float dist = cparams_.butteraugli_distance; + for (size_t i = 0; i < 3; i++) { + quantizers.push_back(quantizer * dist * bitdepth_correction); + } + for (size_t i = 0; i < extra_channels.size(); i++) { + int ec_bitdepth = + metadata.extra_channel_info[i].bit_depth.bits_per_sample; + pixel_type ec_maxval = ec_bitdepth < 32 ? (1u << ec_bitdepth) - 1 : 0; + bitdepth_correction = ec_maxval / 255.f; + if (i < cparams_.ec_distance.size()) dist = cparams_.ec_distance[i]; + if (dist < 0) dist = cparams_.butteraugli_distance; + quantizers.push_back(quantizer * dist * bitdepth_correction); + } + if (cparams_.options.nb_repeats == 0) { + return JXL_FAILURE("nb_repeats = 0 not supported with modular lossy!"); + } + for (uint32_t i = gi.nb_meta_channels; i < gi.channel.size(); i++) { + Channel& ch = gi.channel[i]; + int shift = ch.hshift + ch.vshift; // number of pixel halvings + if (shift > 16) shift = 16; + if (shift > 0) shift--; + int q; + // assuming default Squeeze here + int component = + (do_color ? 0 : 3) + ((i - gi.nb_meta_channels) % nb_chans); + // last 4 channels are final chroma residuals + if (nb_chans > 2 && i >= gi.channel.size() - 4 && cparams_.responsive) { + component = 1; + } + if (cparams_.color_transform == ColorTransform::kXYB && component < 3) { + q = quantizers[component] * squeeze_quality_factor_xyb * + squeeze_xyb_qtable[component][shift]; + } else { + if (cparams_.colorspace != 0 && component > 0 && component < 3) { + q = quantizers[component] * squeeze_quality_factor * + squeeze_chroma_qtable[shift]; + } else { + q = quantizers[component] * squeeze_quality_factor * + squeeze_luma_factor * squeeze_luma_qtable[shift]; + } + } + if (q < 1) q = 1; + QuantizeChannel(gi.channel[i], q); + quants[i] = q; + } + } + + // Fill other groups. 
+ struct GroupParams { + Rect rect; + int minShift; + int maxShift; + ModularStreamId id; + }; + std::vector stream_params; + + stream_options_[0] = cparams_.options; + + // DC + for (size_t group_id = 0; group_id < frame_dim_.num_dc_groups; group_id++) { + const size_t gx = group_id % frame_dim_.xsize_dc_groups; + const size_t gy = group_id / frame_dim_.xsize_dc_groups; + const Rect rect(gx * frame_dim_.dc_group_dim, gy * frame_dim_.dc_group_dim, + frame_dim_.dc_group_dim, frame_dim_.dc_group_dim); + // minShift==3 because (frame_dim.dc_group_dim >> 3) == frame_dim.group_dim + // maxShift==1000 is infinity + stream_params.push_back( + GroupParams{rect, 3, 1000, ModularStreamId::ModularDC(group_id)}); + } + // AC global -> nothing. + // AC + for (size_t group_id = 0; group_id < frame_dim_.num_groups; group_id++) { + const size_t gx = group_id % frame_dim_.xsize_groups; + const size_t gy = group_id / frame_dim_.xsize_groups; + const Rect mrect(gx * frame_dim_.group_dim, gy * frame_dim_.group_dim, + frame_dim_.group_dim, frame_dim_.group_dim); + for (size_t i = 0; i < enc_state->progressive_splitter.GetNumPasses(); + i++) { + int maxShift, minShift; + frame_header.passes.GetDownsamplingBracket(i, minShift, maxShift); + stream_params.push_back(GroupParams{ + mrect, minShift, maxShift, ModularStreamId::ModularAC(group_id, i)}); + } + } + // if there's only one group, everything ends up in GlobalModular + // in that case, also try RCTs/WP params for the one group + if (stream_params.size() == 2) { + stream_params.push_back(GroupParams{Rect(0, 0, xsize, ysize), 0, 1000, + ModularStreamId::Global()}); + } + gi_channel_.resize(stream_images_.size()); + + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, stream_params.size(), ThreadPool::NoInit, + [&](const uint32_t i, size_t /* thread */) { + stream_options_[stream_params[i].id.ID(frame_dim_)] = cparams_.options; + JXL_CHECK(PrepareStreamParams( + stream_params[i].rect, cparams_, stream_params[i].minShift, + 
stream_params[i].maxShift, stream_params[i].id, do_color)); + }, + "ChooseParams")); + { + // Clear out channels that have been copied to groups. + Image& full_image = stream_images_[0]; + size_t c = full_image.nb_meta_channels; + for (; c < full_image.channel.size(); c++) { + Channel& fc = full_image.channel[c]; + if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; + } + for (; c < full_image.channel.size(); c++) { + full_image.channel[c].plane = ImageI(); + } + } + + if (!quants.empty()) { + for (uint32_t stream_id = 0; stream_id < stream_images_.size(); + stream_id++) { + // skip non-modular stream_ids + if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; + const Image& image = stream_images_[stream_id]; + const ModularOptions& options = stream_options_[stream_id]; + for (uint32_t i = image.nb_meta_channels; i < image.channel.size(); i++) { + if (i >= image.nb_meta_channels && + (image.channel[i].w > options.max_chan_size || + image.channel[i].h > options.max_chan_size)) { + continue; + } + if (stream_id > 0 && gi_channel_[stream_id].empty()) continue; + size_t ch_id = stream_id == 0 + ? i + : gi_channel_[stream_id][i - image.nb_meta_channels]; + uint32_t q = quants[ch_id]; + // Inform the tree splitting heuristics that each channel in each group + // used this quantization factor. This will produce a tree with the + // given multipliers. + if (multiplier_info_.empty() || + multiplier_info_.back().range[1][0] != stream_id || + multiplier_info_.back().multiplier != q) { + StaticPropRange range; + range[0] = {{i, i + 1}}; + range[1] = {{stream_id, stream_id + 1}}; + multiplier_info_.push_back({range, (uint32_t)q}); + } else { + // Previous channel in the same group had the same quantization + // factor. Don't provide two different ranges, as that creates + // unnecessary nodes. 
+ multiplier_info_.back().range[0][1] = i + 1; + } + } + } + // Merge group+channel settings that have the same channels and quantization + // factors, to avoid unnecessary nodes. + std::sort(multiplier_info_.begin(), multiplier_info_.end(), + [](ModularMultiplierInfo a, ModularMultiplierInfo b) { + return std::make_tuple(a.range, a.multiplier) < + std::make_tuple(b.range, b.multiplier); + }); + size_t new_num = 1; + for (size_t i = 1; i < multiplier_info_.size(); i++) { + ModularMultiplierInfo& prev = multiplier_info_[new_num - 1]; + ModularMultiplierInfo& cur = multiplier_info_[i]; + if (prev.range[0] == cur.range[0] && prev.multiplier == cur.multiplier && + prev.range[1][1] == cur.range[1][0]) { + prev.range[1][1] = cur.range[1][1]; + } else { + multiplier_info_[new_num++] = multiplier_info_[i]; + } + } + multiplier_info_.resize(new_num); + } + + JXL_RETURN_IF_ERROR(ValidateChannelDimensions(gi, stream_options_[0])); + + return PrepareEncoding(frame_header, pool, enc_state->heuristics.get(), + aux_out); +} + +Status ModularFrameEncoder::PrepareEncoding(const FrameHeader& frame_header, + ThreadPool* pool, + EncoderHeuristics* heuristics, + AuxOut* aux_out) { + if (!tree_.empty()) return true; + + // Compute tree. + size_t num_streams = stream_images_.size(); + stream_headers_.resize(num_streams); + tokens_.resize(num_streams); + + if (heuristics->CustomFixedTreeLossless(frame_dim_, &tree_)) { + // Using a fixed tree. + } else if (cparams_.speed_tier < SpeedTier::kFalcon || + !cparams_.modular_mode) { + // Avoid creating a tree with leaves that don't correspond to any pixels. 
+ std::vector useful_splits; + useful_splits.reserve(tree_splits_.size()); + for (size_t chunk = 0; chunk < tree_splits_.size() - 1; chunk++) { + bool has_pixels = false; + size_t start = tree_splits_[chunk]; + size_t stop = tree_splits_[chunk + 1]; + for (size_t i = start; i < stop; i++) { + if (!stream_images_[i].empty()) has_pixels = true; + } + if (has_pixels) { + useful_splits.push_back(tree_splits_[chunk]); + } + } + // Don't do anything if modular mode does not have any pixels in this image + if (useful_splits.empty()) return true; + useful_splits.push_back(tree_splits_.back()); + + std::atomic_flag invalid_force_wp = ATOMIC_FLAG_INIT; + + std::vector trees(useful_splits.size() - 1); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, useful_splits.size() - 1, ThreadPool::NoInit, + [&](const uint32_t chunk, size_t /* thread */) { + // TODO(veluca): parallelize more. + size_t total_pixels = 0; + uint32_t start = useful_splits[chunk]; + uint32_t stop = useful_splits[chunk + 1]; + while (start < stop && stream_images_[start].empty()) ++start; + while (start < stop && stream_images_[stop - 1].empty()) --stop; + uint32_t max_c = 0; + if (stream_options_[start].tree_kind != + ModularOptions::TreeKind::kLearn) { + for (size_t i = start; i < stop; i++) { + for (const Channel& ch : stream_images_[i].channel) { + total_pixels += ch.w * ch.h; + } + } + trees[chunk] = + PredefinedTree(stream_options_[start].tree_kind, total_pixels); + return; + } + TreeSamples tree_samples; + if (!tree_samples.SetPredictor(stream_options_[start].predictor, + stream_options_[start].wp_tree_mode)) { + invalid_force_wp.test_and_set(std::memory_order_acq_rel); + return; + } + if (!tree_samples.SetProperties( + stream_options_[start].splitting_heuristics_properties, + stream_options_[start].wp_tree_mode)) { + invalid_force_wp.test_and_set(std::memory_order_acq_rel); + return; + } + std::vector pixel_samples; + std::vector diff_samples; + std::vector group_pixel_count; + std::vector 
channel_pixel_count; + for (size_t i = start; i < stop; i++) { + max_c = std::max(stream_images_[i].channel.size(), max_c); + CollectPixelSamples(stream_images_[i], stream_options_[i], i, + group_pixel_count, channel_pixel_count, + pixel_samples, diff_samples); + } + StaticPropRange range; + range[0] = {{0, max_c}}; + range[1] = {{start, stop}}; + auto local_multiplier_info = multiplier_info_; + + tree_samples.PreQuantizeProperties( + range, local_multiplier_info, group_pixel_count, + channel_pixel_count, pixel_samples, diff_samples, + stream_options_[start].max_property_values); + for (size_t i = start; i < stop; i++) { + JXL_CHECK(ModularGenericCompress( + stream_images_[i], stream_options_[i], /*writer=*/nullptr, + /*aux_out=*/nullptr, 0, i, &tree_samples, &total_pixels)); + } + + // TODO(veluca): parallelize more. + trees[chunk] = + LearnTree(std::move(tree_samples), total_pixels, + stream_options_[start], local_multiplier_info, range); + }, + "LearnTrees")); + if (invalid_force_wp.test_and_set(std::memory_order_acq_rel)) { + return JXL_FAILURE("PrepareEncoding: force_no_wp with {Weighted}"); + } + tree_.clear(); + MergeTrees(trees, useful_splits, 0, useful_splits.size() - 1, &tree_); + } else { + // Fixed tree. 
+ size_t total_pixels = 0; + for (const Image& img : stream_images_) { + for (const Channel& ch : img.channel) { + total_pixels += ch.w * ch.h; + } + } + if (cparams_.speed_tier <= SpeedTier::kFalcon) { + tree_ = + PredefinedTree(ModularOptions::TreeKind::kWPFixedDC, total_pixels); + } else if (cparams_.speed_tier <= SpeedTier::kThunder) { + tree_ = PredefinedTree(ModularOptions::TreeKind::kGradientFixedDC, + total_pixels); + } else { + tree_ = {PropertyDecisionNode::Leaf(Predictor::Gradient)}; + } + } + tree_tokens_.resize(1); + tree_tokens_[0].clear(); + Tree decoded_tree; + TokenizeTree(tree_, &tree_tokens_[0], &decoded_tree); + JXL_ASSERT(tree_.size() == decoded_tree.size()); + tree_ = std::move(decoded_tree); + + /* TODO(szabadka) Add text output callback to cparams + if (kPrintTree && WantDebugOutput(aux_out)) { + if (frame_header.dc_level > 0) { + PrintTree(tree_, aux_out->debug_prefix + "/dc_frame_level" + + std::to_string(frame_header.dc_level) + "_tree"); + } else { + PrintTree(tree_, aux_out->debug_prefix + "/global_tree"); + } + } */ + + image_widths_.resize(num_streams); + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, num_streams, ThreadPool::NoInit, + [&](const uint32_t stream_id, size_t /* thread */) { + AuxOut my_aux_out; + tokens_[stream_id].clear(); + JXL_CHECK(ModularGenericCompress( + stream_images_[stream_id], stream_options_[stream_id], + /*writer=*/nullptr, &my_aux_out, 0, stream_id, + /*tree_samples=*/nullptr, + /*total_pixels=*/nullptr, + /*tree=*/&tree_, /*header=*/&stream_headers_[stream_id], + /*tokens=*/&tokens_[stream_id], + /*widths=*/&image_widths_[stream_id])); + }, + "ComputeTokens")); + return true; +} + +Status ModularFrameEncoder::EncodeGlobalInfo(BitWriter* writer, + AuxOut* aux_out) { + BitWriter::Allotment allotment(writer, 1); + // If we are using brotli, or not using modular mode. 
+ if (tree_tokens_.empty() || tree_tokens_[0].empty()) { + writer->Write(1, 0); + allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out); + return true; + } + writer->Write(1, 1); + allotment.ReclaimAndCharge(writer, kLayerModularTree, aux_out); + + // Write tree + HistogramParams params; + if (cparams_.speed_tier > SpeedTier::kKitten) { + params.clustering = HistogramParams::ClusteringType::kFast; + params.ans_histogram_strategy = + cparams_.speed_tier > SpeedTier::kThunder + ? HistogramParams::ANSHistogramStrategy::kFast + : HistogramParams::ANSHistogramStrategy::kApproximate; + params.lz77_method = + cparams_.decoding_speed_tier >= 3 && cparams_.modular_mode + ? (cparams_.speed_tier >= SpeedTier::kFalcon + ? HistogramParams::LZ77Method::kRLE + : HistogramParams::LZ77Method::kLZ77) + : HistogramParams::LZ77Method::kNone; + // Near-lossless DC, as well as modular mode, require choosing hybrid uint + // more carefully. + if ((!extra_dc_precision.empty() && extra_dc_precision[0] != 0) || + (cparams_.modular_mode && cparams_.speed_tier < SpeedTier::kCheetah)) { + params.uint_method = HistogramParams::HybridUintMethod::kFast; + } else { + params.uint_method = HistogramParams::HybridUintMethod::kNone; + } + } else if (cparams_.speed_tier <= SpeedTier::kTortoise) { + params.lz77_method = HistogramParams::LZ77Method::kOptimal; + } else { + params.lz77_method = HistogramParams::LZ77Method::kLZ77; + } + if (cparams_.decoding_speed_tier >= 1) { + params.max_histograms = 12; + } + if (cparams_.decoding_speed_tier >= 1 && cparams_.responsive) { + params.lz77_method = cparams_.speed_tier >= SpeedTier::kCheetah + ? HistogramParams::LZ77Method::kRLE + : cparams_.speed_tier >= SpeedTier::kKitten + ? 
HistogramParams::LZ77Method::kLZ77 + : HistogramParams::LZ77Method::kOptimal; + } + if (cparams_.decoding_speed_tier >= 2 && cparams_.responsive) { + params.uint_method = HistogramParams::HybridUintMethod::k000; + params.force_huffman = true; + } + BuildAndEncodeHistograms(params, kNumTreeContexts, tree_tokens_, &code_, + &context_map_, writer, kLayerModularTree, aux_out); + WriteTokens(tree_tokens_[0], code_, context_map_, writer, kLayerModularTree, + aux_out); + params.image_widths = image_widths_; + // Write histograms. + BuildAndEncodeHistograms(params, (tree_.size() + 1) / 2, tokens_, &code_, + &context_map_, writer, kLayerModularGlobal, aux_out); + return true; +} + +Status ModularFrameEncoder::EncodeStream(BitWriter* writer, AuxOut* aux_out, + size_t layer, + const ModularStreamId& stream) { + size_t stream_id = stream.ID(frame_dim_); + if (stream_images_[stream_id].channel.empty()) { + return true; // Image with no channels, header never gets decoded. + } + JXL_RETURN_IF_ERROR( + Bundle::Write(stream_headers_[stream_id], writer, layer, aux_out)); + WriteTokens(tokens_[stream_id], code_, context_map_, writer, layer, aux_out); + return true; +} + +namespace { +float EstimateWPCost(const Image& img, size_t i) { + size_t extra_bits = 0; + float histo_cost = 0; + HybridUintConfig config; + int32_t cutoffs[] = {-500, -392, -255, -191, -127, -95, -63, -47, -31, + -23, -15, -11, -7, -4, -3, -1, 0, 1, + 3, 5, 7, 11, 15, 23, 31, 47, 63, + 95, 127, 191, 255, 392, 500}; + constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; + Histogram histo[nc] = {}; + weighted::Header wp_header; + PredictorMode(i, &wp_header); + for (const Channel& ch : img.channel) { + const intptr_t onerow = ch.plane.PixelsPerRow(); + weighted::State wp_state(wp_header, ch.w, ch.h); + Properties properties(1); + for (size_t y = 0; y < ch.h; y++) { + const pixel_type* JXL_RESTRICT r = ch.Row(y); + for (size_t x = 0; x < ch.w; x++) { + size_t offset = 0; + pixel_type_w left = (x ? 
r[x - 1] : y ? *(r + x - onerow) : 0); + pixel_type_w top = (y ? *(r + x - onerow) : left); + pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); + pixel_type_w topright = + (x + 1 < ch.w && y ? *(r + x + 1 - onerow) : top); + pixel_type_w toptop = (y > 1 ? *(r + x - onerow - onerow) : top); + pixel_type guess = wp_state.Predict( + x, y, ch.w, top, left, topright, topleft, toptop, &properties, + offset); + size_t ctx = 0; + for (int c : cutoffs) { + ctx += c >= properties[0]; + } + pixel_type res = r[x] - guess; + uint32_t token, nbits, bits; + config.Encode(PackSigned(res), &token, &nbits, &bits); + histo[ctx].Add(token); + extra_bits += nbits; + wp_state.UpdateErrors(r[x], x, y, ch.w); + } + } + for (size_t h = 0; h < nc; h++) { + histo_cost += histo[h].ShannonEntropy(); + histo[h].Clear(); + } + } + return histo_cost + extra_bits; +} + +float EstimateCost(const Image& img) { + // TODO(veluca): consider SIMDfication of this code. + size_t extra_bits = 0; + float histo_cost = 0; + HybridUintConfig config; + uint32_t cutoffs[] = {0, 1, 3, 5, 7, 11, 15, 23, 31, + 47, 63, 95, 127, 191, 255, 392, 500}; + constexpr size_t nc = sizeof(cutoffs) / sizeof(*cutoffs) + 1; + Histogram histo[nc] = {}; + for (const Channel& ch : img.channel) { + const intptr_t onerow = ch.plane.PixelsPerRow(); + for (size_t y = 0; y < ch.h; y++) { + const pixel_type* JXL_RESTRICT r = ch.Row(y); + for (size_t x = 0; x < ch.w; x++) { + pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); + pixel_type_w top = (y ? *(r + x - onerow) : left); + pixel_type_w topleft = (x && y ? 
*(r + x - 1 - onerow) : left); + size_t maxdiff = std::max(std::max(left, top), topleft) - + std::min(std::min(left, top), topleft); + size_t ctx = 0; + for (uint32_t c : cutoffs) { + ctx += c > maxdiff; + } + pixel_type res = r[x] - ClampedGradient(top, left, topleft); + uint32_t token, nbits, bits; + config.Encode(PackSigned(res), &token, &nbits, &bits); + histo[ctx].Add(token); + extra_bits += nbits; + } + } + for (size_t h = 0; h < nc; h++) { + histo_cost += histo[h].ShannonEntropy(); + histo[h].Clear(); + } + } + return histo_cost + extra_bits; +} + +} // namespace + +Status ModularFrameEncoder::PrepareStreamParams(const Rect& rect, + const CompressParams& cparams_, + int minShift, int maxShift, + const ModularStreamId& stream, + bool do_color) { + size_t stream_id = stream.ID(frame_dim_); + Image& full_image = stream_images_[0]; + const size_t xsize = rect.xsize(); + const size_t ysize = rect.ysize(); + Image& gi = stream_images_[stream_id]; + if (stream_id > 0) { + gi = Image(xsize, ysize, full_image.bitdepth, 0); + // start at the first bigger-than-frame_dim.group_dim non-metachannel + size_t c = full_image.nb_meta_channels; + for (; c < full_image.channel.size(); c++) { + Channel& fc = full_image.channel[c]; + if (fc.w > frame_dim_.group_dim || fc.h > frame_dim_.group_dim) break; + } + for (; c < full_image.channel.size(); c++) { + Channel& fc = full_image.channel[c]; + int shift = std::min(fc.hshift, fc.vshift); + if (shift > maxShift) continue; + if (shift < minShift) continue; + Rect r(rect.x0() >> fc.hshift, rect.y0() >> fc.vshift, + rect.xsize() >> fc.hshift, rect.ysize() >> fc.vshift, fc.w, fc.h); + if (r.xsize() == 0 || r.ysize() == 0) continue; + gi_channel_[stream_id].push_back(c); + Channel gc(r.xsize(), r.ysize()); + gc.hshift = fc.hshift; + gc.vshift = fc.vshift; + for (size_t y = 0; y < r.ysize(); ++y) { + memcpy(gc.Row(y), r.ConstRow(fc.plane, y), + r.xsize() * sizeof(pixel_type)); + } + gi.channel.emplace_back(std::move(gc)); + } + + if 
(gi.channel.empty()) return true; + // Do some per-group transforms + + // Local palette + // TODO(veluca): make this work with quantize-after-prediction in lossy + // mode. + if (cparams_.butteraugli_distance == 0.f && cparams_.palette_colors != 0 && + cparams_.speed_tier < SpeedTier::kCheetah) { + // all-channel palette (e.g. RGBA) + if (gi.channel.size() - gi.nb_meta_channels > 1) { + Transform maybe_palette(TransformId::kPalette); + maybe_palette.begin_c = gi.nb_meta_channels; + maybe_palette.num_c = gi.channel.size() - gi.nb_meta_channels; + maybe_palette.nb_colors = std::abs(cparams_.palette_colors); + maybe_palette.ordered_palette = cparams_.palette_colors >= 0; + do_transform(gi, maybe_palette, weighted::Header()); + } + // all-minus-one-channel palette (RGB with separate alpha, or CMY with + // separate K) + if (gi.channel.size() - gi.nb_meta_channels > 3) { + Transform maybe_palette_3(TransformId::kPalette); + maybe_palette_3.begin_c = gi.nb_meta_channels; + maybe_palette_3.num_c = gi.channel.size() - gi.nb_meta_channels - 1; + maybe_palette_3.nb_colors = std::abs(cparams_.palette_colors); + maybe_palette_3.ordered_palette = cparams_.palette_colors >= 0; + maybe_palette_3.lossy_palette = cparams_.lossy_palette; + if (maybe_palette_3.lossy_palette) { + maybe_palette_3.predictor = Predictor::Weighted; + } + do_transform(gi, maybe_palette_3, weighted::Header()); + } + } + + // Local channel palette + if (cparams_.channel_colors_percent > 0 && + cparams_.butteraugli_distance == 0.f && !cparams_.lossy_palette && + cparams_.speed_tier < SpeedTier::kCheetah && + !(cparams_.responsive && cparams_.decoding_speed_tier >= 1)) { + // single channel palette (like FLIF's ChannelCompact) + size_t nb_channels = gi.channel.size() - gi.nb_meta_channels; + for (size_t i = 0; i < nb_channels; i++) { + int32_t min, max; + compute_minmax(gi.channel[gi.nb_meta_channels + i], &min, &max); + int64_t colors = (int64_t)max - min + 1; + JXL_DEBUG_V(10, "Channel %" PRIuS ": 
range=%i..%i", i, min, max); + Transform maybe_palette_1(TransformId::kPalette); + maybe_palette_1.begin_c = i + gi.nb_meta_channels; + maybe_palette_1.num_c = 1; + // simple heuristic: if less than X percent of the values in the range + // actually occur, it is probably worth it to do a compaction + // (but only if the channel palette is less than 80% the size of the + // image itself) + maybe_palette_1.nb_colors = + std::min((int)(xsize * ysize * 0.8), + (int)(cparams_.channel_colors_percent / 100. * colors)); + do_transform(gi, maybe_palette_1, weighted::Header()); + } + } + } + + // lossless and no specific color transform specified: try Nothing, YCoCg, + // and 17 RCTs + if (cparams_.color_transform == ColorTransform::kNone && + cparams_.IsLossless() && cparams_.colorspace < 0 && + gi.channel.size() - gi.nb_meta_channels >= 3 && + cparams_.responsive == false && do_color && + cparams_.speed_tier <= SpeedTier::kHare) { + Transform sg(TransformId::kRCT); + sg.begin_c = gi.nb_meta_channels; + size_t nb_rcts_to_try = 0; + switch (cparams_.speed_tier) { + case SpeedTier::kLightning: + case SpeedTier::kThunder: + case SpeedTier::kFalcon: + case SpeedTier::kCheetah: + nb_rcts_to_try = 0; // Just do global YCoCg + break; + case SpeedTier::kHare: + nb_rcts_to_try = 4; + break; + case SpeedTier::kWombat: + nb_rcts_to_try = 5; + break; + case SpeedTier::kSquirrel: + nb_rcts_to_try = 7; + break; + case SpeedTier::kKitten: + nb_rcts_to_try = 9; + break; + case SpeedTier::kGlacier: + case SpeedTier::kTortoise: + nb_rcts_to_try = 19; + break; + } + float best_cost = std::numeric_limits::max(); + size_t best_rct = 0; + // These should be 19 actually different transforms; the remaining ones + // are equivalent to one of these (note that the first two are do-nothing + // and YCoCg) modulo channel reordering (which only matters in the case of + // MA-with-prev-channels-properties) and/or sign (e.g. 
RmG vs GmR) + for (int i : {0 * 7 + 0, 0 * 7 + 6, 0 * 7 + 5, 1 * 7 + 3, 3 * 7 + 5, + 5 * 7 + 5, 1 * 7 + 5, 2 * 7 + 5, 1 * 7 + 1, 0 * 7 + 4, + 1 * 7 + 2, 2 * 7 + 1, 2 * 7 + 2, 2 * 7 + 3, 4 * 7 + 4, + 4 * 7 + 5, 0 * 7 + 2, 0 * 7 + 1, 0 * 7 + 3}) { + if (nb_rcts_to_try == 0) break; + sg.rct_type = i; + nb_rcts_to_try--; + if (do_transform(gi, sg, weighted::Header())) { + float cost = EstimateCost(gi); + if (cost < best_cost) { + best_rct = i; + best_cost = cost; + } + Transform t = gi.transform.back(); + JXL_RETURN_IF_ERROR(t.Inverse(gi, weighted::Header(), nullptr)); + gi.transform.pop_back(); + } + } + // Apply the best RCT to the image for future encoding. + sg.rct_type = best_rct; + do_transform(gi, sg, weighted::Header()); + } else { + // No need to try anything, just use the default options. + } + size_t nb_wp_modes = 1; + if (cparams_.speed_tier <= SpeedTier::kTortoise) { + nb_wp_modes = 5; + } else if (cparams_.speed_tier <= SpeedTier::kKitten) { + nb_wp_modes = 2; + } + if (nb_wp_modes > 1 && + (stream_options_[stream_id].predictor == Predictor::Weighted || + stream_options_[stream_id].predictor == Predictor::Best || + stream_options_[stream_id].predictor == Predictor::Variable)) { + float best_cost = std::numeric_limits::max(); + stream_options_[stream_id].wp_mode = 0; + for (size_t i = 0; i < nb_wp_modes; i++) { + float cost = EstimateWPCost(gi, i); + if (cost < best_cost) { + best_cost = cost; + stream_options_[stream_id].wp_mode = i; + } + } + } + return true; +} + +constexpr float q_deadzone = 0.62f; +int QuantizeWP(const int32_t* qrow, size_t onerow, size_t c, size_t x, size_t y, + size_t w, weighted::State* wp_state, float value, + float inv_factor) { + float svalue = value * inv_factor; + PredictionResult pred = + PredictNoTreeWP(w, qrow + x, onerow, x, y, Predictor::Weighted, wp_state); + svalue -= pred.guess; + if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; + int residual = roundf(svalue); + if (residual > 2 || residual < -2) residual 
= roundf(svalue * 0.5) * 2; + return residual + pred.guess; +} + +int QuantizeGradient(const int32_t* qrow, size_t onerow, size_t c, size_t x, + size_t y, size_t w, float value, float inv_factor) { + float svalue = value * inv_factor; + PredictionResult pred = + PredictNoTreeNoWP(w, qrow + x, onerow, x, y, Predictor::Gradient); + svalue -= pred.guess; + if (svalue > -q_deadzone && svalue < q_deadzone) svalue = 0; + int residual = roundf(svalue); + if (residual > 2 || residual < -2) residual = roundf(svalue * 0.5) * 2; + return residual + pred.guess; +} + +void ModularFrameEncoder::AddVarDCTDC(const Image3F& dc, size_t group_index, + bool nl_dc, PassesEncoderState* enc_state, + bool jpeg_transcode) { + const Rect r = enc_state->shared.DCGroupRect(group_index); + extra_dc_precision[group_index] = nl_dc ? 1 : 0; + float mul = 1 << extra_dc_precision[group_index]; + + size_t stream_id = ModularStreamId::VarDCTDC(group_index).ID(frame_dim_); + stream_options_[stream_id].max_chan_size = 0xFFFFFF; + stream_options_[stream_id].predictor = Predictor::Weighted; + stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kWPOnly; + if (cparams_.speed_tier >= SpeedTier::kSquirrel) { + stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kWPFixedDC; + } + if (cparams_.speed_tier < SpeedTier::kSquirrel && !nl_dc) { + stream_options_[stream_id].predictor = + (cparams_.speed_tier < SpeedTier::kKitten ? 
Predictor::Variable + : Predictor::Best); + stream_options_[stream_id].wp_tree_mode = + ModularOptions::TreeMode::kDefault; + stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; + } + if (cparams_.decoding_speed_tier >= 1) { + stream_options_[stream_id].tree_kind = + ModularOptions::TreeKind::kGradientFixedDC; + } + + stream_images_[stream_id] = Image(r.xsize(), r.ysize(), 8, 3); + if (nl_dc && stream_options_[stream_id].tree_kind == + ModularOptions::TreeKind::kGradientFixedDC) { + JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444()); + for (size_t c : {1, 0, 2}) { + float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; + float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; + float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; + for (size_t y = 0; y < r.ysize(); y++) { + int32_t* quant_row = + stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); + size_t stride = stream_images_[stream_id] + .channel[c < 2 ? 
c ^ 1 : c] + .plane.PixelsPerRow(); + const float* row = r.ConstPlaneRow(dc, c, y); + if (c == 1) { + for (size_t x = 0; x < r.xsize(); x++) { + quant_row[x] = QuantizeGradient(quant_row, stride, c, x, y, + r.xsize(), row[x], inv_factor); + } + } else { + int32_t* quant_row_y = + stream_images_[stream_id].channel[0].plane.Row(y); + for (size_t x = 0; x < r.xsize(); x++) { + quant_row[x] = QuantizeGradient( + quant_row, stride, c, x, y, r.xsize(), + row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); + } + } + } + } + } else if (nl_dc) { + JXL_ASSERT(enc_state->shared.frame_header.chroma_subsampling.Is444()); + for (size_t c : {1, 0, 2}) { + float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; + float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; + float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; + weighted::Header header; + weighted::State wp_state(header, r.xsize(), r.ysize()); + for (size_t y = 0; y < r.ysize(); y++) { + int32_t* quant_row = + stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c].plane.Row(y); + size_t stride = stream_images_[stream_id] + .channel[c < 2 ? 
c ^ 1 : c] + .plane.PixelsPerRow(); + const float* row = r.ConstPlaneRow(dc, c, y); + if (c == 1) { + for (size_t x = 0; x < r.xsize(); x++) { + quant_row[x] = QuantizeWP(quant_row, stride, c, x, y, r.xsize(), + &wp_state, row[x], inv_factor); + wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); + } + } else { + int32_t* quant_row_y = + stream_images_[stream_id].channel[0].plane.Row(y); + for (size_t x = 0; x < r.xsize(); x++) { + quant_row[x] = QuantizeWP( + quant_row, stride, c, x, y, r.xsize(), &wp_state, + row[x] - quant_row_y[x] * (y_factor * cfl_factor), inv_factor); + wp_state.UpdateErrors(quant_row[x], x, y, r.xsize()); + } + } + } + } + } else if (enc_state->shared.frame_header.chroma_subsampling.Is444()) { + for (size_t c : {1, 0, 2}) { + float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; + float y_factor = enc_state->shared.quantizer.GetDcStep(1) / mul; + float cfl_factor = enc_state->shared.cmap.DCFactors()[c]; + for (size_t y = 0; y < r.ysize(); y++) { + int32_t* quant_row = + stream_images_[stream_id].channel[c < 2 ? 
c ^ 1 : c].plane.Row(y); + const float* row = r.ConstPlaneRow(dc, c, y); + if (c == 1) { + for (size_t x = 0; x < r.xsize(); x++) { + quant_row[x] = roundf(row[x] * inv_factor); + } + } else { + int32_t* quant_row_y = + stream_images_[stream_id].channel[0].plane.Row(y); + for (size_t x = 0; x < r.xsize(); x++) { + quant_row[x] = + roundf((row[x] - quant_row_y[x] * (y_factor * cfl_factor)) * + inv_factor); + } + } + } + } + } else { + for (size_t c : {1, 0, 2}) { + Rect rect( + r.x0() >> enc_state->shared.frame_header.chroma_subsampling.HShift(c), + r.y0() >> enc_state->shared.frame_header.chroma_subsampling.VShift(c), + r.xsize() >> + enc_state->shared.frame_header.chroma_subsampling.HShift(c), + r.ysize() >> + enc_state->shared.frame_header.chroma_subsampling.VShift(c)); + float inv_factor = enc_state->shared.quantizer.GetInvDcStep(c) * mul; + size_t ys = rect.ysize(); + size_t xs = rect.xsize(); + Channel& ch = stream_images_[stream_id].channel[c < 2 ? c ^ 1 : c]; + ch.w = xs; + ch.h = ys; + ch.shrink(); + for (size_t y = 0; y < ys; y++) { + int32_t* quant_row = ch.plane.Row(y); + const float* row = rect.ConstPlaneRow(dc, c, y); + for (size_t x = 0; x < xs; x++) { + quant_row[x] = roundf(row[x] * inv_factor); + } + } + } + } + + DequantDC(r, &enc_state->shared.dc_storage, &enc_state->shared.quant_dc, + stream_images_[stream_id], enc_state->shared.quantizer.MulDC(), + 1.0 / mul, enc_state->shared.cmap.DCFactors(), + enc_state->shared.frame_header.chroma_subsampling, + enc_state->shared.block_ctx_map); +} + +void ModularFrameEncoder::AddACMetadata(size_t group_index, bool jpeg_transcode, + PassesEncoderState* enc_state) { + const Rect r = enc_state->shared.DCGroupRect(group_index); + size_t stream_id = ModularStreamId::ACMetadata(group_index).ID(frame_dim_); + stream_options_[stream_id].max_chan_size = 0xFFFFFF; + stream_options_[stream_id].wp_tree_mode = ModularOptions::TreeMode::kNoWP; + if (jpeg_transcode) { + stream_options_[stream_id].tree_kind = + 
ModularOptions::TreeKind::kJpegTranscodeACMeta; + } else if (cparams_.speed_tier >= SpeedTier::kFalcon) { + stream_options_[stream_id].tree_kind = + ModularOptions::TreeKind::kFalconACMeta; + } else if (cparams_.speed_tier > SpeedTier::kKitten) { + stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kACMeta; + } + // If we are using a non-constant CfL field, and are in a slow enough mode, + // re-enable tree computation for it. + if (cparams_.speed_tier < SpeedTier::kSquirrel && + cparams_.force_cfl_jpeg_recompression) { + stream_options_[stream_id].tree_kind = ModularOptions::TreeKind::kLearn; + } + // YToX, YToB, ACS + QF, EPF + Image& image = stream_images_[stream_id]; + image = Image(r.xsize(), r.ysize(), 8, 4); + static_assert(kColorTileDimInBlocks == 8, "Color tile size changed"); + Rect cr(r.x0() >> 3, r.y0() >> 3, (r.xsize() + 7) >> 3, (r.ysize() + 7) >> 3); + image.channel[0] = Channel(cr.xsize(), cr.ysize(), 3, 3); + image.channel[1] = Channel(cr.xsize(), cr.ysize(), 3, 3); + image.channel[2] = Channel(r.xsize() * r.ysize(), 2, 0, 0); + ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytox_map, + Rect(image.channel[0].plane), &image.channel[0].plane); + ConvertPlaneAndClamp(cr, enc_state->shared.cmap.ytob_map, + Rect(image.channel[1].plane), &image.channel[1].plane); + size_t num = 0; + for (size_t y = 0; y < r.ysize(); y++) { + AcStrategyRow row_acs = enc_state->shared.ac_strategy.ConstRow(r, y); + const int32_t* row_qf = r.ConstRow(enc_state->shared.raw_quant_field, y); + const uint8_t* row_epf = r.ConstRow(enc_state->shared.epf_sharpness, y); + int32_t* out_acs = image.channel[2].plane.Row(0); + int32_t* out_qf = image.channel[2].plane.Row(1); + int32_t* row_out_epf = image.channel[3].plane.Row(y); + for (size_t x = 0; x < r.xsize(); x++) { + row_out_epf[x] = row_epf[x]; + if (!row_acs[x].IsFirstBlock()) continue; + out_acs[num] = row_acs[x].RawStrategy(); + out_qf[num] = row_qf[x] - 1; + num++; + } + } + image.channel[2].w = num; + 
ac_metadata_size[group_index] = num; +} + +void ModularFrameEncoder::EncodeQuantTable( + size_t size_x, size_t size_y, BitWriter* writer, + const QuantEncoding& encoding, size_t idx, + ModularFrameEncoder* modular_frame_encoder) { + JXL_ASSERT(encoding.qraw.qtable != nullptr); + JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size()); + JXL_CHECK(F16Coder::Write(encoding.qraw.qtable_den, writer)); + if (modular_frame_encoder) { + JXL_CHECK(modular_frame_encoder->EncodeStream( + writer, nullptr, 0, ModularStreamId::QuantTable(idx))); + return; + } + Image image(size_x, size_y, 8, 3); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < size_y; y++) { + int32_t* JXL_RESTRICT row = image.channel[c].Row(y); + for (size_t x = 0; x < size_x; x++) { + row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x]; + } + } + } + ModularOptions cfopts; + JXL_CHECK(ModularGenericCompress(image, cfopts, writer)); +} + +void ModularFrameEncoder::AddQuantTable(size_t size_x, size_t size_y, + const QuantEncoding& encoding, + size_t idx) { + size_t stream_id = ModularStreamId::QuantTable(idx).ID(frame_dim_); + JXL_ASSERT(encoding.qraw.qtable != nullptr); + JXL_ASSERT(size_x * size_y * 3 == encoding.qraw.qtable->size()); + Image& image = stream_images_[stream_id]; + image = Image(size_x, size_y, 8, 3); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < size_y; y++) { + int32_t* JXL_RESTRICT row = image.channel[c].Row(y); + for (size_t x = 0; x < size_x; x++) { + row[x] = (*encoding.qraw.qtable)[c * size_x * size_y + y * size_x + x]; + } + } + } +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_modular.h b/third-party/libjxl/libjxl/lib/jxl/enc_modular.h new file mode 100644 index 0000000000..2af66e951f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_modular.h @@ -0,0 +1,92 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_MODULAR_H_ +#define LIB_JXL_ENC_MODULAR_H_ + +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_modular.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +struct AuxOut; + +class ModularFrameEncoder { + public: + ModularFrameEncoder(const FrameHeader& frame_header, + const CompressParams& cparams_orig); + Status ComputeEncodingData(const FrameHeader& frame_header, + const ImageMetadata& metadata, + Image3F* JXL_RESTRICT color, + const std::vector& extra_channels, + PassesEncoderState* JXL_RESTRICT enc_state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out, bool do_color); + // Encodes global info (tree + histograms) in the `writer`. + Status EncodeGlobalInfo(BitWriter* writer, AuxOut* aux_out); + // Encodes a specific modular image (identified by `stream`) in the `writer`, + // assigning bits to the provided `layer`. + Status EncodeStream(BitWriter* writer, AuxOut* aux_out, size_t layer, + const ModularStreamId& stream); + // Creates a modular image for a given DC group of VarDCT mode. `dc` is the + // input DC image, not quantized; the group is specified by `group_index`, and + // `nl_dc` decides whether to apply a near-lossless processing to the DC or + // not. + void AddVarDCTDC(const Image3F& dc, size_t group_index, bool nl_dc, + PassesEncoderState* enc_state, bool jpeg_transcode); + // Creates a modular image for the AC metadata of the given group + // (`group_index`). + void AddACMetadata(size_t group_index, bool jpeg_transcode, + PassesEncoderState* enc_state); + // Encodes a RAW quantization table in `writer`. 
If `modular_frame_encoder` is + // null, the quantization table in `encoding` is used, with dimensions `size_x + // x size_y`. Otherwise, the table with ID `idx` is encoded from the given + // `modular_frame_encoder`. + static void EncodeQuantTable(size_t size_x, size_t size_y, BitWriter* writer, + const QuantEncoding& encoding, size_t idx, + ModularFrameEncoder* modular_frame_encoder); + // Stores a quantization table for future usage with `EncodeQuantTable`. + void AddQuantTable(size_t size_x, size_t size_y, + const QuantEncoding& encoding, size_t idx); + + std::vector ac_metadata_size; + std::vector extra_dc_precision; + + private: + Status PrepareEncoding(const FrameHeader& frame_header, ThreadPool* pool, + EncoderHeuristics* heuristics, + AuxOut* aux_out = nullptr); + Status PrepareStreamParams(const Rect& rect, const CompressParams& cparams, + int minShift, int maxShift, + const ModularStreamId& stream, bool do_color); + std::vector stream_images_; + std::vector stream_options_; + + Tree tree_; + std::vector> tree_tokens_; + std::vector stream_headers_; + std::vector> tokens_; + EntropyEncodingData code_; + std::vector context_map_; + FrameDimensions frame_dim_; + CompressParams cparams_; + std::vector tree_splits_; + std::vector multiplier_info_; + std::vector> gi_channel_; + std::vector image_widths_; + Predictor delta_pred_ = Predictor::Average4; +}; + +} // namespace jxl + +#endif // LIB_JXL_ENC_MODULAR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_noise.cc b/third-party/libjxl/libjxl/lib/jxl/enc_noise.cc new file mode 100644 index 0000000000..54bb4482e8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_noise.cc @@ -0,0 +1,374 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_noise.h" + +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_optimize.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/opsin_params.h" + +namespace jxl { +namespace { + +using OptimizeArray = optimize::Array; + +float GetScoreSumsOfAbsoluteDifferences(const Image3F& opsin, const int x, + const int y, const int block_size) { + const int small_bl_size_x = 3; + const int small_bl_size_y = 4; + const int kNumSAD = + (block_size - small_bl_size_x) * (block_size - small_bl_size_y); + // block_size x block_size reference pixels + int counter = 0; + const int offset = 2; + + std::vector sad(kNumSAD, 0); + for (int y_bl = 0; y_bl + small_bl_size_y < block_size; ++y_bl) { + for (int x_bl = 0; x_bl + small_bl_size_x < block_size; ++x_bl) { + float sad_sum = 0; + // size of the center patch, we compare all the patches inside window with + // the center one + for (int cy = 0; cy < small_bl_size_y; ++cy) { + for (int cx = 0; cx < small_bl_size_x; ++cx) { + float wnd = 0.5f * (opsin.PlaneRow(1, y + y_bl + cy)[x + x_bl + cx] + + opsin.PlaneRow(0, y + y_bl + cy)[x + x_bl + cx]); + float center = + 0.5f * (opsin.PlaneRow(1, y + offset + cy)[x + offset + cx] + + opsin.PlaneRow(0, y + offset + cy)[x + offset + cx]); + sad_sum += std::abs(center - wnd); + } + } + sad[counter++] = sad_sum; + } + } + const int kSamples = (kNumSAD) / 2; + // As with ROAD (rank order absolute distance), we keep the smallest half of + // the values in SAD (we use here the more robust patch SAD instead of + // absolute single-pixel differences). 
+ std::sort(sad.begin(), sad.end()); + const float total_sad_sum = + std::accumulate(sad.begin(), sad.begin() + kSamples, 0.0f); + return total_sad_sum / kSamples; +} + +class NoiseHistogram { + public: + static constexpr int kBins = 256; + + NoiseHistogram() { std::fill(bins, bins + kBins, 0); } + + void Increment(const float x) { bins[Index(x)] += 1; } + int Get(const float x) const { return bins[Index(x)]; } + int Bin(const size_t bin) const { return bins[bin]; } + + int Mode() const { + size_t max_idx = 0; + for (size_t i = 0; i < kBins; i++) { + if (bins[i] > bins[max_idx]) max_idx = i; + } + return max_idx; + } + + double Quantile(double q01) const { + const int64_t total = std::accumulate(bins, bins + kBins, int64_t{1}); + const int64_t target = static_cast(q01 * total); + // Until sum >= target: + int64_t sum = 0; + size_t i = 0; + for (; i < kBins; ++i) { + sum += bins[i]; + // Exact match: assume middle of bin i + if (sum == target) { + return i + 0.5; + } + if (sum > target) break; + } + + // Next non-empty bin (in case histogram is sparsely filled) + size_t next = i + 1; + while (next < kBins && bins[next] == 0) { + ++next; + } + + // Linear interpolation according to how far into next we went + const double excess = target - sum; + const double weight_next = bins[Index(next)] / excess; + return ClampX(next * weight_next + i * (1.0 - weight_next)); + } + + // Inter-quartile range + double IQR() const { return Quantile(0.75) - Quantile(0.25); } + + private: + template + T ClampX(const T x) const { + return std::min(std::max(T(0), x), T(kBins - 1)); + } + size_t Index(const float x) const { return ClampX(static_cast(x)); } + + uint32_t bins[kBins]; +}; + +std::vector GetSADScoresForPatches(const Image3F& opsin, + const size_t block_s, + const size_t num_bin, + NoiseHistogram* sad_histogram) { + std::vector sad_scores( + (opsin.ysize() / block_s) * (opsin.xsize() / block_s), 0.0f); + + int block_index = 0; + + for (size_t y = 0; y + block_s <= 
opsin.ysize(); y += block_s) { + for (size_t x = 0; x + block_s <= opsin.xsize(); x += block_s) { + float sad_sc = GetScoreSumsOfAbsoluteDifferences(opsin, x, y, block_s); + sad_scores[block_index++] = sad_sc; + sad_histogram->Increment(sad_sc * num_bin); + } + } + return sad_scores; +} + +float GetSADThreshold(const NoiseHistogram& histogram, const int num_bin) { + // Here we assume that the most patches with similar SAD value is a "flat" + // patches. However, some images might contain regular texture part and + // generate second strong peak at the histogram + // TODO(user) handle bimodal and heavy-tailed case + const int mode = histogram.Mode(); + return static_cast(mode) / NoiseHistogram::kBins; +} + +// loss = sum asym * (F(x) - nl)^2 + kReg * num_points * sum (w[i] - w[i+1])^2 +// where asym = 1 if F(x) < nl, kAsym if F(x) > nl. +struct LossFunction { + explicit LossFunction(std::vector nl0) : nl(std::move(nl0)) {} + + double Compute(const OptimizeArray& w, OptimizeArray* df, + bool skip_regularization = false) const { + constexpr double kReg = 0.005; + constexpr double kAsym = 1.1; + double loss_function = 0; + for (size_t i = 0; i < w.size(); i++) { + (*df)[i] = 0; + } + for (auto ind : nl) { + std::pair pos = IndexAndFrac(ind.intensity); + JXL_DASSERT(pos.first >= 0 && static_cast(pos.first) < + NoiseParams::kNumNoisePoints - 1); + double low = w[pos.first]; + double hi = w[pos.first + 1]; + double val = low * (1.0f - pos.second) + hi * pos.second; + double dist = val - ind.noise_level; + if (dist > 0) { + loss_function += kAsym * dist * dist; + (*df)[pos.first] -= kAsym * (1.0f - pos.second) * dist; + (*df)[pos.first + 1] -= kAsym * pos.second * dist; + } else { + loss_function += dist * dist; + (*df)[pos.first] -= (1.0f - pos.second) * dist; + (*df)[pos.first + 1] -= pos.second * dist; + } + } + if (skip_regularization) return loss_function; + for (size_t i = 0; i + 1 < w.size(); i++) { + double diff = w[i] - w[i + 1]; + loss_function += kReg * 
nl.size() * diff * diff; + (*df)[i] -= kReg * diff * nl.size(); + (*df)[i + 1] += kReg * diff * nl.size(); + } + return loss_function; + } + + std::vector nl; +}; + +void OptimizeNoiseParameters(const std::vector& noise_level, + NoiseParams* noise_params) { + constexpr double kMaxError = 1e-3; + static const double kPrecision = 1e-8; + static const int kMaxIter = 40; + + float avg = 0; + for (const NoiseLevel& nl : noise_level) { + avg += nl.noise_level; + } + avg /= noise_level.size(); + + LossFunction loss_function(noise_level); + OptimizeArray parameter_vector; + for (size_t i = 0; i < parameter_vector.size(); i++) { + parameter_vector[i] = avg; + } + + parameter_vector = optimize::OptimizeWithScaledConjugateGradientMethod( + loss_function, parameter_vector, kPrecision, kMaxIter); + + OptimizeArray df = parameter_vector; + float loss = loss_function.Compute(parameter_vector, &df, + /*skip_regularization=*/true) / + noise_level.size(); + + // Approximation went too badly: escape with no noise at all. 
+ if (loss > kMaxError) { + noise_params->Clear(); + return; + } + + for (size_t i = 0; i < parameter_vector.size(); i++) { + noise_params->lut[i] = std::max(parameter_vector[i], 0.0); + } +} + +std::vector GetNoiseLevel( + const Image3F& opsin, const std::vector& texture_strength, + const float threshold, const size_t block_s) { + std::vector noise_level_per_intensity; + + const int filt_size = 1; + static const float kLaplFilter[filt_size * 2 + 1][filt_size * 2 + 1] = { + {-0.25f, -1.0f, -0.25f}, + {-1.0f, 5.0f, -1.0f}, + {-0.25f, -1.0f, -0.25f}, + }; + + // The noise model is built based on channel 0.5 * (X+Y) as we notice that it + // is similar to the model 0.5 * (Y-X) + size_t patch_index = 0; + + for (size_t y = 0; y + block_s <= opsin.ysize(); y += block_s) { + for (size_t x = 0; x + block_s <= opsin.xsize(); x += block_s) { + if (texture_strength[patch_index] <= threshold) { + // Calculate mean value + float mean_int = 0; + for (size_t y_bl = 0; y_bl < block_s; ++y_bl) { + for (size_t x_bl = 0; x_bl < block_s; ++x_bl) { + mean_int += 0.5f * (opsin.PlaneRow(1, y + y_bl)[x + x_bl] + + opsin.PlaneRow(0, y + y_bl)[x + x_bl]); + } + } + mean_int /= block_s * block_s; + + // Calculate Noise level + float noise_level = 0; + size_t count = 0; + for (size_t y_bl = 0; y_bl < block_s; ++y_bl) { + for (size_t x_bl = 0; x_bl < block_s; ++x_bl) { + float filtered_value = 0; + for (int y_f = -1 * filt_size; y_f <= filt_size; ++y_f) { + if ((static_cast(y_bl) + y_f) >= 0 && + (y_bl + y_f) < block_s) { + for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) { + if ((static_cast(x_bl) + x_f) >= 0 && + (x_bl + x_f) < block_s) { + filtered_value += + 0.5f * + (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl + x_f] + + opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl + x_f]) * + kLaplFilter[y_f + filt_size][x_f + filt_size]; + } else { + filtered_value += + 0.5f * + (opsin.PlaneRow(1, y + y_bl + y_f)[x + x_bl - x_f] + + opsin.PlaneRow(0, y + y_bl + y_f)[x + x_bl - x_f]) * + 
kLaplFilter[y_f + filt_size][x_f + filt_size]; + } + } + } else { + for (int x_f = -1 * filt_size; x_f <= filt_size; ++x_f) { + if ((static_cast(x_bl) + x_f) >= 0 && + (x_bl + x_f) < block_s) { + filtered_value += + 0.5f * + (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl + x_f] + + opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl + x_f]) * + kLaplFilter[y_f + filt_size][x_f + filt_size]; + } else { + filtered_value += + 0.5f * + (opsin.PlaneRow(1, y + y_bl - y_f)[x + x_bl - x_f] + + opsin.PlaneRow(0, y + y_bl - y_f)[x + x_bl - x_f]) * + kLaplFilter[y_f + filt_size][x_f + filt_size]; + } + } + } + } + noise_level += std::abs(filtered_value); + ++count; + } + } + noise_level /= count; + NoiseLevel nl; + nl.intensity = mean_int; + nl.noise_level = noise_level; + noise_level_per_intensity.push_back(nl); + } + ++patch_index; + } + } + return noise_level_per_intensity; +} + +void EncodeFloatParam(float val, float precision, BitWriter* writer) { + JXL_ASSERT(val >= 0); + const int absval_quant = static_cast(val * precision + 0.5f); + JXL_ASSERT(absval_quant < (1 << 10)); + writer->Write(10, absval_quant); +} + +} // namespace + +Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params, + float quality_coef) { + // The size of a patch in decoder might be different from encoder's patch + // size. + // For encoder: the patch size should be big enough to estimate + // noise level, but, at the same time, it should be not too big + // to be able to estimate intensity value of the patch + const size_t block_s = 8; + const size_t kNumBin = 256; + NoiseHistogram sad_histogram; + std::vector sad_scores = + GetSADScoresForPatches(opsin, block_s, kNumBin, &sad_histogram); + float sad_threshold = GetSADThreshold(sad_histogram, kNumBin); + // If threshold is too large, the image has a strong pattern. This pattern + // fools our model and it will add too much noise. 
Therefore, we do not add + // noise for such images + if (sad_threshold > 0.15f || sad_threshold <= 0.0f) { + noise_params->Clear(); + return false; + } + std::vector nl = + GetNoiseLevel(opsin, sad_scores, sad_threshold, block_s); + + OptimizeNoiseParameters(nl, noise_params); + for (float& i : noise_params->lut) { + i *= quality_coef * 1.4; + } + return noise_params->HasAny(); +} + +void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer, + size_t layer, AuxOut* aux_out) { + JXL_ASSERT(noise_params.HasAny()); + + BitWriter::Allotment allotment(writer, NoiseParams::kNumNoisePoints * 16); + for (float i : noise_params.lut) { + EncodeFloatParam(i, kNoisePrecision, writer); + } + allotment.ReclaimAndCharge(writer, layer, aux_out); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_noise.h b/third-party/libjxl/libjxl/lib/jxl/enc_noise.h new file mode 100644 index 0000000000..851fdd12db --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_noise.h @@ -0,0 +1,34 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_NOISE_H_ +#define LIB_JXL_ENC_NOISE_H_ + +// Noise parameter estimation. + +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/image.h" +#include "lib/jxl/noise.h" + +namespace jxl { + +struct AuxOut; + +// Get parameters of the noise for NoiseParams model +// Returns whether a valid noise model (with HasAny()) is set. +Status GetNoiseParameter(const Image3F& opsin, NoiseParams* noise_params, + float quality_coef); + +// Does not write anything if `noise_params` are empty. Otherwise, caller must +// set FrameHeader.flags.kNoise. 
+void EncodeNoise(const NoiseParams& noise_params, BitWriter* writer, + size_t layer, AuxOut* aux_out); + +} // namespace jxl + +#endif // LIB_JXL_ENC_NOISE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_optimize.cc b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.cc new file mode 100644 index 0000000000..6865ff67df --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.cc @@ -0,0 +1,163 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_optimize.h" + +#include + +#include "lib/jxl/base/status.h" + +namespace jxl { + +namespace optimize { + +namespace { + +// simplex vector must be sorted by first element of its elements +std::vector Midpoint(const std::vector>& simplex) { + JXL_CHECK(!simplex.empty()); + JXL_CHECK(simplex.size() == simplex[0].size()); + int dim = simplex.size() - 1; + std::vector result(dim + 1, 0); + for (int i = 0; i < dim; i++) { + for (int k = 0; k < dim; k++) { + result[i + 1] += simplex[k][i + 1]; + } + result[i + 1] /= dim; + } + return result; +} + +// first element ignored +std::vector Subtract(const std::vector& a, + const std::vector& b) { + JXL_CHECK(a.size() == b.size()); + std::vector result(a.size()); + result[0] = 0; + for (size_t i = 1; i < result.size(); i++) { + result[i] = a[i] - b[i]; + } + return result; +} + +// first element ignored +std::vector Add(const std::vector& a, + const std::vector& b) { + JXL_CHECK(a.size() == b.size()); + std::vector result(a.size()); + result[0] = 0; + for (size_t i = 1; i < result.size(); i++) { + result[i] = a[i] + b[i]; + } + return result; +} + +// first element ignored +std::vector Average(const std::vector& a, + const std::vector& b) { + JXL_CHECK(a.size() == b.size()); + std::vector result(a.size()); + result[0] = 0; + for (size_t i = 1; i < result.size(); i++) { + result[i] = 0.5 * (a[i] + b[i]); + } + return result; 
+} + +// vec: [0] will contain the objective function, [1:] will +// contain the vector position for the objective function. +// fun: the function evaluates the value. +void Eval(std::vector* vec, + const std::function&)>& fun) { + std::vector args(vec->begin() + 1, vec->end()); + (*vec)[0] = fun(args); +} + +void Sort(std::vector>* simplex) { + std::sort(simplex->begin(), simplex->end()); +} + +// Main iteration step of Nelder-Mead like optimization. +void Reflect(std::vector>* simplex, + const std::function&)>& fun) { + Sort(simplex); + const std::vector& last = simplex->back(); + std::vector mid = Midpoint(*simplex); + std::vector diff = Subtract(mid, last); + std::vector mirrored = Add(mid, diff); + Eval(&mirrored, fun); + if (mirrored[0] > (*simplex)[simplex->size() - 2][0]) { + // Still the worst, shrink towards the best. + std::vector shrinking = Average(simplex->back(), (*simplex)[0]); + Eval(&shrinking, fun); + simplex->back() = shrinking; + } else if (mirrored[0] < (*simplex)[0][0]) { + // new best + std::vector even_further = Add(mirrored, diff); + Eval(&even_further, fun); + if (even_further[0] < mirrored[0]) { + mirrored = even_further; + } + simplex->back() = mirrored; + } else { + // not a best, not a worst point + simplex->back() = mirrored; + } +} + +// Initialize the simplex at origin. 
+std::vector> InitialSimplex( + int dim, double amount, const std::vector& init, + const std::function&)>& fun) { + std::vector best(1 + dim, 0); + std::copy(init.begin(), init.end(), best.begin() + 1); + Eval(&best, fun); + std::vector> result{best}; + for (int i = 0; i < dim; i++) { + best = result[0]; + best[i + 1] += amount; + Eval(&best, fun); + result.push_back(best); + Sort(&result); + } + return result; +} + +// For comparing the same with the python tool +/*void RunSimplexExternal( + int dim, double amount, int max_iterations, + const std::function&))>& fun) { + vector vars; + for (int i = 0; i < dim; i++) { + vars.push_back(atof(getenv(StrCat("VAR", i).c_str()))); + } + double result = fun(vars); + std::cout << "Result=" << result; +}*/ + +} // namespace + +std::vector RunSimplex( + int dim, double amount, int max_iterations, const std::vector& init, + const std::function&)>& fun) { + std::vector> simplex = + InitialSimplex(dim, amount, init, fun); + for (int i = 0; i < max_iterations; i++) { + Sort(&simplex); + Reflect(&simplex, fun); + } + return simplex[0]; +} + +std::vector RunSimplex( + int dim, double amount, int max_iterations, + const std::function&)>& fun) { + std::vector init(dim, 0.0); + return RunSimplex(dim, amount, max_iterations, init, fun); +} + +} // namespace optimize + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_optimize.h b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.h new file mode 100644 index 0000000000..0a60198214 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_optimize.h @@ -0,0 +1,218 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Utility functions for optimizing multi-dimensional nonlinear functions. 
+ +#ifndef LIB_JXL_OPTIMIZE_H_ +#define LIB_JXL_OPTIMIZE_H_ + +#include + +#include +#include +#include +#include + +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace optimize { + +// An array type of numeric values that supports math operations with operator-, +// operator+, etc. +template +class Array { + public: + Array() = default; + explicit Array(T v) { + for (size_t i = 0; i < N; i++) v_[i] = v; + } + + size_t size() const { return N; } + + T& operator[](size_t index) { + JXL_DASSERT(index < N); + return v_[index]; + } + T operator[](size_t index) const { + JXL_DASSERT(index < N); + return v_[index]; + } + + private: + // The values used by this Array. + T v_[N]; +}; + +template +Array operator+(const Array& x, const Array& y) { + Array z; + for (size_t i = 0; i < N; ++i) { + z[i] = x[i] + y[i]; + } + return z; +} + +template +Array operator-(const Array& x, const Array& y) { + Array z; + for (size_t i = 0; i < N; ++i) { + z[i] = x[i] - y[i]; + } + return z; +} + +template +Array operator*(T v, const Array& x) { + Array y; + for (size_t i = 0; i < N; ++i) { + y[i] = v * x[i]; + } + return y; +} + +template +T operator*(const Array& x, const Array& y) { + T r = 0.0; + for (size_t i = 0; i < N; ++i) { + r += x[i] * y[i]; + } + return r; +} + +// Runs Nelder-Mead like optimization. Runs for max_iterations times, +// fun gets called with a vector of size dim as argument, and returns the score +// based on those parameters (lower is better). Returns a vector of dim+1 +// dimensions, where the first value is the optimal value of the function and +// the rest is the argmin value. Use init to pass an initial guess or where +// the optimal value is. 
+// +// Usage example: +// +// RunSimplex(2, 0.1, 100, [](const vector& v) { +// return (v[0] - 5) * (v[0] - 5) + (v[1] - 7) * (v[1] - 7); +// }); +// +// Returns (0.0, 5, 7) +std::vector RunSimplex( + int dim, double amount, int max_iterations, + const std::function&)>& fun); +std::vector RunSimplex( + int dim, double amount, int max_iterations, const std::vector& init, + const std::function&)>& fun); + +// Implementation of the Scaled Conjugate Gradient method described in the +// following paper: +// Moller, M. "A Scaled Conjugate Gradient Algorithm for Fast Supervised +// Learning", Neural Networks, Vol. 6. pp. 525-533, 1993 +// http://sci2s.ugr.es/keel/pdf/algorithm/articulo/moller1990.pdf +// +// The Function template parameter is a class that has the following method: +// +// // Returns the value of the function at point w and sets *df to be the +// // negative gradient vector of the function at point w. +// double Compute(const optimize::Array& w, +// optimize::Array* df) const; +// +// Returns a vector w, such that |df(w)| < grad_norm_threshold. 
+template +Array OptimizeWithScaledConjugateGradientMethod( + const Function& f, const Array& w0, const T grad_norm_threshold, + size_t max_iters) { + const size_t n = w0.size(); + const T rsq_threshold = grad_norm_threshold * grad_norm_threshold; + const T sigma0 = static_cast(0.0001); + const T l_min = static_cast(1.0e-15); + const T l_max = static_cast(1.0e15); + + Array w = w0; + Array wp; + Array r; + Array rt; + Array e; + Array p; + T psq; + T fp; + T D; + T d; + T m; + T a; + T b; + T s; + T t; + + T fw = f.Compute(w, &r); + T rsq = r * r; + e = r; + p = r; + T l = static_cast(1.0); + bool success = true; + size_t n_success = 0; + size_t k = 0; + + while (k++ < max_iters) { + if (success) { + m = -(p * r); + if (m >= 0) { + p = r; + m = -(p * r); + } + psq = p * p; + s = sigma0 / std::sqrt(psq); + f.Compute(w + (s * p), &rt); + t = (p * (r - rt)) / s; + } + + d = t + l * psq; + if (d <= 0) { + d = l * psq; + l = l - t / psq; + } + + a = -m / d; + wp = w + a * p; + fp = f.Compute(wp, &rt); + + D = 2.0 * (fp - fw) / (a * m); + if (D >= 0.0) { + success = true; + n_success++; + w = wp; + } else { + success = false; + } + + if (success) { + e = r; + r = rt; + rsq = r * r; + fw = fp; + if (rsq <= rsq_threshold) { + break; + } + } + + if (D < 0.25) { + l = std::min(4.0 * l, l_max); + } else if (D > 0.75) { + l = std::max(0.25 * l, l_min); + } + + if ((n_success % n) == 0) { + p = r; + l = 1.0; + } else if (success) { + b = ((e - r) * r) / m; + p = b * p + r; + } + } + + return w; +} + +} // namespace optimize +} // namespace jxl + +#endif // LIB_JXL_OPTIMIZE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_optimize_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_optimize_test.cc new file mode 100644 index 0000000000..1c6699f99e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_optimize_test.cc @@ -0,0 +1,109 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_optimize.h" + +#include + +#include "lib/jxl/testing.h" + +namespace jxl { +namespace optimize { +namespace { + +// The maximum number of iterations for the test. +static const size_t kMaxTestIter = 100000; + +// F(w) = (w - w_min)^2. +struct SimpleQuadraticFunction { + typedef Array ArrayType; + explicit SimpleQuadraticFunction(const ArrayType& w0) : w_min(w0) {} + + double Compute(const ArrayType& w, ArrayType* df) const { + ArrayType dw = w - w_min; + *df = -2.0 * dw; + return dw * dw; + } + + ArrayType w_min; +}; + +// F(alpha, beta, gamma| x,y) = \sum_i(y_i - (alpha x_i ^ gamma + beta))^2. +struct PowerFunction { + explicit PowerFunction(const std::vector& x0, + const std::vector& y0) + : x(x0), y(y0) {} + + typedef Array ArrayType; + double Compute(const ArrayType& w, ArrayType* df) const { + double loss_function = 0; + (*df)[0] = 0; + (*df)[1] = 0; + (*df)[2] = 0; + for (size_t ind = 0; ind < y.size(); ++ind) { + if (x[ind] != 0) { + double l_f = y[ind] - (w[0] * pow(x[ind], w[1]) + w[2]); + (*df)[0] += 2.0 * l_f * pow(x[ind], w[1]); + (*df)[1] += 2.0 * l_f * w[0] * pow(x[ind], w[1]) * log(x[ind]); + (*df)[2] += 2.0 * l_f * 1; + loss_function += l_f * l_f; + } + } + return loss_function; + } + + std::vector x; + std::vector y; +}; + +TEST(OptimizeTest, SimpleQuadraticFunction) { + SimpleQuadraticFunction::ArrayType w_min; + w_min[0] = 1.0; + w_min[1] = 2.0; + SimpleQuadraticFunction f(w_min); + SimpleQuadraticFunction::ArrayType w(0.); + static const double kPrecision = 1e-8; + w = optimize::OptimizeWithScaledConjugateGradientMethod(f, w, kPrecision, + kMaxTestIter); + EXPECT_NEAR(w[0], 1.0, kPrecision); + EXPECT_NEAR(w[1], 2.0, kPrecision); +} + +TEST(OptimizeTest, PowerFunction) { + std::vector x(10); + std::vector y(10); + for (int ind = 0; ind < 10; ++ind) { + x[ind] = 1. * ind; + y[ind] = 2. 
* pow(x[ind], 3) + 5.; + } + PowerFunction f(x, y); + PowerFunction::ArrayType w(0.); + + static const double kPrecision = 0.01; + w = optimize::OptimizeWithScaledConjugateGradientMethod(f, w, kPrecision, + kMaxTestIter); + EXPECT_NEAR(w[0], 2.0, kPrecision); + EXPECT_NEAR(w[1], 3.0, kPrecision); + EXPECT_NEAR(w[2], 5.0, kPrecision); +} + +TEST(OptimizeTest, SimplexOptTest) { + auto f = [](const std::vector& x) -> double { + double t1 = x[0] - 1.0; + double t2 = x[1] + 1.5; + return 2.0 + t1 * t1 + t2 * t2; + }; + auto opt = RunSimplex(2, 0.01, 100, f); + EXPECT_EQ(opt.size(), 3u); + + static const double kPrecision = 0.01; + EXPECT_NEAR(opt[0], 2.0, kPrecision); + EXPECT_NEAR(opt[1], 1.0, kPrecision); + EXPECT_NEAR(opt[2], -1.5, kPrecision); +} + +} // namespace +} // namespace optimize +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_params.h b/third-party/libjxl/libjxl/lib/jxl/enc_params.h new file mode 100644 index 0000000000..bce640ba5d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_params.h @@ -0,0 +1,220 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_PARAMS_H_ +#define LIB_JXL_ENC_PARAMS_H_ + +// Parameters and flags that govern JXL compression. + +#include +#include +#include + +#include + +#include "lib/jxl/base/override.h" +#include "lib/jxl/butteraugli/butteraugli.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/modular/options.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +enum class SpeedTier { + // Try multiple combinations of Tortoise flags for modular mode. Otherwise + // like kTortoise. + kGlacier = 0, + // Turns on FindBestQuantizationHQ loop. Equivalent to "guetzli" mode. + kTortoise = 1, + // Turns on FindBestQuantization butteraugli loop. 
+ kKitten = 2, + // Turns on dots, patches, and spline detection by default, as well as full + // context clustering. Default. + kSquirrel = 3, + // Turns on error diffusion and full AC strategy heuristics. Equivalent to + // "fast" mode. + kWombat = 4, + // Turns on gaborish by default, non-default cmap, initial quant field. + kHare = 5, + // Turns on simple heuristics for AC strategy, quant field, and clustering; + // also enables coefficient reordering. + kCheetah = 6, + // Turns off most encoder features. Does context clustering. + // Modular: uses fixed tree with Weighted predictor. + kFalcon = 7, + // Currently fastest possible setting for VarDCT. + // Modular: uses fixed tree with Gradient predictor. + kThunder = 8, + // VarDCT: same as kThunder. + // Modular: no tree, Gradient predictor, fast histograms + kLightning = 9 +}; + +// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) +struct CompressParams { + float butteraugli_distance = 1.0f; + + // explicit distances for extra channels (defaults to butteraugli_distance + // when not set; value of -1 can be used to represent 'default') + std::vector ec_distance; + + // Try to achieve a maximum pixel-by-pixel error on each channel. + bool max_error_mode = false; + float max_error[3] = {0.0, 0.0, 0.0}; + + SpeedTier speed_tier = SpeedTier::kSquirrel; + int brotli_effort = -1; + + // 0 = default. + // 1 = slightly worse quality. + // 4 = fastest speed, lowest quality + size_t decoding_speed_tier = 0; + + ColorTransform color_transform = ColorTransform::kXYB; + + // If true, the "modular mode options" members below are used. + bool modular_mode = false; + + // Change group size in modular mode (0=128, 1=256, 2=512, 3=1024, -1=encoder + // chooses). 
+ int modular_group_size_shift = -1; + + Override preview = Override::kDefault; + Override noise = Override::kDefault; + Override dots = Override::kDefault; + Override patches = Override::kDefault; + Override gaborish = Override::kDefault; + int epf = -1; + + // Progressive mode. + bool progressive_mode = false; + + // Quantized-progressive mode. + bool qprogressive_mode = false; + + // Put center groups first in the bitstream. + bool centerfirst = false; + + // Pixel coordinates of the center. First group will contain that center. + size_t center_x = static_cast(-1); + size_t center_y = static_cast(-1); + + int progressive_dc = -1; + + // If on: preserve color of invisible pixels (if off: don't care) + // Default: on for lossless, off for lossy + Override keep_invisible = Override::kDefault; + + JxlCmsInterface cms; + bool cms_set = false; + void SetCms(const JxlCmsInterface& cms) { + this->cms = cms; + cms_set = true; + } + + // Force usage of CfL when doing JPEG recompression. This can have unexpected + // effects on the decoded pixels, while still being JPEG-compliant and + // allowing reconstruction of the original JPEG. + bool force_cfl_jpeg_recompression = true; + + // Use brotli compression for any boxes derived from a JPEG frame. + bool jpeg_compress_boxes = true; + + // Preserve this metadata when doing JPEG recompression. + bool jpeg_keep_exif = true; + bool jpeg_keep_xmp = true; + bool jpeg_keep_jumbf = true; + + // Set the noise to what it would approximately be if shooting at the nominal + // exposure for a given ISO setting on a 35mm camera. 
+ float photon_noise_iso = 0; + + // modular mode options below + ModularOptions options; + int responsive = -1; + // empty for default squeeze + std::vector squeezes; + int colorspace = -1; + // Use Global channel palette if #colors < this percentage of range + float channel_colors_pre_transform_percent = 95.f; + // Use Local channel palette if #colors < this percentage of range + float channel_colors_percent = 80.f; + int palette_colors = 1 << 10; // up to 10-bit palette is probably worthwhile + bool lossy_palette = false; + + // Returns whether these params are lossless as defined by SetLossless(); + bool IsLossless() const { return modular_mode && ModularPartIsLossless(); } + + bool ModularPartIsLossless() const { + if (modular_mode) { + // YCbCr is also considered lossless here since it's intended for + // source material that is already YCbCr (we don't do the fwd transform) + if (butteraugli_distance != 0 || + color_transform == jxl::ColorTransform::kXYB) + return false; + } + for (float f : ec_distance) { + if (f > 0) return false; + if (f < 0 && butteraugli_distance != 0) return false; + } + // if no explicit ec_distance given, and using vardct, then the modular part + // is empty or not lossless + if (!modular_mode && ec_distance.empty()) return false; + // all modular channels are encoded at distance 0 + return true; + } + + // Sets the parameters required to make the codec lossless. + void SetLossless() { + modular_mode = true; + butteraugli_distance = 0.0f; + for (float &f : ec_distance) f = 0.0f; + color_transform = jxl::ColorTransform::kNone; + } + + // Down/upsample the image before encoding / after decoding by this factor. + // The resampling value can also be set to <= 0 to automatically choose based + // on distance, however EncodeFrame doesn't support this, so it is + // required to call PostInit() to set a valid positive resampling + // value and altered butteraugli score if this is used. 
+ int resampling = -1; + int ec_resampling = -1; + // Skip the downsampling before encoding if this is true. + bool already_downsampled = false; + // Butteraugli target distance on the original full size image, this can be + // different from butteraugli_distance if resampling was used. + float original_butteraugli_distance = -1.0f; + + float quant_ac_rescale = 1.0; + + // Codestream level to conform to. + // -1: don't care + int level = -1; + + std::vector manual_noise; + std::vector manual_xyb_factors; + + JxlDebugImageCallback debug_image = nullptr; + void* debug_image_opaque; +}; + +static constexpr float kMinButteraugliForDynamicAR = 0.5f; +static constexpr float kMinButteraugliForDots = 3.0f; +static constexpr float kMinButteraugliToSubtractOriginalPatches = 3.0f; + +// Always off +static constexpr float kMinButteraugliForNoise = 99.0f; + +// Minimum butteraugli distance the encoder accepts. +static constexpr float kMinButteraugliDistance = 0.001f; + +// Tile size for encoder-side processing. Must be equal to color tile dim in the +// current implementation. +static constexpr size_t kEncTileDim = 64; +static constexpr size_t kEncTileDimInBlocks = kEncTileDim / kBlockDim; + +} // namespace jxl + +#endif // LIB_JXL_ENC_PARAMS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.cc b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.cc new file mode 100644 index 0000000000..800cfc56f8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.cc @@ -0,0 +1,816 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_patch_dictionary.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/dec_frame.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_debug_image.h" +#include "lib/jxl/enc_dot_dictionary.h" +#include "lib/jxl/enc_frame.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/patch_dictionary_internal.h" + +namespace jxl { + +static constexpr size_t kPatchFrameReferenceId = 3; + +// static +void PatchDictionaryEncoder::Encode(const PatchDictionary& pdic, + BitWriter* writer, size_t layer, + AuxOut* aux_out) { + JXL_ASSERT(pdic.HasAny()); + std::vector> tokens(1); + size_t num_ec = pdic.shared_->metadata->m.num_extra_channels; + + auto add_num = [&](int context, size_t num) { + tokens[0].emplace_back(context, num); + }; + size_t num_ref_patch = 0; + for (size_t i = 0; i < pdic.positions_.size();) { + size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx; + while (i < pdic.positions_.size() && + pdic.positions_[i].ref_pos_idx == ref_pos_idx) { + i++; + } + num_ref_patch++; + } + add_num(kNumRefPatchContext, num_ref_patch); + size_t blend_pos = 0; + for (size_t i = 0; i < pdic.positions_.size();) { + size_t i_start = i; + size_t ref_pos_idx = pdic.positions_[i].ref_pos_idx; + const auto& ref_pos = pdic.ref_positions_[ref_pos_idx]; + while (i < pdic.positions_.size() && + pdic.positions_[i].ref_pos_idx == ref_pos_idx) { + i++; + } + size_t num = i - i_start; + 
JXL_ASSERT(num > 0); + add_num(kReferenceFrameContext, ref_pos.ref); + add_num(kPatchReferencePositionContext, ref_pos.x0); + add_num(kPatchReferencePositionContext, ref_pos.y0); + add_num(kPatchSizeContext, ref_pos.xsize - 1); + add_num(kPatchSizeContext, ref_pos.ysize - 1); + add_num(kPatchCountContext, num - 1); + for (size_t j = i_start; j < i; j++) { + const PatchPosition& pos = pdic.positions_[j]; + if (j == i_start) { + add_num(kPatchPositionContext, pos.x); + add_num(kPatchPositionContext, pos.y); + } else { + add_num(kPatchOffsetContext, + PackSigned(pos.x - pdic.positions_[j - 1].x)); + add_num(kPatchOffsetContext, + PackSigned(pos.y - pdic.positions_[j - 1].y)); + } + for (size_t j = 0; j < num_ec + 1; ++j, ++blend_pos) { + const PatchBlending& info = pdic.blendings_[blend_pos]; + add_num(kPatchBlendModeContext, static_cast(info.mode)); + if (UsesAlpha(info.mode) && + pdic.shared_->metadata->m.extra_channel_info.size() > 1) { + add_num(kPatchAlphaChannelContext, info.alpha_channel); + } + if (UsesClamp(info.mode)) { + add_num(kPatchClampContext, info.clamp); + } + } + } + } + + EntropyEncodingData codes; + std::vector context_map; + BuildAndEncodeHistograms(HistogramParams(), kNumPatchDictionaryContexts, + tokens, &codes, &context_map, writer, layer, + aux_out); + WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out); +} + +// static +void PatchDictionaryEncoder::SubtractFrom(const PatchDictionary& pdic, + Image3F* opsin) { + size_t num_ec = pdic.shared_->metadata->m.num_extra_channels; + // TODO(veluca): this can likely be optimized knowing it runs on full images. 
+ for (size_t y = 0; y < opsin->ysize(); y++) { + float* JXL_RESTRICT rows[3] = { + opsin->PlaneRow(0, y), + opsin->PlaneRow(1, y), + opsin->PlaneRow(2, y), + }; + for (size_t pos_idx : pdic.GetPatchesForRow(y)) { + const size_t blending_idx = pos_idx * (num_ec + 1); + const PatchPosition& pos = pdic.positions_[pos_idx]; + const PatchReferencePosition& ref_pos = + pdic.ref_positions_[pos.ref_pos_idx]; + const PatchBlendMode mode = pdic.blendings_[blending_idx].mode; + size_t by = pos.y; + size_t bx = pos.x; + size_t xsize = ref_pos.xsize; + JXL_DASSERT(y >= by); + JXL_DASSERT(y < by + ref_pos.ysize); + size_t iy = y - by; + size_t ref = ref_pos.ref; + const float* JXL_RESTRICT ref_rows[3] = { + pdic.shared_->reference_frames[ref].frame.color().ConstPlaneRow( + 0, ref_pos.y0 + iy) + + ref_pos.x0, + pdic.shared_->reference_frames[ref].frame.color().ConstPlaneRow( + 1, ref_pos.y0 + iy) + + ref_pos.x0, + pdic.shared_->reference_frames[ref].frame.color().ConstPlaneRow( + 2, ref_pos.y0 + iy) + + ref_pos.x0, + }; + for (size_t ix = 0; ix < xsize; ix++) { + for (size_t c = 0; c < 3; c++) { + if (mode == PatchBlendMode::kAdd) { + rows[c][bx + ix] -= ref_rows[c][ix]; + } else if (mode == PatchBlendMode::kReplace) { + rows[c][bx + ix] = 0; + } else if (mode == PatchBlendMode::kNone) { + // Nothing to do. 
+ } else { + JXL_UNREACHABLE("Blending mode %u not yet implemented", + (uint32_t)mode); + } + } + } + } + } +} + +namespace { + +struct PatchColorspaceInfo { + float kChannelDequant[3]; + float kChannelWeights[3]; + + explicit PatchColorspaceInfo(bool is_xyb) { + if (is_xyb) { + kChannelDequant[0] = 0.01615; + kChannelDequant[1] = 0.08875; + kChannelDequant[2] = 0.1922; + kChannelWeights[0] = 30.0; + kChannelWeights[1] = 3.0; + kChannelWeights[2] = 1.0; + } else { + kChannelDequant[0] = 20.0f / 255; + kChannelDequant[1] = 22.0f / 255; + kChannelDequant[2] = 20.0f / 255; + kChannelWeights[0] = 0.017 * 255; + kChannelWeights[1] = 0.02 * 255; + kChannelWeights[2] = 0.017 * 255; + } + } + + float ScaleForQuantization(float val, size_t c) { + return val / kChannelDequant[c]; + } + + int Quantize(float val, size_t c) { + return truncf(ScaleForQuantization(val, c)); + } + + bool is_similar_v(const float v1[3], const float v2[3], float threshold) { + float distance = 0; + for (size_t c = 0; c < 3; c++) { + distance += std::fabs(v1[c] - v2[c]) * kChannelWeights[c]; + } + return distance <= threshold; + } +}; + +std::vector FindTextLikePatches( + const CompressParams& cparams, const Image3F& opsin, + const PassesEncoderState* JXL_RESTRICT state, ThreadPool* pool, + AuxOut* aux_out, bool is_xyb) { + if (state->cparams.patches == Override::kOff) return {}; + + PatchColorspaceInfo pci(is_xyb); + float kSimilarThreshold = 0.8f; + + auto is_similar_impl = [&pci](std::pair p1, + std::pair p2, + const float* JXL_RESTRICT rows[3], + size_t stride, float threshold) { + float v1[3], v2[3]; + for (size_t c = 0; c < 3; c++) { + v1[c] = rows[c][p1.second * stride + p1.first]; + v2[c] = rows[c][p2.second * stride + p2.first]; + } + return pci.is_similar_v(v1, v2, threshold); + }; + + std::atomic has_screenshot_areas{false}; + const size_t opsin_stride = opsin.PixelsPerRow(); + const float* JXL_RESTRICT opsin_rows[3] = {opsin.ConstPlaneRow(0, 0), + opsin.ConstPlaneRow(1, 0), + 
opsin.ConstPlaneRow(2, 0)}; + + auto is_same = [&opsin_rows, opsin_stride](std::pair p1, + std::pair p2) { + for (size_t c = 0; c < 3; c++) { + float v1 = opsin_rows[c][p1.second * opsin_stride + p1.first]; + float v2 = opsin_rows[c][p2.second * opsin_stride + p2.first]; + if (std::fabs(v1 - v2) > 1e-4) { + return false; + } + } + return true; + }; + + auto is_similar = [&](std::pair p1, + std::pair p2) { + return is_similar_impl(p1, p2, opsin_rows, opsin_stride, kSimilarThreshold); + }; + + constexpr int64_t kPatchSide = 4; + constexpr int64_t kExtraSide = 4; + + // Look for kPatchSide size squares, naturally aligned, that all have the same + // pixel values. + ImageB is_screenshot_like(DivCeil(opsin.xsize(), kPatchSide), + DivCeil(opsin.ysize(), kPatchSide)); + ZeroFillImage(&is_screenshot_like); + uint8_t* JXL_RESTRICT screenshot_row = is_screenshot_like.Row(0); + const size_t screenshot_stride = is_screenshot_like.PixelsPerRow(); + const auto process_row = [&](const uint32_t y, size_t /* thread */) { + for (uint64_t x = 0; x < opsin.xsize() / kPatchSide; x++) { + bool all_same = true; + for (size_t iy = 0; iy < static_cast(kPatchSide); iy++) { + for (size_t ix = 0; ix < static_cast(kPatchSide); ix++) { + size_t cx = x * kPatchSide + ix; + size_t cy = y * kPatchSide + iy; + if (!is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) { + all_same = false; + break; + } + } + } + if (!all_same) continue; + size_t num = 0; + size_t num_same = 0; + for (int64_t iy = -kExtraSide; iy < kExtraSide + kPatchSide; iy++) { + for (int64_t ix = -kExtraSide; ix < kExtraSide + kPatchSide; ix++) { + int64_t cx = x * kPatchSide + ix; + int64_t cy = y * kPatchSide + iy; + if (cx < 0 || static_cast(cx) >= opsin.xsize() || // + cy < 0 || static_cast(cy) >= opsin.ysize()) { + continue; + } + num++; + if (is_same({cx, cy}, {x * kPatchSide, y * kPatchSide})) num_same++; + } + } + // Too few equal pixels nearby. 
+ if (num_same * 8 < num * 7) continue; + screenshot_row[y * screenshot_stride + x] = 1; + has_screenshot_areas = true; + } + }; + JXL_CHECK(RunOnPool(pool, 0, opsin.ysize() / kPatchSide, ThreadPool::NoInit, + process_row, "IsScreenshotLike")); + + // TODO(veluca): also parallelize the rest of this function. + if (WantDebugOutput(cparams)) { + DumpPlaneNormalized(cparams, "screenshot_like", is_screenshot_like); + } + + constexpr int kSearchRadius = 1; + + if (!ApplyOverride(state->cparams.patches, has_screenshot_areas)) { + return {}; + } + + // Search for "similar enough" pixels near the screenshot-like areas. + ImageB is_background(opsin.xsize(), opsin.ysize()); + ZeroFillImage(&is_background); + Image3F background(opsin.xsize(), opsin.ysize()); + ZeroFillImage(&background); + constexpr size_t kDistanceLimit = 50; + float* JXL_RESTRICT background_rows[3] = { + background.PlaneRow(0, 0), + background.PlaneRow(1, 0), + background.PlaneRow(2, 0), + }; + const size_t background_stride = background.PixelsPerRow(); + uint8_t* JXL_RESTRICT is_background_row = is_background.Row(0); + const size_t is_background_stride = is_background.PixelsPerRow(); + std::vector< + std::pair, std::pair>> + queue; + size_t queue_front = 0; + for (size_t y = 0; y < opsin.ysize(); y++) { + for (size_t x = 0; x < opsin.xsize(); x++) { + if (!screenshot_row[screenshot_stride * (y / kPatchSide) + + (x / kPatchSide)]) + continue; + queue.push_back({{x, y}, {x, y}}); + } + } + while (queue.size() != queue_front) { + std::pair cur = queue[queue_front].first; + std::pair src = queue[queue_front].second; + queue_front++; + if (is_background_row[cur.second * is_background_stride + cur.first]) + continue; + is_background_row[cur.second * is_background_stride + cur.first] = 1; + for (size_t c = 0; c < 3; c++) { + background_rows[c][cur.second * background_stride + cur.first] = + opsin_rows[c][src.second * opsin_stride + src.first]; + } + for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) { + for 
(int dy = -kSearchRadius; dy <= kSearchRadius; dy++) { + if (dx == 0 && dy == 0) continue; + int next_first = cur.first + dx; + int next_second = cur.second + dy; + if (next_first < 0 || next_second < 0 || + static_cast(next_first) >= opsin.xsize() || + static_cast(next_second) >= opsin.ysize()) { + continue; + } + if (static_cast( + std::abs(next_first - static_cast(src.first)) + + std::abs(next_second - static_cast(src.second))) > + kDistanceLimit) { + continue; + } + std::pair next{next_first, next_second}; + if (is_similar(src, next)) { + if (!screenshot_row[next.second / kPatchSide * screenshot_stride + + next.first / kPatchSide] || + is_same(src, next)) { + if (!is_background_row[next.second * is_background_stride + + next.first]) + queue.emplace_back(next, src); + } + } + } + } + } + queue.clear(); + + ImageF ccs; + Rng rng(0); + bool paint_ccs = false; + if (WantDebugOutput(cparams)) { + DumpPlaneNormalized(cparams, "is_background", is_background); + if (is_xyb) { + DumpXybImage(cparams, "background", background); + } else { + DumpImage(cparams, "background", background); + } + ccs = ImageF(opsin.xsize(), opsin.ysize()); + ZeroFillImage(&ccs); + paint_ccs = true; + } + + constexpr float kVerySimilarThreshold = 0.03f; + constexpr float kHasSimilarThreshold = 0.03f; + + const float* JXL_RESTRICT const_background_rows[3] = { + background_rows[0], background_rows[1], background_rows[2]}; + auto is_similar_b = [&](std::pair p1, std::pair p2) { + return is_similar_impl(p1, p2, const_background_rows, background_stride, + kVerySimilarThreshold); + }; + + constexpr int kMinPeak = 2; + constexpr int kHasSimilarRadius = 2; + + std::vector info; + + // Find small CC outside the "similar enough" areas, compute bounding boxes, + // and run heuristics to exclude some patches. 
+ ImageB visited(opsin.xsize(), opsin.ysize()); + ZeroFillImage(&visited); + uint8_t* JXL_RESTRICT visited_row = visited.Row(0); + const size_t visited_stride = visited.PixelsPerRow(); + std::vector> cc; + std::vector> stack; + for (size_t y = 0; y < opsin.ysize(); y++) { + for (size_t x = 0; x < opsin.xsize(); x++) { + if (is_background_row[y * is_background_stride + x]) continue; + cc.clear(); + stack.clear(); + stack.emplace_back(x, y); + size_t min_x = x; + size_t max_x = x; + size_t min_y = y; + size_t max_y = y; + std::pair reference; + bool found_border = false; + bool all_similar = true; + while (!stack.empty()) { + std::pair cur = stack.back(); + stack.pop_back(); + if (visited_row[cur.second * visited_stride + cur.first]) continue; + visited_row[cur.second * visited_stride + cur.first] = 1; + if (cur.first < min_x) min_x = cur.first; + if (cur.first > max_x) max_x = cur.first; + if (cur.second < min_y) min_y = cur.second; + if (cur.second > max_y) max_y = cur.second; + if (paint_ccs) { + cc.push_back(cur); + } + for (int dx = -kSearchRadius; dx <= kSearchRadius; dx++) { + for (int dy = -kSearchRadius; dy <= kSearchRadius; dy++) { + if (dx == 0 && dy == 0) continue; + int next_first = static_cast(cur.first) + dx; + int next_second = static_cast(cur.second) + dy; + if (next_first < 0 || next_second < 0 || + static_cast(next_first) >= opsin.xsize() || + static_cast(next_second) >= opsin.ysize()) { + continue; + } + std::pair next{next_first, next_second}; + if (!is_background_row[next.second * is_background_stride + + next.first]) { + stack.push_back(next); + } else { + if (!found_border) { + reference = next; + found_border = true; + } else { + if (!is_similar_b(next, reference)) all_similar = false; + } + } + } + } + } + if (!found_border || !all_similar || max_x - min_x >= kMaxPatchSize || + max_y - min_y >= kMaxPatchSize) { + continue; + } + size_t bpos = background_stride * reference.second + reference.first; + float ref[3] = {background_rows[0][bpos], 
background_rows[1][bpos], + background_rows[2][bpos]}; + bool has_similar = false; + for (size_t iy = std::max( + static_cast(min_y) - kHasSimilarRadius, 0); + iy < std::min(max_y + kHasSimilarRadius + 1, opsin.ysize()); iy++) { + for (size_t ix = std::max( + static_cast(min_x) - kHasSimilarRadius, 0); + ix < std::min(max_x + kHasSimilarRadius + 1, opsin.xsize()); + ix++) { + size_t opos = opsin_stride * iy + ix; + float px[3] = {opsin_rows[0][opos], opsin_rows[1][opos], + opsin_rows[2][opos]}; + if (pci.is_similar_v(ref, px, kHasSimilarThreshold)) { + has_similar = true; + } + } + } + if (!has_similar) continue; + info.emplace_back(); + info.back().second.emplace_back(min_x, min_y); + QuantizedPatch& patch = info.back().first; + patch.xsize = max_x - min_x + 1; + patch.ysize = max_y - min_y + 1; + int max_value = 0; + for (size_t c : {1, 0, 2}) { + for (size_t iy = min_y; iy <= max_y; iy++) { + for (size_t ix = min_x; ix <= max_x; ix++) { + size_t offset = (iy - min_y) * patch.xsize + ix - min_x; + patch.fpixels[c][offset] = + opsin_rows[c][iy * opsin_stride + ix] - ref[c]; + int val = pci.Quantize(patch.fpixels[c][offset], c); + patch.pixels[c][offset] = val; + if (std::abs(val) > max_value) max_value = std::abs(val); + } + } + } + if (max_value < kMinPeak) { + info.pop_back(); + continue; + } + if (paint_ccs) { + float cc_color = rng.UniformF(0.5, 1.0); + for (std::pair p : cc) { + ccs.Row(p.second)[p.first] = cc_color; + } + } + } + } + + if (paint_ccs) { + JXL_ASSERT(WantDebugOutput(cparams)); + DumpPlaneNormalized(cparams, "ccs", ccs); + } + if (info.empty()) { + return {}; + } + + // Remove duplicates. 
+ constexpr size_t kMinPatchOccurrences = 2; + std::sort(info.begin(), info.end()); + size_t unique = 0; + for (size_t i = 1; i < info.size(); i++) { + if (info[i].first == info[unique].first) { + info[unique].second.insert(info[unique].second.end(), + info[i].second.begin(), info[i].second.end()); + } else { + if (info[unique].second.size() >= kMinPatchOccurrences) { + unique++; + } + info[unique] = info[i]; + } + } + if (info[unique].second.size() >= kMinPatchOccurrences) { + unique++; + } + info.resize(unique); + + size_t max_patch_size = 0; + + for (size_t i = 0; i < info.size(); i++) { + size_t pixels = info[i].first.xsize * info[i].first.ysize; + if (pixels > max_patch_size) max_patch_size = pixels; + } + + // don't use patches if all patches are smaller than this + constexpr size_t kMinMaxPatchSize = 20; + if (max_patch_size < kMinMaxPatchSize) return {}; + + return info; +} + +} // namespace + +void FindBestPatchDictionary(const Image3F& opsin, + PassesEncoderState* JXL_RESTRICT state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out, bool is_xyb) { + std::vector info = + FindTextLikePatches(state->cparams, opsin, state, pool, aux_out, is_xyb); + + // TODO(veluca): this doesn't work if both dots and patches are enabled. + // For now, since dots and patches are not likely to occur in the same kind of + // images, disable dots if some patches were found. 
+ if (info.empty() && + ApplyOverride( + state->cparams.dots, + state->cparams.speed_tier <= SpeedTier::kSquirrel && + state->cparams.butteraugli_distance >= kMinButteraugliForDots)) { + info = FindDotDictionary(state->cparams, opsin, state->shared.cmap, pool); + } + + if (info.empty()) return; + + std::sort( + info.begin(), info.end(), [&](const PatchInfo& a, const PatchInfo& b) { + return a.first.xsize * a.first.ysize > b.first.xsize * b.first.ysize; + }); + + size_t max_x_size = 0; + size_t max_y_size = 0; + size_t total_pixels = 0; + + for (size_t i = 0; i < info.size(); i++) { + size_t pixels = info[i].first.xsize * info[i].first.ysize; + if (max_x_size < info[i].first.xsize) max_x_size = info[i].first.xsize; + if (max_y_size < info[i].first.ysize) max_y_size = info[i].first.ysize; + total_pixels += pixels; + } + + // Bin-packing & conversion of patches. + constexpr float kBinPackingSlackness = 1.05f; + size_t ref_xsize = std::max(max_x_size, std::sqrt(total_pixels)); + size_t ref_ysize = std::max(max_y_size, std::sqrt(total_pixels)); + std::vector> ref_positions(info.size()); + // TODO(veluca): allow partial overlaps of patches that have the same pixels. + size_t max_y = 0; + do { + max_y = 0; + // Increase packed image size. + ref_xsize = ref_xsize * kBinPackingSlackness + 1; + ref_ysize = ref_ysize * kBinPackingSlackness + 1; + + ImageB occupied(ref_xsize, ref_ysize); + ZeroFillImage(&occupied); + uint8_t* JXL_RESTRICT occupied_rows = occupied.Row(0); + size_t occupied_stride = occupied.PixelsPerRow(); + + bool success = true; + // For every patch... + for (size_t patch = 0; patch < info.size(); patch++) { + size_t x0 = 0; + size_t y0 = 0; + size_t xsize = info[patch].first.xsize; + size_t ysize = info[patch].first.ysize; + bool found = false; + // For every possible start position ... 
+ for (; y0 + ysize <= ref_ysize; y0++) { + x0 = 0; + for (; x0 + xsize <= ref_xsize; x0++) { + bool has_occupied_pixel = false; + size_t x = x0; + // Check if it is possible to place the patch in this position in the + // reference frame. + for (size_t y = y0; y < y0 + ysize; y++) { + x = x0; + for (; x < x0 + xsize; x++) { + if (occupied_rows[y * occupied_stride + x]) { + has_occupied_pixel = true; + break; + } + } + } // end of positioning check + if (!has_occupied_pixel) { + found = true; + break; + } + x0 = x; // Jump to next pixel after the occupied one. + } + if (found) break; + } // end of start position checking + + // We didn't find a possible position: repeat from the beginning with a + // larger reference frame size. + if (!found) { + success = false; + break; + } + + // We found a position: mark the corresponding positions in the reference + // image as used. + ref_positions[patch] = {x0, y0}; + for (size_t y = y0; y < y0 + ysize; y++) { + for (size_t x = x0; x < x0 + xsize; x++) { + occupied_rows[y * occupied_stride + x] = true; + } + } + max_y = std::max(max_y, y0 + ysize); + } + + if (success) break; + } while (true); + + JXL_ASSERT(ref_ysize >= max_y); + + ref_ysize = max_y; + + Image3F reference_frame(ref_xsize, ref_ysize); + // TODO(veluca): figure out a better way to fill the image. 
+ ZeroFillImage(&reference_frame); + std::vector positions; + std::vector pref_positions; + std::vector blendings; + float* JXL_RESTRICT ref_rows[3] = { + reference_frame.PlaneRow(0, 0), + reference_frame.PlaneRow(1, 0), + reference_frame.PlaneRow(2, 0), + }; + size_t ref_stride = reference_frame.PixelsPerRow(); + size_t num_ec = state->shared.metadata->m.num_extra_channels; + + for (size_t i = 0; i < info.size(); i++) { + PatchReferencePosition ref_pos; + ref_pos.xsize = info[i].first.xsize; + ref_pos.ysize = info[i].first.ysize; + ref_pos.x0 = ref_positions[i].first; + ref_pos.y0 = ref_positions[i].second; + ref_pos.ref = kPatchFrameReferenceId; + for (size_t y = 0; y < ref_pos.ysize; y++) { + for (size_t x = 0; x < ref_pos.xsize; x++) { + for (size_t c = 0; c < 3; c++) { + ref_rows[c][(y + ref_pos.y0) * ref_stride + x + ref_pos.x0] = + info[i].first.fpixels[c][y * ref_pos.xsize + x]; + } + } + } + for (const auto& pos : info[i].second) { + positions.emplace_back( + PatchPosition{pos.first, pos.second, pref_positions.size()}); + // Add blending for color channels, ignore other channels. + blendings.push_back({PatchBlendMode::kAdd, 0, false}); + for (size_t j = 0; j < num_ec; ++j) { + blendings.push_back({PatchBlendMode::kNone, 0, false}); + } + } + pref_positions.emplace_back(std::move(ref_pos)); + } + + CompressParams cparams = state->cparams; + // Recursive application of patches could create very weird issues. + cparams.patches = Override::kOff; + + RoundtripPatchFrame(&reference_frame, state, kPatchFrameReferenceId, cparams, + cms, pool, aux_out, /*subtract=*/true); + + // TODO(veluca): this assumes that applying patches is commutative, which is + // not true for all blending modes. This code only produces kAdd patches, so + // this works out. 
+ PatchDictionaryEncoder::SetPositions( + &state->shared.image_features.patches, std::move(positions), + std::move(pref_positions), std::move(blendings)); +} + +void RoundtripPatchFrame(Image3F* reference_frame, + PassesEncoderState* JXL_RESTRICT state, int idx, + CompressParams& cparams, const JxlCmsInterface& cms, + ThreadPool* pool, AuxOut* aux_out, bool subtract) { + FrameInfo patch_frame_info; + cparams.resampling = 1; + cparams.ec_resampling = 1; + cparams.dots = Override::kOff; + cparams.noise = Override::kOff; + cparams.modular_mode = true; + cparams.responsive = 0; + cparams.progressive_dc = 0; + cparams.progressive_mode = false; + cparams.qprogressive_mode = false; + // Use gradient predictor and not Predictor::Best. + cparams.options.predictor = Predictor::Gradient; + patch_frame_info.save_as_reference = idx; // always saved. + patch_frame_info.frame_type = FrameType::kReferenceOnly; + patch_frame_info.save_before_color_transform = true; + ImageBundle ib(&state->shared.metadata->m); + // TODO(veluca): metadata.color_encoding is a lie: ib is in XYB, but there is + // no simple way to express that yet. + patch_frame_info.ib_needs_color_transform = false; + ib.SetFromImage(std::move(*reference_frame), + state->shared.metadata->m.color_encoding); + if (!ib.metadata()->extra_channel_info.empty()) { + // Add dummy extra channels to the patch image: patch encoding does not yet + // support extra channels, but the codec expects that the amount of extra + // channels in frames matches that in the metadata of the codestream. + std::vector extra_channels; + extra_channels.reserve(ib.metadata()->extra_channel_info.size()); + for (size_t i = 0; i < ib.metadata()->extra_channel_info.size(); i++) { + extra_channels.emplace_back(ib.xsize(), ib.ysize()); + // Must initialize the image with data to not affect blending with + // uninitialized memory. + // TODO(lode): patches must copy and use the real extra channels instead. 
+ ZeroFillImage(&extra_channels.back()); + } + ib.SetExtraChannels(std::move(extra_channels)); + } + PassesEncoderState roundtrip_state; + auto special_frame = std::unique_ptr(new BitWriter()); + AuxOut patch_aux_out; + JXL_CHECK(EncodeFrame(cparams, patch_frame_info, state->shared.metadata, ib, + &roundtrip_state, cms, pool, special_frame.get(), + aux_out ? &patch_aux_out : nullptr)); + if (aux_out) { + for (const auto& l : patch_aux_out.layers) { + aux_out->layers[kLayerDictionary].Assimilate(l); + } + } + const Span encoded = special_frame->GetSpan(); + state->special_frames.emplace_back(std::move(special_frame)); + if (subtract) { + ImageBundle decoded(&state->shared.metadata->m); + PassesDecoderState dec_state; + JXL_CHECK(dec_state.output_encoding_info.SetFromMetadata( + *state->shared.metadata)); + const uint8_t* frame_start = encoded.data(); + size_t encoded_size = encoded.size(); + JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size, &decoded, + *state->shared.metadata)); + frame_start += decoded.decoded_bytes(); + encoded_size -= decoded.decoded_bytes(); + size_t ref_xsize = + dec_state.shared_storage.reference_frames[idx].frame.color()->xsize(); + // if the frame itself uses patches, we need to decode another frame + if (!ref_xsize) { + JXL_CHECK(DecodeFrame(&dec_state, pool, frame_start, encoded_size, + &decoded, *state->shared.metadata)); + } + JXL_CHECK(encoded_size == 0); + state->shared.reference_frames[idx] = + std::move(dec_state.shared_storage.reference_frames[idx]); + } else { + state->shared.reference_frames[idx].frame = std::move(ib); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.h b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.h new file mode 100644 index 0000000000..f30881b232 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_patch_dictionary.h @@ -0,0 +1,109 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_PATCH_DICTIONARY_H_ +#define LIB_JXL_ENC_PATCH_DICTIONARY_H_ + +// Chooses reference patches, and avoids encoding them once per occurrence. + +#include +#include +#include + +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_patch_dictionary.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image.h" +#include "lib/jxl/opsin_params.h" + +namespace jxl { + +struct AuxOut; + +constexpr size_t kMaxPatchSize = 32; + +struct QuantizedPatch { + size_t xsize; + size_t ysize; + QuantizedPatch() { + for (size_t i = 0; i < 3; i++) { + pixels[i].resize(kMaxPatchSize * kMaxPatchSize); + fpixels[i].resize(kMaxPatchSize * kMaxPatchSize); + } + } + std::vector pixels[3] = {}; + // Not compared. Used only to retrieve original pixels to construct the + // reference image. + std::vector fpixels[3] = {}; + bool operator==(const QuantizedPatch& other) const { + if (xsize != other.xsize) return false; + if (ysize != other.ysize) return false; + for (size_t c = 0; c < 3; c++) { + if (memcmp(pixels[c].data(), other.pixels[c].data(), + sizeof(int8_t) * xsize * ysize) != 0) + return false; + } + return true; + } + + bool operator<(const QuantizedPatch& other) const { + if (xsize != other.xsize) return xsize < other.xsize; + if (ysize != other.ysize) return ysize < other.ysize; + for (size_t c = 0; c < 3; c++) { + int cmp = memcmp(pixels[c].data(), other.pixels[c].data(), + sizeof(int8_t) * xsize * ysize); + if (cmp > 0) return false; + if (cmp < 0) return true; + } + return false; + } +}; + +// Pair (patch, vector of occurrences). +using PatchInfo = + std::pair>>; + +// Friend class of PatchDictionary. 
+class PatchDictionaryEncoder { + public: + // Only call if HasAny(). + static void Encode(const PatchDictionary& pdic, BitWriter* writer, + size_t layer, AuxOut* aux_out); + + static void SetPositions(PatchDictionary* pdic, + std::vector positions, + std::vector ref_positions, + std::vector blendings) { + pdic->positions_ = std::move(positions); + pdic->ref_positions_ = std::move(ref_positions); + pdic->blendings_ = std::move(blendings); + pdic->ComputePatchTree(); + } + + static void SubtractFrom(const PatchDictionary& pdic, Image3F* opsin); +}; + +void FindBestPatchDictionary(const Image3F& opsin, + PassesEncoderState* JXL_RESTRICT state, + const JxlCmsInterface& cms, ThreadPool* pool, + AuxOut* aux_out, bool is_xyb = true); + +void RoundtripPatchFrame(Image3F* reference_frame, + PassesEncoderState* JXL_RESTRICT state, int idx, + CompressParams& cparams, const JxlCmsInterface& cms, + ThreadPool* pool, AuxOut* aux_out, bool subtract); + +} // namespace jxl + +#endif // LIB_JXL_ENC_PATCH_DICTIONARY_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.cc b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.cc new file mode 100644 index 0000000000..3786ef5cf5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.cc @@ -0,0 +1,89 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_photon_noise.h" + +namespace jxl { + +namespace { + +// Assumes a daylight-like spectrum. +// https://www.strollswithmydog.com/effective-quantum-efficiency-of-sensor/#:~:text=11%2C260%20photons/um%5E2/lx-s +constexpr float kPhotonsPerLxSPerUm2 = 11260; + +// Order of magnitude for cameras in the 2010-2020 decade, taking the CFA into +// account. 
+constexpr float kEffectiveQuantumEfficiency = 0.20; + +// TODO(sboukortt): reevaluate whether these are good defaults, notably whether +// it would be worth making read noise higher at lower ISO settings. +constexpr float kPhotoResponseNonUniformity = 0.005; +constexpr float kInputReferredReadNoise = 3; + +// Assumes a 35mm sensor. +constexpr float kSensorAreaUm2 = 36000.f * 24000; + +template +inline constexpr T Square(const T x) { + return x * x; +} +template +inline constexpr T Cube(const T x) { + return x * x * x; +} + +} // namespace + +NoiseParams SimulatePhotonNoise(const size_t xsize, const size_t ysize, + const float iso) { + const float kOpsinAbsorbanceBiasCbrt = std::cbrt(kOpsinAbsorbanceBias[1]); + + // Focal plane exposure for 18% of kDefaultIntensityTarget, in lx·s. + // (ISO = 10 lx·s ÷ H) + const float h_18 = 10 / iso; + + const float pixel_area_um2 = kSensorAreaUm2 / (xsize * ysize); + + const float electrons_per_pixel_18 = kEffectiveQuantumEfficiency * + kPhotonsPerLxSPerUm2 * h_18 * + pixel_area_um2; + + NoiseParams params; + + for (size_t i = 0; i < NoiseParams::kNumNoisePoints; ++i) { + const float scaled_index = i / (NoiseParams::kNumNoisePoints - 2.f); + // scaled_index is used for XYB = (0, 2·scaled_index, 2·scaled_index) + const float y = 2 * scaled_index; + // 1 = default intensity target + const float linear = std::max( + 0.f, Cube(y - kOpsinAbsorbanceBiasCbrt) + kOpsinAbsorbanceBias[1]); + const float electrons_per_pixel = electrons_per_pixel_18 * (linear / 0.18f); + // Quadrature sum of read noise, photon shot noise (sqrt(S) so simply not + // squared here) and photo response non-uniformity. + // https://doi.org/10.1117/3.725073 + // Units are electrons rms. 
+ const float noise = + std::sqrt(Square(kInputReferredReadNoise) + electrons_per_pixel + + Square(kPhotoResponseNonUniformity * electrons_per_pixel)); + const float linear_noise = noise * (0.18f / electrons_per_pixel_18); + const float opsin_derivative = + (1.f / 3) / Square(std::cbrt(linear - kOpsinAbsorbanceBias[1])); + const float opsin_noise = linear_noise * opsin_derivative; + + // TODO(sboukortt): verify more thoroughly whether the denominator is + // correct. + params.lut[i] = + Clamp1(opsin_noise / + (0.22f // norm_const + * std::sqrt(2.f) // red_noise + green_noise + * 1.13f // standard deviation of a plane of generated noise + ), + 0.f, 1.f); + } + + return params; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.h b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.h new file mode 100644 index 0000000000..f43e14d560 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise.h @@ -0,0 +1,22 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_PHOTON_NOISE_H_ +#define LIB_JXL_ENC_PHOTON_NOISE_H_ + +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/image.h" +#include "lib/jxl/noise.h" + +namespace jxl { + +// Constructs a NoiseParams representing the noise that would be seen at the +// selected nominal exposure on a last-decade (as of 2021) color camera with a +// 36×24mm sensor (“35mm format”). +NoiseParams SimulatePhotonNoise(size_t xsize, size_t ysize, float iso); + +} // namespace jxl + +#endif // LIB_JXL_ENC_PHOTON_NOISE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise_test.cc b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise_test.cc new file mode 100644 index 0000000000..be11b465ad --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_photon_noise_test.cc @@ -0,0 +1,51 @@ +// Copyright (c) the JPEG XL Project Authors. 
All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_photon_noise.h" + +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +using ::testing::FloatNear; +using ::testing::Pointwise; + +MATCHER(AreApproximatelyEqual, "") { + constexpr float kTolerance = 1e-6; + const float actual = std::get<0>(arg); + const float expected = std::get<1>(arg); + return testing::ExplainMatchResult(FloatNear(expected, kTolerance), actual, + result_listener); +} + +TEST(EncPhotonNoiseTest, LUTs) { + EXPECT_THAT( + SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/100).lut, + Pointwise(AreApproximatelyEqual(), + {0.00259652, 0.0139648, 0.00681551, 0.00632582, 0.00694917, + 0.00803922, 0.00934574, 0.0107607})); + EXPECT_THAT( + SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/800).lut, + Pointwise(AreApproximatelyEqual(), + {0.02077220, 0.0420923, 0.01820690, 0.01439020, 0.01293670, + 0.01254030, 0.01277390, 0.0134161})); + EXPECT_THAT( + SimulatePhotonNoise(/*xsize=*/6000, /*ysize=*/4000, /*iso=*/6400).lut, + Pointwise(AreApproximatelyEqual(), + {0.1661770, 0.1691120, 0.05309080, 0.03963960, 0.03357410, + 0.03001650, 0.02776740, 0.0263478})); + + // Lower when measured on a per-pixel basis as there are fewer of them. + EXPECT_THAT( + SimulatePhotonNoise(/*xsize=*/4000, /*ysize=*/3000, /*iso=*/6400).lut, + Pointwise(AreApproximatelyEqual(), + {0.0830886, 0.1008720, 0.0367748, 0.0280305, 0.0240236, + 0.0218040, 0.0205771, 0.0200058})); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.cc b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.cc new file mode 100644 index 0000000000..b65319f3fd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.cc @@ -0,0 +1,82 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_progressive_split.h" + +#include + +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image.h" + +namespace jxl { + +template +void ProgressiveSplitter::SplitACCoefficients( + const T* JXL_RESTRICT block, const AcStrategy& acs, size_t bx, size_t by, + T* JXL_RESTRICT output[kMaxNumPasses]) { + size_t size = acs.covered_blocks_x() * acs.covered_blocks_y() * kDCTBlockSize; + auto shift_right_round0 = [&](T v, int shift) { + T one_if_negative = static_cast(v) >> 31; + T add = (one_if_negative << shift) - one_if_negative; + return (v + add) >> shift; + }; + // Early quit for the simple case of only one pass. + if (mode_.num_passes == 1) { + memcpy(output[0], block, sizeof(T) * size); + return; + } + size_t ncoeffs_all_done_from_earlier_passes = 1; + + int previous_pass_shift = 0; + for (size_t num_pass = 0; num_pass < mode_.num_passes; num_pass++) { // pass + // Zero out output block. + memset(output[num_pass], 0, size * sizeof(T)); + const int pass_shift = mode_.passes[num_pass].shift; + size_t frame_ncoeffs = mode_.passes[num_pass].num_coefficients; + size_t xsize = acs.covered_blocks_x(); + size_t ysize = acs.covered_blocks_y(); + CoefficientLayout(&ysize, &xsize); + for (size_t y = 0; y < ysize * frame_ncoeffs; y++) { // superblk-y + for (size_t x = 0; x < xsize * frame_ncoeffs; x++) { // superblk-x + size_t pos = y * xsize * kBlockDim + x; + if (x < xsize * ncoeffs_all_done_from_earlier_passes && + y < ysize * ncoeffs_all_done_from_earlier_passes) { + // This coefficient was already included in an earlier pass, + // which included a genuinely smaller set of coefficients. + continue; + } + T v = block[pos]; + // Previous pass discarded some bits: do not encode them again. 
+ if (previous_pass_shift != 0) { + T previous_v = shift_right_round0(v, previous_pass_shift) * + (1 << previous_pass_shift); + v -= previous_v; + } + output[num_pass][pos] = shift_right_round0(v, pass_shift); + } // superblk-x + } // superblk-y + // We just finished a pass. + // Hence, we are now guaranteed to have included all coeffs up to + // frame_ncoeffs in every block, unless the current pass is shifted. + if (mode_.passes[num_pass].shift == 0) { + ncoeffs_all_done_from_earlier_passes = frame_ncoeffs; + } + previous_pass_shift = mode_.passes[num_pass].shift; + } // num_pass +} + +template void ProgressiveSplitter::SplitACCoefficients( + const int32_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t, + int32_t* JXL_RESTRICT[kMaxNumPasses]); + +template void ProgressiveSplitter::SplitACCoefficients( + const int16_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t, + int16_t* JXL_RESTRICT[kMaxNumPasses]); + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.h b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.h new file mode 100644 index 0000000000..ef25944bb7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_progressive_split.h @@ -0,0 +1,131 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_PROGRESSIVE_SPLIT_H_ +#define LIB_JXL_PROGRESSIVE_SPLIT_H_ + +#include +#include + +#include +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/splines.h" + +// Functions to split DCT coefficients in multiple passes. All the passes of a +// single frame are added together. 
+ +namespace jxl { + +constexpr size_t kNoDownsamplingFactor = std::numeric_limits::max(); + +struct PassDefinition { + // Side of the square of the coefficients that should be kept in each 8x8 + // block. Must be greater than 1, and at most 8. Should be in non-decreasing + // order. + size_t num_coefficients; + + // How much to shift the encoded values by, with rounding. + size_t shift; + + // If specified, this indicates that if the requested downsampling factor is + // sufficiently high, then it is fine to stop decoding after this pass. + // By default, passes are not marked as being suitable for any downsampling. + size_t suitable_for_downsampling_of_at_least; +}; + +struct ProgressiveMode { + size_t num_passes = 1; + PassDefinition passes[kMaxNumPasses] = { + PassDefinition{/*num_coefficients=*/8, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/1}}; + + ProgressiveMode() = default; + + template + explicit ProgressiveMode(const PassDefinition (&p)[nump]) { + JXL_ASSERT(nump <= kMaxNumPasses); + num_passes = nump; + PassDefinition previous_pass{ + /*num_coefficients=*/1, /*shift=*/0, + /*suitable_for_downsampling_of_at_least=*/kNoDownsamplingFactor}; + size_t last_downsampling_factor = kNoDownsamplingFactor; + for (size_t i = 0; i < nump; i++) { + JXL_ASSERT(p[i].num_coefficients > previous_pass.num_coefficients || + (p[i].num_coefficients == previous_pass.num_coefficients && + p[i].shift < previous_pass.shift)); + JXL_ASSERT(p[i].suitable_for_downsampling_of_at_least == + kNoDownsamplingFactor || + p[i].suitable_for_downsampling_of_at_least <= + last_downsampling_factor); + // Only used inside assert. 
+ (void)last_downsampling_factor; + if (p[i].suitable_for_downsampling_of_at_least != kNoDownsamplingFactor) { + last_downsampling_factor = p[i].suitable_for_downsampling_of_at_least; + } + previous_pass = passes[i] = p[i]; + } + } +}; + +class ProgressiveSplitter { + public: + void SetProgressiveMode(ProgressiveMode mode) { mode_ = mode; } + + size_t GetNumPasses() const { return mode_.num_passes; } + + void InitPasses(Passes* JXL_RESTRICT passes) const { + passes->num_passes = static_cast(GetNumPasses()); + passes->num_downsample = 0; + JXL_ASSERT(passes->num_passes != 0); + passes->shift[passes->num_passes - 1] = 0; + if (passes->num_passes == 1) return; // Done, arrays are empty + + for (uint32_t i = 0; i < mode_.num_passes - 1; ++i) { + const size_t min_downsampling_factor = + mode_.passes[i].suitable_for_downsampling_of_at_least; + passes->shift[i] = mode_.passes[i].shift; + if (1 < min_downsampling_factor && + min_downsampling_factor != kNoDownsamplingFactor) { + passes->downsample[passes->num_downsample] = min_downsampling_factor; + passes->last_pass[passes->num_downsample] = i; + if (mode_.passes[i + 1].suitable_for_downsampling_of_at_least < + min_downsampling_factor) { + passes->num_downsample += 1; + } + } + } + } + + template + void SplitACCoefficients(const T* JXL_RESTRICT block, const AcStrategy& acs, + size_t bx, size_t by, + T* JXL_RESTRICT output[kMaxNumPasses]); + + private: + ProgressiveMode mode_; +}; + +extern template void ProgressiveSplitter::SplitACCoefficients( + const int32_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t, + int32_t* JXL_RESTRICT[kMaxNumPasses]); + +extern template void ProgressiveSplitter::SplitACCoefficients( + const int16_t* JXL_RESTRICT, const AcStrategy&, size_t, size_t, + int16_t* JXL_RESTRICT[kMaxNumPasses]); + +} // namespace jxl + +#endif // LIB_JXL_PROGRESSIVE_SPLIT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.cc b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.cc new file mode 
100644 index 0000000000..848310e75d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.cc @@ -0,0 +1,214 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_quant_weights.h" + +#include +#include + +#include +#include +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_modular.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/options.h" + +namespace jxl { + +struct AuxOut; + +namespace { + +Status EncodeDctParams(const DctQuantWeightParams& params, BitWriter* writer) { + JXL_ASSERT(params.num_distance_bands >= 1); + writer->Write(DctQuantWeightParams::kLog2MaxDistanceBands, + params.num_distance_bands - 1); + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < params.num_distance_bands; i++) { + JXL_RETURN_IF_ERROR(F16Coder::Write( + params.distance_bands[c][i] * (i == 0 ? 
(1 / 64.0f) : 1.0f), writer)); + } + } + return true; +} + +Status EncodeQuant(const QuantEncoding& encoding, size_t idx, size_t size_x, + size_t size_y, BitWriter* writer, + ModularFrameEncoder* modular_frame_encoder) { + writer->Write(kLog2NumQuantModes, encoding.mode); + size_x *= kBlockDim; + size_y *= kBlockDim; + switch (encoding.mode) { + case QuantEncoding::kQuantModeLibrary: { + writer->Write(kCeilLog2NumPredefinedTables, encoding.predefined); + break; + } + case QuantEncoding::kQuantModeID: { + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 3; i++) { + JXL_RETURN_IF_ERROR( + F16Coder::Write(encoding.idweights[c][i] * (1.0f / 64), writer)); + } + } + break; + } + case QuantEncoding::kQuantModeDCT2: { + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 6; i++) { + JXL_RETURN_IF_ERROR(F16Coder::Write( + encoding.dct2weights[c][i] * (1.0f / 64), writer)); + } + } + break; + } + case QuantEncoding::kQuantModeDCT4X8: { + for (size_t c = 0; c < 3; c++) { + JXL_RETURN_IF_ERROR( + F16Coder::Write(encoding.dct4x8multipliers[c], writer)); + } + JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer)); + break; + } + case QuantEncoding::kQuantModeDCT4: { + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 2; i++) { + JXL_RETURN_IF_ERROR( + F16Coder::Write(encoding.dct4multipliers[c][i], writer)); + } + } + JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer)); + break; + } + case QuantEncoding::kQuantModeDCT: { + JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer)); + break; + } + case QuantEncoding::kQuantModeRAW: { + ModularFrameEncoder::EncodeQuantTable(size_x, size_y, writer, encoding, + idx, modular_frame_encoder); + break; + } + case QuantEncoding::kQuantModeAFV: { + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 9; i++) { + JXL_RETURN_IF_ERROR(F16Coder::Write( + encoding.afv_weights[c][i] * (i < 6 ? 
1.0f / 64 : 1.0f), writer)); + } + } + JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params, writer)); + JXL_RETURN_IF_ERROR(EncodeDctParams(encoding.dct_params_afv_4x4, writer)); + break; + } + } + return true; +} + +} // namespace + +Status DequantMatricesEncode(const DequantMatrices* matrices, BitWriter* writer, + size_t layer, AuxOut* aux_out, + ModularFrameEncoder* modular_frame_encoder) { + bool all_default = true; + const std::vector& encodings = matrices->encodings(); + + for (size_t i = 0; i < encodings.size(); i++) { + if (encodings[i].mode != QuantEncoding::kQuantModeLibrary || + encodings[i].predefined != 0) { + all_default = false; + } + } + // TODO(janwas): better bound + BitWriter::Allotment allotment(writer, 512 * 1024); + writer->Write(1, all_default); + if (!all_default) { + for (size_t i = 0; i < encodings.size(); i++) { + JXL_RETURN_IF_ERROR(EncodeQuant( + encodings[i], i, DequantMatrices::required_size_x[i], + DequantMatrices::required_size_y[i], writer, modular_frame_encoder)); + } + } + allotment.ReclaimAndCharge(writer, layer, aux_out); + return true; +} + +Status DequantMatricesEncodeDC(const DequantMatrices* matrices, + BitWriter* writer, size_t layer, + AuxOut* aux_out) { + bool all_default = true; + const float* dc_quant = matrices->DCQuants(); + for (size_t c = 0; c < 3; c++) { + if (dc_quant[c] != kDCQuant[c]) { + all_default = false; + } + } + BitWriter::Allotment allotment(writer, 1 + sizeof(float) * kBitsPerByte * 3); + writer->Write(1, all_default); + if (!all_default) { + for (size_t c = 0; c < 3; c++) { + JXL_RETURN_IF_ERROR(F16Coder::Write(dc_quant[c] * 128.0f, writer)); + } + } + allotment.ReclaimAndCharge(writer, layer, aux_out); + return true; +} + +void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc) { + matrices->SetDCQuant(dc); + // Roundtrip encode/decode DC to ensure same values as decoder. 
+ BitWriter writer; + JXL_CHECK(DequantMatricesEncodeDC(matrices, &writer, 0, nullptr)); + writer.ZeroPadToByte(); + BitReader br(writer.GetSpan()); + // Called only in the encoder: should fail only for programmer errors. + JXL_CHECK(matrices->DecodeDC(&br)); + JXL_CHECK(br.Close()); +} + +void DequantMatricesScaleDC(DequantMatrices* matrices, const float scale) { + float dc[3]; + for (size_t c = 0; c < 3; ++c) { + dc[c] = matrices->InvDCQuant(c) * (1.0f / scale); + } + DequantMatricesSetCustomDC(matrices, dc); +} + +void DequantMatricesRoundtrip(DequantMatrices* matrices) { + // Do not pass modular en/decoder, as they only change entropy and not + // values. + BitWriter writer; + JXL_CHECK(DequantMatricesEncode(matrices, &writer, 0, nullptr)); + writer.ZeroPadToByte(); + BitReader br(writer.GetSpan()); + // Called only in the encoder: should fail only for programmer errors. + JXL_CHECK(matrices->Decode(&br)); + JXL_CHECK(br.Close()); +} + +void DequantMatricesSetCustom(DequantMatrices* matrices, + const std::vector& encodings, + ModularFrameEncoder* encoder) { + JXL_ASSERT(encodings.size() == DequantMatrices::kNum); + matrices->SetEncodings(encodings); + for (size_t i = 0; i < encodings.size(); i++) { + if (encodings[i].mode == QuantEncodingInternal::kQuantModeRAW) { + encoder->AddQuantTable(DequantMatrices::required_size_x[i] * kBlockDim, + DequantMatrices::required_size_y[i] * kBlockDim, + encodings[i], i); + } + } + DequantMatricesRoundtrip(matrices); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.h b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.h new file mode 100644 index 0000000000..e0a387fed5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_quant_weights.h @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_QUANT_WEIGHTS_H_ +#define LIB_JXL_ENC_QUANT_WEIGHTS_H_ + +#include "lib/jxl/quant_weights.h" + +namespace jxl { + +struct AuxOut; +struct BitWriter; + +Status DequantMatricesEncode( + const DequantMatrices* matrices, BitWriter* writer, size_t layer, + AuxOut* aux_out, ModularFrameEncoder* modular_frame_encoder = nullptr); +Status DequantMatricesEncodeDC(const DequantMatrices* matrices, + BitWriter* writer, size_t layer, + AuxOut* aux_out); +// For consistency with QuantEncoding, higher values correspond to more +// precision. +void DequantMatricesSetCustomDC(DequantMatrices* matrices, const float* dc); + +void DequantMatricesScaleDC(DequantMatrices* matrices, float scale); + +void DequantMatricesSetCustom(DequantMatrices* matrices, + const std::vector& encodings, + ModularFrameEncoder* encoder); + +// Roundtrip encode/decode the matrices to ensure same values as decoder. +void DequantMatricesRoundtrip(DequantMatrices* matrices); + +} // namespace jxl + +#endif // LIB_JXL_ENC_QUANT_WEIGHTS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_splines.cc b/third-party/libjxl/libjxl/lib/jxl/enc_splines.cc new file mode 100644 index 0000000000..ddcd78a748 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_splines.cc @@ -0,0 +1,98 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/splines.h" + +namespace jxl { + +struct AuxOut; + +class QuantizedSplineEncoder { + public: + // Only call if HasAny(). 
+ static void Tokenize(const QuantizedSpline& spline, + std::vector* const tokens) { + tokens->emplace_back(kNumControlPointsContext, + spline.control_points_.size()); + for (const auto& point : spline.control_points_) { + tokens->emplace_back(kControlPointsContext, PackSigned(point.first)); + tokens->emplace_back(kControlPointsContext, PackSigned(point.second)); + } + const auto encode_dct = [tokens](const int dct[32]) { + for (int i = 0; i < 32; ++i) { + tokens->emplace_back(kDCTContext, PackSigned(dct[i])); + } + }; + for (int c = 0; c < 3; ++c) { + encode_dct(spline.color_dct_[c]); + } + encode_dct(spline.sigma_dct_); + } +}; + +namespace { + +void EncodeAllStartingPoints(const std::vector& points, + std::vector* tokens) { + int64_t last_x = 0; + int64_t last_y = 0; + for (size_t i = 0; i < points.size(); i++) { + const int64_t x = lroundf(points[i].x); + const int64_t y = lroundf(points[i].y); + if (i == 0) { + tokens->emplace_back(kStartingPositionContext, x); + tokens->emplace_back(kStartingPositionContext, y); + } else { + tokens->emplace_back(kStartingPositionContext, PackSigned(x - last_x)); + tokens->emplace_back(kStartingPositionContext, PackSigned(y - last_y)); + } + last_x = x; + last_y = y; + } +} + +} // namespace + +void EncodeSplines(const Splines& splines, BitWriter* writer, + const size_t layer, const HistogramParams& histogram_params, + AuxOut* aux_out) { + JXL_ASSERT(splines.HasAny()); + + const std::vector& quantized_splines = + splines.QuantizedSplines(); + std::vector> tokens(1); + tokens[0].emplace_back(kNumSplinesContext, quantized_splines.size() - 1); + EncodeAllStartingPoints(splines.StartingPoints(), &tokens[0]); + + tokens[0].emplace_back(kQuantizationAdjustmentContext, + PackSigned(splines.GetQuantizationAdjustment())); + + for (const QuantizedSpline& spline : quantized_splines) { + QuantizedSplineEncoder::Tokenize(spline, &tokens[0]); + } + + EntropyEncodingData codes; + std::vector context_map; + 
BuildAndEncodeHistograms(histogram_params, kNumSplineContexts, tokens, &codes, + &context_map, writer, layer, aux_out); + WriteTokens(tokens[0], codes, context_map, writer, layer, aux_out); +} + +Splines FindSplines(const Image3F& opsin) { + // TODO: implement spline detection. + return {}; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_splines.h b/third-party/libjxl/libjxl/lib/jxl/enc_splines.h new file mode 100644 index 0000000000..be700dba75 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_splines.h @@ -0,0 +1,38 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_SPLINES_H_ +#define LIB_JXL_ENC_SPLINES_H_ + +#include +#include + +#include +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/image.h" +#include "lib/jxl/splines.h" + +namespace jxl { + +struct AuxOut; + +// Only call if splines.HasAny(). +void EncodeSplines(const Splines& splines, BitWriter* writer, size_t layer, + const HistogramParams& histogram_params, AuxOut* aux_out); + +Splines FindSplines(const Image3F& opsin); + +} // namespace jxl + +#endif // LIB_JXL_ENC_SPLINES_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_toc.cc b/third-party/libjxl/libjxl/lib/jxl/enc_toc.cc new file mode 100644 index 0000000000..dc75fdd9ba --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_toc.cc @@ -0,0 +1,45 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/enc_toc.h" + +#include + +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/common.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_coeff_order.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/toc.h" + +namespace jxl { +Status WriteGroupOffsets(const std::vector& group_codes, + const std::vector* permutation, + BitWriter* JXL_RESTRICT writer, AuxOut* aux_out) { + BitWriter::Allotment allotment(writer, MaxBits(group_codes.size())); + if (permutation && !group_codes.empty()) { + // Don't write a permutation at all for an empty group_codes. + writer->Write(1, 1); // permutation + JXL_DASSERT(permutation->size() == group_codes.size()); + EncodePermutation(permutation->data(), /*skip=*/0, permutation->size(), + writer, /* layer= */ 0, aux_out); + + } else { + writer->Write(1, 0); // no permutation + } + writer->ZeroPadToByte(); // before TOC entries + + for (size_t i = 0; i < group_codes.size(); i++) { + JXL_ASSERT(group_codes[i].BitsWritten() % kBitsPerByte == 0); + const size_t group_size = group_codes[i].BitsWritten() / kBitsPerByte; + JXL_RETURN_IF_ERROR(U32Coder::Write(kTocDist, group_size, writer)); + } + writer->ZeroPadToByte(); // before first group + allotment.ReclaimAndCharge(writer, kLayerTOC, aux_out); + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_toc.h b/third-party/libjxl/libjxl/lib/jxl/enc_toc.h new file mode 100644 index 0000000000..242b3efccb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_toc.h @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_TOC_H_ +#define LIB_JXL_ENC_TOC_H_ + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/enc_bit_writer.h" + +namespace jxl { + +struct AuxOut; + +// Writes the group offsets. If the permutation vector is nullptr, the identity +// permutation will be used. +Status WriteGroupOffsets(const std::vector& group_codes, + const std::vector* permutation, + BitWriter* JXL_RESTRICT writer, AuxOut* aux_out); + +} // namespace jxl + +#endif // LIB_JXL_ENC_TOC_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_transforms-inl.h b/third-party/libjxl/libjxl/lib/jxl/enc_transforms-inl.h new file mode 100644 index 0000000000..7459a16305 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_transforms-inl.h @@ -0,0 +1,800 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_ +#undef LIB_JXL_ENC_TRANSFORMS_INL_H_ +#else +#define LIB_JXL_ENC_TRANSFORMS_INL_H_ +#endif + +#include + +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/dct-inl.h" +#include "lib/jxl/dct_scales.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// Inverse of ReinterpretingDCT. 
+template +HWY_INLINE void ReinterpretingIDCT(const float* input, + const size_t input_stride, float* output, + const size_t output_stride) { + HWY_ALIGN float block[ROWS * COLS] = {}; + if (ROWS < COLS) { + for (size_t y = 0; y < LF_ROWS; y++) { + for (size_t x = 0; x < LF_COLS; x++) { + block[y * COLS + x] = input[y * input_stride + x] * + DCTTotalResampleScale(y) * + DCTTotalResampleScale(x); + } + } + } else { + for (size_t y = 0; y < LF_COLS; y++) { + for (size_t x = 0; x < LF_ROWS; x++) { + block[y * ROWS + x] = input[y * input_stride + x] * + DCTTotalResampleScale(y) * + DCTTotalResampleScale(x); + } + } + } + + // ROWS, COLS <= 8, so we can put scratch space on the stack. + HWY_ALIGN float scratch_space[ROWS * COLS]; + ComputeScaledIDCT()(block, DCTTo(output, output_stride), + scratch_space); +} + +template +void DCT2TopBlock(const float* block, size_t stride, float* out) { + static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim"); + static_assert(S % 2 == 0, "S should be even"); + float temp[kDCTBlockSize]; + constexpr size_t num_2x2 = S / 2; + for (size_t y = 0; y < num_2x2; y++) { + for (size_t x = 0; x < num_2x2; x++) { + float c00 = block[y * 2 * stride + x * 2]; + float c01 = block[y * 2 * stride + x * 2 + 1]; + float c10 = block[(y * 2 + 1) * stride + x * 2]; + float c11 = block[(y * 2 + 1) * stride + x * 2 + 1]; + float r00 = c00 + c01 + c10 + c11; + float r01 = c00 + c01 - c10 - c11; + float r10 = c00 - c01 + c10 - c11; + float r11 = c00 - c01 - c10 + c11; + r00 *= 0.25f; + r01 *= 0.25f; + r10 *= 0.25f; + r11 *= 0.25f; + temp[y * kBlockDim + x] = r00; + temp[y * kBlockDim + num_2x2 + x] = r01; + temp[(y + num_2x2) * kBlockDim + x] = r10; + temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11; + } + } + for (size_t y = 0; y < S; y++) { + for (size_t x = 0; x < S; x++) { + out[y * kBlockDim + x] = temp[y * kBlockDim + x]; + } + } +} + +void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) { + HWY_ALIGN static 
constexpr float k4x4AFVBasisTranspose[16][16] = { + { + 0.2500000000000000, + 0.8769029297991420f, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + -0.4105377591765233f, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + }, + { + 0.2500000000000000, + 0.2206518106944235f, + 0.0000000000000000, + 0.0000000000000000, + -0.7071067811865474f, + 0.6235485373547691f, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + }, + { + 0.2500000000000000, + -0.1014005039375376f, + 0.4067007583026075f, + -0.2125574805828875f, + 0.0000000000000000, + -0.0643507165794627f, + -0.4517556589999482f, + -0.3046847507248690f, + 0.3017929516615495f, + 0.4082482904638627f, + 0.1747866975480809f, + -0.2110560104933578f, + -0.1426608480880726f, + -0.1381354035075859f, + -0.1743760259965107f, + 0.1135498731499434f, + }, + { + 0.2500000000000000, + -0.1014005039375375f, + 0.4444481661973445f, + 0.3085497062849767f, + 0.0000000000000000f, + -0.0643507165794627f, + 0.1585450355184006f, + 0.5112616136591823f, + 0.2579236279634118f, + 0.0000000000000000, + 0.0812611176717539f, + 0.1856718091610980f, + -0.3416446842253372f, + 0.3302282550303788f, + 0.0702790691196284f, + -0.0741750459581035f, + }, + { + 0.2500000000000000, + 0.2206518106944236f, + 0.0000000000000000, + 0.0000000000000000, + 0.7071067811865476f, + 0.6235485373547694f, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + }, + { + 0.2500000000000000, + -0.1014005039375378f, + 0.0000000000000000, + 
0.4706702258572536f, + 0.0000000000000000, + -0.0643507165794628f, + -0.0403851516082220f, + 0.0000000000000000, + 0.1627234014286620f, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.7367497537172237f, + 0.0875511500058708f, + -0.2921026642334881f, + 0.1940289303259434f, + }, + { + 0.2500000000000000, + -0.1014005039375377f, + 0.1957439937204294f, + -0.1621205195722993f, + 0.0000000000000000, + -0.0643507165794628f, + 0.0074182263792424f, + -0.2904801297289980f, + 0.0952002265347504f, + 0.0000000000000000, + -0.3675398009862027f, + 0.4921585901373873f, + 0.2462710772207515f, + -0.0794670660590957f, + 0.3623817333531167f, + -0.4351904965232280f, + }, + { + 0.2500000000000000, + -0.1014005039375376f, + 0.2929100136981264f, + 0.0000000000000000, + 0.0000000000000000, + -0.0643507165794627f, + 0.3935103426921017f, + -0.0657870154914280f, + 0.0000000000000000, + -0.4082482904638628f, + -0.3078822139579090f, + -0.3852501370925192f, + -0.0857401903551931f, + -0.4613374887461511f, + 0.0000000000000000, + 0.2191868483885747f, + }, + { + 0.2500000000000000, + -0.1014005039375376f, + -0.4067007583026072f, + -0.2125574805828705f, + 0.0000000000000000, + -0.0643507165794627f, + -0.4517556589999464f, + 0.3046847507248840f, + 0.3017929516615503f, + -0.4082482904638635f, + -0.1747866975480813f, + 0.2110560104933581f, + -0.1426608480880734f, + -0.1381354035075829f, + -0.1743760259965108f, + 0.1135498731499426f, + }, + { + 0.2500000000000000, + -0.1014005039375377f, + -0.1957439937204287f, + -0.1621205195722833f, + 0.0000000000000000, + -0.0643507165794628f, + 0.0074182263792444f, + 0.2904801297290076f, + 0.0952002265347505f, + 0.0000000000000000, + 0.3675398009862011f, + -0.4921585901373891f, + 0.2462710772207514f, + -0.0794670660591026f, + 0.3623817333531165f, + -0.4351904965232251f, + }, + { + 0.2500000000000000, + -0.1014005039375375f, + 0.0000000000000000, + -0.4706702258572528f, + 0.0000000000000000, + -0.0643507165794627f, + 0.1107416575309343f, + 
0.0000000000000000, + -0.1627234014286617f, + 0.0000000000000000, + 0.0000000000000000, + 0.0000000000000000, + 0.1488339922711357f, + 0.4972464710953509f, + 0.2921026642334879f, + 0.5550443808910661f, + }, + { + 0.2500000000000000, + -0.1014005039375377f, + 0.1137907446044809f, + -0.1464291867126764f, + 0.0000000000000000, + -0.0643507165794628f, + 0.0829816309488205f, + -0.2388977352334460f, + -0.3531238544981630f, + -0.4082482904638630f, + 0.4826689115059883f, + 0.1741941265991622f, + -0.0476868035022925f, + 0.1253805944856366f, + -0.4326608024727445f, + -0.2546827712406646f, + }, + { + 0.2500000000000000, + -0.1014005039375377f, + -0.4444481661973438f, + 0.3085497062849487f, + 0.0000000000000000, + -0.0643507165794628f, + 0.1585450355183970f, + -0.5112616136592012f, + 0.2579236279634129f, + 0.0000000000000000, + -0.0812611176717504f, + -0.1856718091610990f, + -0.3416446842253373f, + 0.3302282550303805f, + 0.0702790691196282f, + -0.0741750459581023f, + }, + { + 0.2500000000000000, + -0.1014005039375376f, + -0.2929100136981264f, + 0.0000000000000000, + 0.0000000000000000, + -0.0643507165794627f, + 0.3935103426921022f, + 0.0657870154914254f, + 0.0000000000000000, + 0.4082482904638634f, + 0.3078822139579031f, + 0.3852501370925211f, + -0.0857401903551927f, + -0.4613374887461554f, + 0.0000000000000000, + 0.2191868483885728f, + }, + { + 0.2500000000000000, + -0.1014005039375376f, + -0.1137907446044814f, + -0.1464291867126654f, + 0.0000000000000000, + -0.0643507165794627f, + 0.0829816309488214f, + 0.2388977352334547f, + -0.3531238544981624f, + 0.4082482904638630f, + -0.4826689115059858f, + -0.1741941265991621f, + -0.0476868035022928f, + 0.1253805944856431f, + -0.4326608024727457f, + -0.2546827712406641f, + }, + { + 0.2500000000000000, + -0.1014005039375374f, + 0.0000000000000000, + 0.4251149611657548f, + 0.0000000000000000, + -0.0643507165794626f, + -0.4517556589999480f, + 0.0000000000000000, + -0.6035859033230976f, + 0.0000000000000000, + 0.0000000000000000, + 
0.0000000000000000, + -0.1426608480880724f, + -0.1381354035075845f, + 0.3487520519930227f, + 0.1135498731499429f, + }, + }; + + const HWY_CAPPED(float, 16) d; + for (size_t i = 0; i < 16; i += Lanes(d)) { + auto scalar = Zero(d); + for (size_t j = 0; j < 16; j++) { + auto px = Set(d, pixels[j]); + auto basis = Load(d, k4x4AFVBasisTranspose[j] + i); + scalar = MulAdd(px, basis, scalar); + } + Store(scalar, d, coeffs + i); + } +} + +// Coefficient layout: +// - (even, even) positions hold AFV coefficients +// - (odd, even) positions hold DCT4x4 coefficients +// - (any, odd) positions hold DCT4x8 coefficients +template +void AFVTransformFromPixels(const float* JXL_RESTRICT pixels, + size_t pixels_stride, + float* JXL_RESTRICT coefficients) { + HWY_ALIGN float scratch_space[4 * 8 * 2]; + size_t afv_x = afv_kind & 1; + size_t afv_y = afv_kind / 2; + HWY_ALIGN float block[4 * 8]; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] = + pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x]; + } + } + // AFV coefficients in (even, even) positions. + HWY_ALIGN float coeff[4 * 4]; + AFVDCT4x4(block, coeff); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix]; + } + } + // 4x4 DCT of the block with same y and different x. + ComputeScaledDCT<4, 4>()( + DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4), + pixels_stride), + block, scratch_space); + // ... in (odd, even) positions. + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 8; ix++) { + coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix]; + } + } + // 4x8 DCT of the other half of the block. + ComputeScaledDCT<4, 8>()( + DCTFrom(pixels + (afv_y == 1 ? 
0 : 4) * pixels_stride, pixels_stride), + block, scratch_space); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 8; ix++) { + coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix]; + } + } + float block00 = coefficients[0] * 0.25f; + float block01 = coefficients[1]; + float block10 = coefficients[8]; + coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f; + coefficients[1] = (block00 - block01) * 0.5f; + coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f; +} + +HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategy::Type strategy, + const float* JXL_RESTRICT pixels, + size_t pixels_stride, + float* JXL_RESTRICT coefficients, + float* JXL_RESTRICT scratch_space) { + using Type = AcStrategy::Type; + switch (strategy) { + case Type::IDENTITY: { + for (size_t y = 0; y < 2; y++) { + for (size_t x = 0; x < 2; x++) { + float block_dc = 0; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix]; + } + } + block_dc *= 1.0f / 16; + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + if (ix == 1 && iy == 1) continue; + coefficients[(y + iy * 2) * 8 + x + ix * 2] = + pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] - + pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1]; + } + } + coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x]; + coefficients[y * 8 + x] = block_dc; + } + } + float block00 = coefficients[0]; + float block01 = coefficients[1]; + float block10 = coefficients[8]; + float block11 = coefficients[9]; + coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; + coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; + coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; + coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; + break; + } + case Type::DCT8X4: { + for (size_t x = 0; x < 2; x++) { + HWY_ALIGN float block[4 * 8]; + ComputeScaledDCT<8, 
4>()(DCTFrom(pixels + x * 4, pixels_stride), block, + scratch_space); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 8; ix++) { + // Store transposed. + coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix]; + } + } + } + float block0 = coefficients[0]; + float block1 = coefficients[8]; + coefficients[0] = (block0 + block1) * 0.5f; + coefficients[8] = (block0 - block1) * 0.5f; + break; + } + case Type::DCT4X8: { + for (size_t y = 0; y < 2; y++) { + HWY_ALIGN float block[4 * 8]; + ComputeScaledDCT<4, 8>()( + DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block, + scratch_space); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 8; ix++) { + coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix]; + } + } + } + float block0 = coefficients[0]; + float block1 = coefficients[8]; + coefficients[0] = (block0 + block1) * 0.5f; + coefficients[8] = (block0 - block1) * 0.5f; + break; + } + case Type::DCT4X4: { + for (size_t y = 0; y < 2; y++) { + for (size_t x = 0; x < 2; x++) { + HWY_ALIGN float block[4 * 4]; + ComputeScaledDCT<4, 4>()( + DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride), + block, scratch_space); + for (size_t iy = 0; iy < 4; iy++) { + for (size_t ix = 0; ix < 4; ix++) { + coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix]; + } + } + } + } + float block00 = coefficients[0]; + float block01 = coefficients[1]; + float block10 = coefficients[8]; + float block11 = coefficients[9]; + coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f; + coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f; + coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f; + coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f; + break; + } + case Type::DCT2X2: { + DCT2TopBlock<8>(pixels, pixels_stride, coefficients); + DCT2TopBlock<4>(coefficients, kBlockDim, coefficients); + DCT2TopBlock<2>(coefficients, kBlockDim, coefficients); + break; + } + case Type::DCT16X16: 
{ + ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT16X8: { + ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT8X16: { + ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT32X8: { + ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT8X32: { + ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT32X16: { + ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT16X32: { + ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT32X32: { + ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT: { + ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::AFV0: { + AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients); + break; + } + case Type::AFV1: { + AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients); + break; + } + case Type::AFV2: { + AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients); + break; + } + case Type::AFV3: { + AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients); + break; + } + case Type::DCT64X64: { + ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT64X32: { + ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT32X64: { + ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT128X128: { + ComputeScaledDCT<128, 
128>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT128X64: { + ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT64X128: { + ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT256X256: { + ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT256X128: { + ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::DCT128X256: { + ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients, + scratch_space); + break; + } + case Type::kNumValidStrategies: + JXL_UNREACHABLE("Invalid strategy"); + } +} + +HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategy::Type strategy, + const float* block, float* dc, + size_t dc_stride) { + using Type = AcStrategy::Type; + switch (strategy) { + case Type::DCT16X8: { + ReinterpretingIDCT( + block, 2 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT8X16: { + ReinterpretingIDCT( + block, 2 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT16X16: { + ReinterpretingIDCT( + block, 2 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT32X8: { + ReinterpretingIDCT( + block, 4 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT8X32: { + ReinterpretingIDCT( + block, 4 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT32X16: { + ReinterpretingIDCT( + block, 4 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT16X32: { + ReinterpretingIDCT( + block, 4 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT32X32: { + ReinterpretingIDCT( + block, 4 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT64X32: { + ReinterpretingIDCT( + block, 8 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT32X64: { + ReinterpretingIDCT( + block, 8 * kBlockDim, dc, 
dc_stride); + break; + } + case Type::DCT64X64: { + ReinterpretingIDCT( + block, 8 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT128X64: { + ReinterpretingIDCT< + /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim, + /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>( + block, 16 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT64X128: { + ReinterpretingIDCT< + /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, + /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>( + block, 16 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT128X128: { + ReinterpretingIDCT< + /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, + /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>( + block, 16 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT256X128: { + ReinterpretingIDCT< + /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim, + /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>( + block, 32 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT128X256: { + ReinterpretingIDCT< + /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, + /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>( + block, 32 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT256X256: { + ReinterpretingIDCT< + /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim, + /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>( + block, 32 * kBlockDim, dc, dc_stride); + break; + } + case Type::DCT: + case Type::DCT2X2: + case Type::DCT4X4: + case Type::DCT4X8: + case Type::DCT8X4: + case Type::AFV0: + case Type::AFV1: + case Type::AFV2: + case Type::AFV3: + case Type::IDENTITY: + dc[0] = block[0]; + break; + case Type::kNumValidStrategies: + JXL_UNREACHABLE("Invalid strategy"); + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_ENC_TRANSFORMS_INL_H_ diff --git 
a/third-party/libjxl/libjxl/lib/jxl/enc_transforms.cc b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.cc new file mode 100644 index 0000000000..8978ba1dcb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.cc @@ -0,0 +1,41 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_transforms.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_transforms.cc" +#include +#include + +#include "lib/jxl/dct_scales.h" +#include "lib/jxl/enc_transforms-inl.h" + +namespace jxl { + +#if HWY_ONCE +HWY_EXPORT(TransformFromPixels); +void TransformFromPixels(const AcStrategy::Type strategy, + const float* JXL_RESTRICT pixels, size_t pixels_stride, + float* JXL_RESTRICT coefficients, + float* scratch_space) { + return HWY_DYNAMIC_DISPATCH(TransformFromPixels)( + strategy, pixels, pixels_stride, coefficients, scratch_space); +} + +HWY_EXPORT(DCFromLowestFrequencies); +void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block, + float* dc, size_t dc_stride) { + return HWY_DYNAMIC_DISPATCH(DCFromLowestFrequencies)(strategy, block, dc, + dc_stride); +} + +HWY_EXPORT(AFVDCT4x4); +void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) { + return HWY_DYNAMIC_DISPATCH(AFVDCT4x4)(pixels, coeffs); +} +#endif // HWY_ONCE + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_transforms.h b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.h new file mode 100644 index 0000000000..039ccc3893 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_transforms.h @@ -0,0 +1,32 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_ENC_TRANSFORMS_H_ +#define LIB_JXL_ENC_TRANSFORMS_H_ + +// Facade for (non-inlined) integral transforms. + +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" + +namespace jxl { + +void TransformFromPixels(const AcStrategy::Type strategy, + const float* JXL_RESTRICT pixels, size_t pixels_stride, + float* JXL_RESTRICT coefficients, + float* JXL_RESTRICT scratch_space); + +// Equivalent of the above for DC image. +void DCFromLowestFrequencies(AcStrategy::Type strategy, const float* block, + float* dc, size_t dc_stride); + +void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs); + +} // namespace jxl + +#endif // LIB_JXL_ENC_TRANSFORMS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_xyb.cc b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.cc new file mode 100644 index 0000000000..a0a5e48e1c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.cc @@ -0,0 +1,517 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/enc_xyb.h" + +#include +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/enc_xyb.cc" +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_image_bundle.h" +#include "lib/jxl/fast_math-inl.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +// 4x3 matrix * 3x1 SIMD vectors +template +JXL_INLINE void OpsinAbsorbance(const V r, const V g, const V b, + const float* JXL_RESTRICT premul_absorb, + V* JXL_RESTRICT mixed0, V* JXL_RESTRICT mixed1, + V* JXL_RESTRICT mixed2) { + const float* bias = &kOpsinAbsorbanceBias[0]; + const HWY_FULL(float) d; + const size_t N = Lanes(d); + const auto m0 = Load(d, premul_absorb + 0 * N); + const auto m1 = Load(d, premul_absorb + 1 * N); + const auto m2 = Load(d, premul_absorb + 2 * N); + const auto m3 = Load(d, premul_absorb + 3 * N); + const auto m4 = Load(d, premul_absorb + 4 * N); + const auto m5 = Load(d, premul_absorb + 5 * N); + const auto m6 = Load(d, premul_absorb + 6 * N); + const auto m7 = Load(d, premul_absorb + 7 * N); + const auto m8 = Load(d, premul_absorb + 8 * N); + *mixed0 = MulAdd(m0, r, MulAdd(m1, g, MulAdd(m2, b, Set(d, bias[0])))); + *mixed1 = MulAdd(m3, r, MulAdd(m4, g, MulAdd(m5, b, Set(d, bias[1])))); + *mixed2 = MulAdd(m6, r, MulAdd(m7, g, MulAdd(m8, b, Set(d, bias[2])))); +} + +template +void StoreXYB(const V r, V g, const V b, float* JXL_RESTRICT valx, + float* JXL_RESTRICT valy, float* JXL_RESTRICT valz) { + const HWY_FULL(float) d; + const V half = Set(d, 0.5f); + Store(Mul(half, Sub(r, g)), d, valx); + Store(Mul(half, Add(r, g)), d, valy); + Store(b, d, valz); +} + +// Converts one RGB vector to XYB. +template +void LinearRGBToXYB(const V r, const V g, const V b, + const float* JXL_RESTRICT premul_absorb, + float* JXL_RESTRICT valx, float* JXL_RESTRICT valy, + float* JXL_RESTRICT valz) { + V mixed0, mixed1, mixed2; + OpsinAbsorbance(r, g, b, premul_absorb, &mixed0, &mixed1, &mixed2); + + // mixed* should be non-negative even for wide-gamut, so clamp to zero. 
+ mixed0 = ZeroIfNegative(mixed0); + mixed1 = ZeroIfNegative(mixed1); + mixed2 = ZeroIfNegative(mixed2); + + const HWY_FULL(float) d; + const size_t N = Lanes(d); + mixed0 = CubeRootAndAdd(mixed0, Load(d, premul_absorb + 9 * N)); + mixed1 = CubeRootAndAdd(mixed1, Load(d, premul_absorb + 10 * N)); + mixed2 = CubeRootAndAdd(mixed2, Load(d, premul_absorb + 11 * N)); + StoreXYB(mixed0, mixed1, mixed2, valx, valy, valz); + + // For wide-gamut inputs, r/g/b and valx (but not y/z) are often negative. +} + +void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1, + float* JXL_RESTRICT row2, + const float* JXL_RESTRICT premul_absorb, size_t xsize) { + const HWY_FULL(float) d; + for (size_t x = 0; x < xsize; x += Lanes(d)) { + const auto r = Load(d, row0 + x); + const auto g = Load(d, row1 + x); + const auto b = Load(d, row2 + x); + LinearRGBToXYB(r, g, b, premul_absorb, row0 + x, row1 + x, row2 + x); + } +} + +// Input/output uses the codec.h scaling: nominally 0-1 if in-gamut. 
+template +V LinearFromSRGB(V encoded) { + return TF_SRGB().DisplayFromEncoded(encoded); +} + +Status LinearSRGBToXYB(const Image3F& linear, + const float* JXL_RESTRICT premul_absorb, + ThreadPool* pool, Image3F* JXL_RESTRICT xyb) { + const size_t xsize = linear.xsize(); + + const HWY_FULL(float) d; + return RunOnPool( + pool, 0, static_cast(linear.ysize()), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const size_t y = static_cast(task); + const float* JXL_RESTRICT row_in0 = linear.ConstPlaneRow(0, y); + const float* JXL_RESTRICT row_in1 = linear.ConstPlaneRow(1, y); + const float* JXL_RESTRICT row_in2 = linear.ConstPlaneRow(2, y); + float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y); + float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y); + float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y); + + for (size_t x = 0; x < xsize; x += Lanes(d)) { + const auto in_r = Load(d, row_in0 + x); + const auto in_g = Load(d, row_in1 + x); + const auto in_b = Load(d, row_in2 + x); + LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x, + row_xyb1 + x, row_xyb2 + x); + } + }, + "LinearToXYB"); +} + +Status SRGBToXYB(const Image3F& srgb, const float* JXL_RESTRICT premul_absorb, + ThreadPool* pool, Image3F* JXL_RESTRICT xyb) { + const size_t xsize = srgb.xsize(); + + const HWY_FULL(float) d; + return RunOnPool( + pool, 0, static_cast(srgb.ysize()), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const size_t y = static_cast(task); + const float* JXL_RESTRICT row_srgb0 = srgb.ConstPlaneRow(0, y); + const float* JXL_RESTRICT row_srgb1 = srgb.ConstPlaneRow(1, y); + const float* JXL_RESTRICT row_srgb2 = srgb.ConstPlaneRow(2, y); + float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y); + float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y); + float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y); + + for (size_t x = 0; x < xsize; x += Lanes(d)) { + const auto in_r = LinearFromSRGB(Load(d, row_srgb0 + x)); + const auto in_g = 
LinearFromSRGB(Load(d, row_srgb1 + x)); + const auto in_b = LinearFromSRGB(Load(d, row_srgb2 + x)); + LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x, + row_xyb1 + x, row_xyb2 + x); + } + }, + "SRGBToXYB"); +} + +Status SRGBToXYBAndLinear(const Image3F& srgb, + const float* JXL_RESTRICT premul_absorb, + ThreadPool* pool, Image3F* JXL_RESTRICT xyb, + Image3F* JXL_RESTRICT linear) { + const size_t xsize = srgb.xsize(); + + const HWY_FULL(float) d; + return RunOnPool( + pool, 0, static_cast(srgb.ysize()), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const size_t y = static_cast(task); + const float* JXL_RESTRICT row_srgb0 = srgb.ConstPlaneRow(0, y); + const float* JXL_RESTRICT row_srgb1 = srgb.ConstPlaneRow(1, y); + const float* JXL_RESTRICT row_srgb2 = srgb.ConstPlaneRow(2, y); + + float* JXL_RESTRICT row_linear0 = linear->PlaneRow(0, y); + float* JXL_RESTRICT row_linear1 = linear->PlaneRow(1, y); + float* JXL_RESTRICT row_linear2 = linear->PlaneRow(2, y); + + float* JXL_RESTRICT row_xyb0 = xyb->PlaneRow(0, y); + float* JXL_RESTRICT row_xyb1 = xyb->PlaneRow(1, y); + float* JXL_RESTRICT row_xyb2 = xyb->PlaneRow(2, y); + + for (size_t x = 0; x < xsize; x += Lanes(d)) { + const auto in_r = LinearFromSRGB(Load(d, row_srgb0 + x)); + const auto in_g = LinearFromSRGB(Load(d, row_srgb1 + x)); + const auto in_b = LinearFromSRGB(Load(d, row_srgb2 + x)); + + Store(in_r, d, row_linear0 + x); + Store(in_g, d, row_linear1 + x); + Store(in_b, d, row_linear2 + x); + + LinearRGBToXYB(in_r, in_g, in_b, premul_absorb, row_xyb0 + x, + row_xyb1 + x, row_xyb2 + x); + } + }, + "SRGBToXYBAndLinear"); +} + +void ComputePremulAbsorb(float intensity_target, float* premul_absorb) { + const HWY_FULL(float) d; + const size_t N = Lanes(d); + const float mul = intensity_target / 255.0f; + for (size_t i = 0; i < 9; ++i) { + const auto absorb = Set(d, kOpsinAbsorbanceMatrix[i] * mul); + Store(absorb, d, premul_absorb + i * N); + } + for (size_t i = 0; i < 3; ++i) 
{ + const auto neg_bias_cbrt = Set(d, -cbrtf(kOpsinAbsorbanceBias[i])); + Store(neg_bias_cbrt, d, premul_absorb + (9 + i) * N); + } +} + +Image3F TransformToLinearRGB(const Image3F& in, + const ColorEncoding& color_encoding, + float intensity_target, const JxlCmsInterface& cms, + ThreadPool* pool) { + ColorSpaceTransform c_transform(cms); + bool is_gray = color_encoding.IsGray(); + const ColorEncoding& c_desired = ColorEncoding::LinearSRGB(is_gray); + Image3F out(in.xsize(), in.ysize()); + std::atomic ok{true}; + JXL_CHECK(RunOnPool( + pool, 0, in.ysize(), + [&](const size_t num_threads) { + return c_transform.Init(color_encoding, c_desired, intensity_target, + in.xsize(), num_threads); + }, + [&](const uint32_t y, const size_t thread) { + float* mutable_src_buf = c_transform.BufSrc(thread); + const float* src_buf = mutable_src_buf; + // Interleave input. + if (is_gray) { + src_buf = in.ConstPlaneRow(0, y); + } else { + const float* JXL_RESTRICT row_in0 = in.ConstPlaneRow(0, y); + const float* JXL_RESTRICT row_in1 = in.ConstPlaneRow(1, y); + const float* JXL_RESTRICT row_in2 = in.ConstPlaneRow(2, y); + for (size_t x = 0; x < in.xsize(); x++) { + mutable_src_buf[3 * x + 0] = row_in0[x]; + mutable_src_buf[3 * x + 1] = row_in1[x]; + mutable_src_buf[3 * x + 2] = row_in2[x]; + } + } + float* JXL_RESTRICT dst_buf = c_transform.BufDst(thread); + if (!c_transform.Run(thread, src_buf, dst_buf)) { + ok.store(false); + return; + } + float* JXL_RESTRICT row_out0 = out.PlaneRow(0, y); + float* JXL_RESTRICT row_out1 = out.PlaneRow(1, y); + float* JXL_RESTRICT row_out2 = out.PlaneRow(2, y); + // De-interleave output and convert type. 
+ if (is_gray) { + for (size_t x = 0; x < in.xsize(); x++) { + row_out0[x] = dst_buf[x]; + row_out1[x] = dst_buf[x]; + row_out2[x] = dst_buf[x]; + } + } else { + for (size_t x = 0; x < in.xsize(); x++) { + row_out0[x] = dst_buf[3 * x + 0]; + row_out1[x] = dst_buf[3 * x + 1]; + row_out2[x] = dst_buf[3 * x + 2]; + } + } + }, + "Colorspace transform")); + JXL_CHECK(ok.load()); + return out; +} + +void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding, + float intensity_target, ThreadPool* pool, + Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) { + JXL_ASSERT(SameSize(in, *xyb)); + + const HWY_FULL(float) d; + // Pre-broadcasted constants + HWY_ALIGN float premul_absorb[MaxLanes(d) * 12]; + ComputePremulAbsorb(intensity_target, premul_absorb); + + bool is_gray = color_encoding.IsGray(); + const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(is_gray); + if (c_linear_srgb.SameColorEncoding(color_encoding)) { + JXL_CHECK(LinearSRGBToXYB(in, premul_absorb, pool, xyb)); + } else if (color_encoding.IsSRGB()) { + JXL_CHECK(SRGBToXYB(in, premul_absorb, pool, xyb)); + } else { + Image3F linear = + TransformToLinearRGB(in, color_encoding, intensity_target, cms, pool); + JXL_CHECK(LinearSRGBToXYB(linear, premul_absorb, pool, xyb)); + } +} + +// This is different from Butteraugli's OpsinDynamicsImage() in the sense that +// it does not contain a sensitivity multiplier based on the blurred image. 
+const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool, + Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms, + ImageBundle* const JXL_RESTRICT linear) { + const size_t xsize = in.xsize(); + const size_t ysize = in.ysize(); + JXL_ASSERT(SameSize(in, *xyb)); + + const HWY_FULL(float) d; + // Pre-broadcasted constants + HWY_ALIGN float premul_absorb[MaxLanes(d) * 12]; + ComputePremulAbsorb(in.metadata()->IntensityTarget(), premul_absorb); + + const bool want_linear = linear != nullptr; + + const ColorEncoding& c_linear_srgb = ColorEncoding::LinearSRGB(in.IsGray()); + // Linear sRGB inputs are rare but can be useful for the fastest encoders, for + // which undoing the sRGB transfer function would be a large part of the cost. + if (c_linear_srgb.SameColorEncoding(in.c_current())) { + JXL_CHECK(LinearSRGBToXYB(in.color(), premul_absorb, pool, xyb)); + // This only happens if kitten or slower, moving ImageBundle might be + // possible but the encoder is much slower than this copy. + if (want_linear) { + *linear = in.Copy(); + return linear; + } + return ∈ + } + + // Common case: already sRGB, can avoid the color transform + if (in.IsSRGB()) { + // Common case: can avoid allocating/copying + if (!want_linear) { + JXL_CHECK(SRGBToXYB(in.color(), premul_absorb, pool, xyb)); + return ∈ + } + + // Slow encoder also wants linear sRGB. + linear->SetFromImage(Image3F(xsize, ysize), c_linear_srgb); + JXL_CHECK(SRGBToXYBAndLinear(in.color(), premul_absorb, pool, xyb, + linear->color())); + return linear; + } + + // General case: not sRGB, need color transform. + ImageBundle linear_storage; // Local storage only used if !want_linear. + + ImageBundle* linear_storage_ptr; + if (want_linear) { + // Caller asked for linear, use that storage directly. + linear_storage_ptr = linear; + } else { + // Caller didn't ask for linear, create our own local storage + // OK to reuse metadata, it will not be changed. 
+ linear_storage = ImageBundle(const_cast(in.metadata())); + linear_storage_ptr = &linear_storage; + } + + const ImageBundle* ptr; + JXL_CHECK(TransformIfNeeded(in, c_linear_srgb, cms, pool, linear_storage_ptr, + &ptr)); + // If no transform was necessary, should have taken the above codepath. + JXL_ASSERT(ptr == linear_storage_ptr); + + JXL_CHECK( + LinearSRGBToXYB(*linear_storage_ptr->color(), premul_absorb, pool, xyb)); + return want_linear ? linear : ∈ +} + +// Transform RGB to YCbCr. +// Could be performed in-place (i.e. Y, Cb and Cr could alias R, B and B). +Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane, + const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane, + ImageF* cr_plane, ThreadPool* pool) { + const HWY_FULL(float) df; + const size_t S = Lanes(df); // Step. + + const size_t xsize = r_plane.xsize(); + const size_t ysize = r_plane.ysize(); + if ((xsize == 0) || (ysize == 0)) return true; + + // Full-range BT.601 as defined by JFIF Clause 7: + // https://www.itu.int/rec/T-REC-T.871-201105-I/en + const auto k128 = Set(df, 128.0f / 255); + const auto kR = Set(df, 0.299f); // NTSC luma + const auto kG = Set(df, 0.587f); + const auto kB = Set(df, 0.114f); + const auto kAmpR = Set(df, 0.701f); + const auto kAmpB = Set(df, 0.886f); + const auto kDiffR = Add(kAmpR, kR); + const auto kDiffB = Add(kAmpB, kB); + const auto kNormR = Div(Set(df, 1.0f), (Add(kAmpR, Add(kG, kB)))); + const auto kNormB = Div(Set(df, 1.0f), (Add(kR, Add(kG, kAmpB)))); + + constexpr size_t kGroupArea = kGroupDim * kGroupDim; + const size_t lines_per_group = DivCeil(kGroupArea, xsize); + const size_t num_stripes = DivCeil(ysize, lines_per_group); + const auto transform = [&](int idx, int /* thread*/) { + const size_t y0 = idx * lines_per_group; + const size_t y1 = std::min(y0 + lines_per_group, ysize); + for (size_t y = y0; y < y1; ++y) { + const float* r_row = r_plane.ConstRow(y); + const float* g_row = g_plane.ConstRow(y); + const float* b_row = b_plane.ConstRow(y); 
+ float* y_row = y_plane->Row(y); + float* cb_row = cb_plane->Row(y); + float* cr_row = cr_plane->Row(y); + for (size_t x = 0; x < xsize; x += S) { + const auto r = Load(df, r_row + x); + const auto g = Load(df, g_row + x); + const auto b = Load(df, b_row + x); + const auto r_base = Mul(r, kR); + const auto r_diff = Mul(r, kDiffR); + const auto g_base = Mul(g, kG); + const auto b_base = Mul(b, kB); + const auto b_diff = Mul(b, kDiffB); + const auto y_base = Add(r_base, Add(g_base, b_base)); + const auto y_vec = Sub(y_base, k128); + const auto cb_vec = Mul(Sub(b_diff, y_base), kNormB); + const auto cr_vec = Mul(Sub(r_diff, y_base), kNormR); + Store(y_vec, df, y_row + x); + Store(cb_vec, df, cb_row + x); + Store(cr_vec, df, cr_row + x); + } + } + }; + return RunOnPool(pool, 0, static_cast(num_stripes), ThreadPool::NoInit, + transform, "RgbToYcbCr"); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(ToXYB); +const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool, + Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms, + ImageBundle* JXL_RESTRICT linear_storage) { + return HWY_DYNAMIC_DISPATCH(ToXYB)(in, pool, xyb, cms, linear_storage); +} + +HWY_EXPORT(LinearRGBRowToXYB); +void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1, + float* JXL_RESTRICT row2, + const float* JXL_RESTRICT premul_absorb, size_t xsize) { + HWY_DYNAMIC_DISPATCH(LinearRGBRowToXYB) + (row0, row1, row2, premul_absorb, xsize); +} + +HWY_EXPORT(ComputePremulAbsorb); +void ComputePremulAbsorb(float intensity_target, float* premul_absorb) { + HWY_DYNAMIC_DISPATCH(ComputePremulAbsorb)(intensity_target, premul_absorb); +} + +void ScaleXYBRow(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1, + float* JXL_RESTRICT row2, size_t xsize) { + for (size_t x = 0; x < xsize; x++) { + row2[x] = (row2[x] - row1[x] + kScaledXYBOffset[2]) * 
kScaledXYBScale[2]; + row0[x] = (row0[x] + kScaledXYBOffset[0]) * kScaledXYBScale[0]; + row1[x] = (row1[x] + kScaledXYBOffset[1]) * kScaledXYBScale[1]; + } +} + +void ScaleXYB(Image3F* opsin) { + for (size_t y = 0; y < opsin->ysize(); y++) { + float* row0 = opsin->PlaneRow(0, y); + float* row1 = opsin->PlaneRow(1, y); + float* row2 = opsin->PlaneRow(2, y); + ScaleXYBRow(row0, row1, row2, opsin->xsize()); + } +} + +HWY_EXPORT(Image3FToXYB); +void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding, + float intensity_target, ThreadPool* pool, + Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms) { + return HWY_DYNAMIC_DISPATCH(Image3FToXYB)(in, color_encoding, + intensity_target, pool, xyb, cms); +} + +HWY_EXPORT(RgbToYcbcr); +Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane, + const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane, + ImageF* cr_plane, ThreadPool* pool) { + return HWY_DYNAMIC_DISPATCH(RgbToYcbcr)(r_plane, g_plane, b_plane, y_plane, + cb_plane, cr_plane, pool); +} + +// DEPRECATED +Image3F OpsinDynamicsImage(const Image3B& srgb8, const JxlCmsInterface& cms) { + ImageMetadata metadata; + metadata.SetUintSamples(8); + metadata.color_encoding = ColorEncoding::SRGB(); + ImageBundle ib(&metadata); + ib.SetFromImage(ConvertToFloat(srgb8), metadata.color_encoding); + JXL_CHECK(ib.TransformTo(ColorEncoding::LinearSRGB(ib.IsGray()), cms)); + ThreadPool* null_pool = nullptr; + Image3F xyb(srgb8.xsize(), srgb8.ysize()); + + ImageBundle linear_storage(&metadata); + (void)ToXYB(ib, null_pool, &xyb, cms, &linear_storage); + return xyb; +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/enc_xyb.h b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.h new file mode 100644 index 0000000000..fc902848ee --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/enc_xyb.h @@ -0,0 +1,56 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENC_XYB_H_ +#define LIB_JXL_ENC_XYB_H_ + +// Converts to XYB color space. + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" + +namespace jxl { + +// Converts any color space to XYB. If `linear` is not null, returns `linear` +// after filling it with a linear sRGB copy of `in`. Otherwise, returns `&in`. +// +// NOTE this return value can avoid an extra color conversion if `in` would +// later be passed to JxlButteraugliComparator. +const ImageBundle* ToXYB(const ImageBundle& in, ThreadPool* pool, + Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms, + ImageBundle* JXL_RESTRICT linear = nullptr); + +void Image3FToXYB(const Image3F& in, const ColorEncoding& color_encoding, + float intensity_target, ThreadPool* pool, + Image3F* JXL_RESTRICT xyb, const JxlCmsInterface& cms); + +void LinearRGBRowToXYB(float* JXL_RESTRICT row0, float* JXL_RESTRICT row1, + float* JXL_RESTRICT row2, + const float* JXL_RESTRICT premul_absorb, size_t xsize); + +void ComputePremulAbsorb(float intensity_target, float* premul_absorb); + +// Transforms each color component of the given XYB image into the [0.0, 1.0] +// interval with an affine transform. +void ScaleXYB(Image3F* opsin); +void ScaleXYBRow(float* row0, float* row1, float* row2, size_t xsize); + +// Bt.601 to match JPEG/JFIF. Outputs _signed_ YCbCr values suitable for DCT, +// see F.1.1.3 of T.81 (because our data type is float, there is no need to add +// a bias to make the values unsigned). +Status RgbToYcbcr(const ImageF& r_plane, const ImageF& g_plane, + const ImageF& b_plane, ImageF* y_plane, ImageF* cb_plane, + ImageF* cr_plane, ThreadPool* pool); + +// DEPRECATED, used by opsin_image_wrapper. 
+Image3F OpsinDynamicsImage(const Image3B& srgb8, const JxlCmsInterface& cms); + +} // namespace jxl + +#endif // LIB_JXL_ENC_XYB_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/encode.cc b/third-party/libjxl/libjxl/lib/jxl/encode.cc new file mode 100644 index 0000000000..7c23847ca8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/encode.cc @@ -0,0 +1,2258 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_external_image.h" +#include "lib/jxl/enc_fast_lossless.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_file.h" +#include "lib/jxl/enc_icc_codec.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/encode_internal.h" +#include "lib/jxl/exif.h" +#include "lib/jxl/jpeg/enc_jpeg_data.h" +#include "lib/jxl/luminance.h" +#include "lib/jxl/sanitizers.h" + +// Debug-printing failure macro similar to JXL_FAILURE, but for the status code +// JXL_ENC_ERROR +#ifdef JXL_CRASH_ON_ERROR +#define JXL_API_ERROR(enc, error_code, format, ...) \ + (enc->error = error_code, \ + ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \ + ::jxl::Abort(), JXL_ENC_ERROR) +#define JXL_API_ERROR_NOSET(format, ...) \ + (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \ + ::jxl::Abort(), JXL_ENC_ERROR) +#else // JXL_CRASH_ON_ERROR +#define JXL_API_ERROR(enc, error_code, format, ...) 
\ + (enc->error = error_code, \ + ((JXL_DEBUG_ON_ERROR) && \ + ::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__)), \ + JXL_ENC_ERROR) +#define JXL_API_ERROR_NOSET(format, ...) \ + (::jxl::Debug(("%s:%d: " format "\n"), __FILE__, __LINE__, ##__VA_ARGS__), \ + JXL_ENC_ERROR) +#endif // JXL_CRASH_ON_ERROR + +namespace jxl {} // namespace jxl + +uint32_t JxlEncoderVersion(void) { + return JPEGXL_MAJOR_VERSION * 1000000 + JPEGXL_MINOR_VERSION * 1000 + + JPEGXL_PATCH_VERSION; +} + +namespace { +template +void AppendJxlpBoxCounter(uint32_t counter, bool last, T* output) { + if (last) counter |= 0x80000000; + for (size_t i = 0; i < 4; i++) { + output->push_back(counter >> (8 * (3 - i)) & 0xff); + } +} + +void QueueFrame( + const JxlEncoderFrameSettings* frame_settings, + jxl::MemoryManagerUniquePtr& frame) { + if (frame_settings->values.lossless) { + frame->option_values.cparams.SetLossless(); + } + + jxl::JxlEncoderQueuedInput queued_input(frame_settings->enc->memory_manager); + queued_input.frame = std::move(frame); + frame_settings->enc->input_queue.emplace_back(std::move(queued_input)); + frame_settings->enc->num_queued_frames++; +} + +void QueueFastLosslessFrame(const JxlEncoderFrameSettings* frame_settings, + JxlFastLosslessFrameState* fast_lossless_frame) { + jxl::JxlEncoderQueuedInput queued_input(frame_settings->enc->memory_manager); + queued_input.fast_lossless_frame.reset(fast_lossless_frame); + frame_settings->enc->input_queue.emplace_back(std::move(queued_input)); + frame_settings->enc->num_queued_frames++; +} + +void QueueBox(JxlEncoder* enc, + jxl::MemoryManagerUniquePtr& box) { + jxl::JxlEncoderQueuedInput queued_input(enc->memory_manager); + queued_input.box = std::move(box); + enc->input_queue.emplace_back(std::move(queued_input)); + enc->num_queued_boxes++; +} + +// TODO(lode): share this code and the Brotli compression code in enc_jpeg_data +JxlEncoderStatus BrotliCompress(int quality, const uint8_t* in, size_t in_size, + 
jxl::PaddedBytes* out) { + std::unique_ptr + enc(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr), + BrotliEncoderDestroyInstance); + if (!enc) return JXL_API_ERROR_NOSET("BrotliEncoderCreateInstance failed"); + + BrotliEncoderSetParameter(enc.get(), BROTLI_PARAM_QUALITY, quality); + BrotliEncoderSetParameter(enc.get(), BROTLI_PARAM_SIZE_HINT, in_size); + + constexpr size_t kBufferSize = 128 * 1024; + jxl::PaddedBytes temp_buffer(kBufferSize); + + size_t avail_in = in_size; + const uint8_t* next_in = in; + + size_t total_out = 0; + + for (;;) { + size_t avail_out = kBufferSize; + uint8_t* next_out = temp_buffer.data(); + jxl::msan::MemoryIsInitialized(next_in, avail_in); + if (!BrotliEncoderCompressStream(enc.get(), BROTLI_OPERATION_FINISH, + &avail_in, &next_in, &avail_out, &next_out, + &total_out)) { + return JXL_API_ERROR_NOSET("Brotli compression failed"); + } + size_t out_size = next_out - temp_buffer.data(); + jxl::msan::UnpoisonMemory(next_out - out_size, out_size); + out->resize(out->size() + out_size); + memcpy(out->data() + out->size() - out_size, temp_buffer.data(), out_size); + if (BrotliEncoderIsFinished(enc.get())) break; + } + + return JXL_ENC_SUCCESS; +} + +// The JXL codestream can have level 5 or level 10. Levels have certain +// restrictions such as max allowed image dimensions. This function checks the +// level required to support the current encoder settings. The debug_string is +// intended to be used for developer API error messages, and may be set to +// nullptr. +int VerifyLevelSettings(const JxlEncoder* enc, std::string* debug_string) { + const auto& m = enc->metadata.m; + + uint64_t xsize = enc->metadata.size.xsize(); + uint64_t ysize = enc->metadata.size.ysize(); + // The uncompressed ICC size, if it is used. 
+ size_t icc_size = 0; + if (m.color_encoding.WantICC()) { + icc_size = m.color_encoding.ICC().size(); + } + + // Level 10 checks + + if (xsize > (1ull << 30ull) || ysize > (1ull << 30ull) || + xsize * ysize > (1ull << 40ull)) { + if (debug_string) *debug_string = "Too large image dimensions"; + return -1; + } + if (icc_size > (1ull << 28)) { + if (debug_string) *debug_string = "Too large ICC profile size"; + return -1; + } + if (m.num_extra_channels > 256) { + if (debug_string) *debug_string = "Too many extra channels"; + return -1; + } + + // Level 5 checks + + if (!m.modular_16_bit_buffer_sufficient) { + if (debug_string) *debug_string = "Too high modular bit depth"; + return 10; + } + if (xsize > (1ull << 18ull) || ysize > (1ull << 18ull) || + xsize * ysize > (1ull << 28ull)) { + if (debug_string) *debug_string = "Too large image dimensions"; + return 10; + } + if (icc_size > (1ull << 22)) { + if (debug_string) *debug_string = "Too large ICC profile"; + return 10; + } + if (m.num_extra_channels > 4) { + if (debug_string) *debug_string = "Too many extra channels"; + return 10; + } + for (size_t i = 0; i < m.extra_channel_info.size(); ++i) { + if (m.extra_channel_info[i].type == jxl::ExtraChannel::kBlack) { + if (debug_string) *debug_string = "CMYK channel not allowed"; + return 10; + } + } + + // TODO(lode): also need to check if consecutive composite-still frames total + // pixel amount doesn't exceed 2**28 in the case of level 5. This should be + // done when adding frame and requires ability to add composite still frames + // to be added first. + + // TODO(lode): also need to check animation duration of a frame. This should + // be done when adding frame, but first requires implementing setting the + // JxlFrameHeader for a frame. + + // TODO(lode): also need to check properties such as num_splines, num_patches, + // modular_16bit_buffers and multiple properties of modular trees. 
However + // these are not user-set properties so cannot be checked here, but decisions + // the C++ encoder should be able to make based on the level. + + // All level 5 checks passes, so can return the more compatible level 5 + return 5; +} + +size_t BitsPerChannel(JxlDataType data_type) { + switch (data_type) { + case JXL_TYPE_UINT8: + return 8; + case JXL_TYPE_UINT16: + return 16; + case JXL_TYPE_FLOAT: + return 32; + case JXL_TYPE_FLOAT16: + return 16; + default: + return 0; // signals unhandled JxlDataType + } +} + +template +uint32_t GetBitDepth(JxlBitDepth bit_depth, const T& metadata, + JxlPixelFormat format) { + if (bit_depth.type == JXL_BIT_DEPTH_FROM_PIXEL_FORMAT) { + return BitsPerChannel(format.data_type); + } else if (bit_depth.type == JXL_BIT_DEPTH_FROM_CODESTREAM) { + return metadata.bit_depth.bits_per_sample; + } else if (bit_depth.type == JXL_BIT_DEPTH_CUSTOM) { + return bit_depth.bits_per_sample; + } else { + return 0; + } +} + +JxlEncoderStatus CheckValidBitdepth(uint32_t bits_per_sample, + uint32_t exponent_bits_per_sample) { + if (!exponent_bits_per_sample) { + // The spec allows up to 31 for bits_per_sample here, but + // the code does not (yet) support it. 
+ if (!(bits_per_sample > 0 && bits_per_sample <= 24)) { + return JXL_API_ERROR_NOSET("Invalid value for bits_per_sample"); + } + } else if ((exponent_bits_per_sample > 8) || + (bits_per_sample > 24 + exponent_bits_per_sample) || + (bits_per_sample < 3 + exponent_bits_per_sample)) { + return JXL_API_ERROR_NOSET("Invalid float description"); + } + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus VerifyInputBitDepth(JxlBitDepth bit_depth, + JxlPixelFormat format) { + return JXL_ENC_SUCCESS; +} + +bool EncodeFrameIndexBox(const jxl::JxlEncoderFrameIndexBox& frame_index_box, + jxl::BitWriter& writer) { + bool ok = true; + int NF = 0; + for (size_t i = 0; i < frame_index_box.entries.size(); ++i) { + if (i == 0 || frame_index_box.entries[i].to_be_indexed) { + ++NF; + } + } + // Frame index box contents varint + 8 bytes + // continue with NF * 3 * varint + // varint max length is 10 for 64 bit numbers, and these numbers + // are limited to 63 bits. + static const int kVarintMaxLength = 10; + static const int kFrameIndexBoxHeaderLength = kVarintMaxLength + 8; + static const int kFrameIndexBoxElementLength = 3 * kVarintMaxLength; + const int buffer_size = + kFrameIndexBoxHeaderLength + NF * kFrameIndexBoxElementLength; + std::vector buffer_vec(buffer_size); + uint8_t* buffer = buffer_vec.data(); + size_t output_pos = 0; + ok &= jxl::EncodeVarInt(NF, buffer_vec.size(), &output_pos, buffer); + StoreBE32(frame_index_box.TNUM, &buffer[output_pos]); + output_pos += 4; + StoreBE32(frame_index_box.TDEN, &buffer[output_pos]); + output_pos += 4; + // When we record a frame in the index, the record needs to know + // how many frames until the next indexed frame. That is why + // we store the 'prev' record. That 'prev' record needs to store + // the offset byte position to previously recorded indexed frame, + // that's why we also trace previous to the previous frame. + int prev_prev_ix = -1; // For position offset (OFFi) delta coding. 
+ int prev_ix = 0; + int T_prev = 0; + int T = 0; + for (size_t i = 1; i < frame_index_box.entries.size(); ++i) { + if (frame_index_box.entries[i].to_be_indexed) { + // Now we can record the previous entry, since we need to store + // there how many frames until the next one. + int64_t OFFi = frame_index_box.entries[prev_ix].OFFi; + if (prev_prev_ix != -1) { + // Offi needs to be offset of start byte of this frame compared to start + // byte of previous frame from this index in the JPEG XL codestream. For + // the first frame, this is the offset from the first byte of the JPEG + // XL codestream. + OFFi -= frame_index_box.entries[prev_prev_ix].OFFi; + } + int32_t Ti = T_prev; + int32_t Fi = i - prev_ix; + ok &= jxl::EncodeVarInt(OFFi, buffer_vec.size(), &output_pos, buffer); + ok &= jxl::EncodeVarInt(Ti, buffer_vec.size(), &output_pos, buffer); + ok &= jxl::EncodeVarInt(Fi, buffer_vec.size(), &output_pos, buffer); + prev_prev_ix = prev_ix; + prev_ix = i; + T_prev = T; + T += frame_index_box.entries[i].duration; + } + } + { + // Last frame. + size_t i = frame_index_box.entries.size(); + int64_t OFFi = frame_index_box.entries[prev_ix].OFFi; + if (prev_prev_ix != -1) { + OFFi -= frame_index_box.entries[prev_prev_ix].OFFi; + } + int32_t Ti = T_prev; + int32_t Fi = i - prev_ix; + ok &= jxl::EncodeVarInt(OFFi, buffer_vec.size(), &output_pos, buffer); + ok &= jxl::EncodeVarInt(Ti, buffer_vec.size(), &output_pos, buffer); + ok &= jxl::EncodeVarInt(Fi, buffer_vec.size(), &output_pos, buffer); + } + // Enough buffer has been allocated, this function should never fail in + // writing. + JXL_ASSERT(ok); + return ok; +} + +} // namespace + +JxlEncoderStatus JxlEncoderStruct::RefillOutputByteQueue() { + jxl::PaddedBytes bytes; + + jxl::JxlEncoderQueuedInput& input = input_queue[0]; + + // TODO(lode): split this into 3 functions: for adding the signature and other + // initial headers (jbrd, ...), one for adding frame, and one for adding user + // box. 
+ + if (!wrote_bytes) { + // First time encoding any data, verify the level 5 vs level 10 settings + std::string level_message; + int required_level = VerifyLevelSettings(this, &level_message); + // Only level 5 and 10 are defined, and the function can return -1 to + // indicate full incompatibility. + JXL_ASSERT(required_level == -1 || required_level == 5 || + required_level == 10); + // codestream_level == -1 means auto-set to the required level + if (codestream_level == -1) codestream_level = required_level; + if (codestream_level == 5 && required_level != 5) { + // If the required level is 10, return error rather than automatically + // setting the level to 10, to avoid inadvertently creating a level 10 + // JXL file while intending to target a level 5 decoder. + return JXL_API_ERROR( + this, JXL_ENC_ERR_API_USAGE, "%s", + ("Codestream level verification for level 5 failed: " + level_message) + .c_str()); + } + if (required_level == -1) { + return JXL_API_ERROR( + this, JXL_ENC_ERR_API_USAGE, "%s", + ("Codestream level verification for level 10 failed: " + + level_message) + .c_str()); + } + jxl::AuxOut* aux_out = + input.frame ? input.frame->option_values.aux_out : nullptr; + jxl::BitWriter writer; + if (!WriteCodestreamHeaders(&metadata, &writer, aux_out)) { + return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC, + "Failed to write codestream header"); + } + // Only send ICC (at least several hundred bytes) if fields aren't enough. 
+ if (metadata.m.color_encoding.WantICC()) { + if (!jxl::WriteICC(metadata.m.color_encoding.ICC(), &writer, + jxl::kLayerHeader, aux_out)) { + return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC, + "Failed to write ICC profile"); + } + } + // TODO(lode): preview should be added here if a preview image is added + + jxl::BitWriter::Allotment allotment(&writer, 8); + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, jxl::kLayerHeader, aux_out); + + // Not actually the end of frame, but the end of metadata/ICC, but helps + // the next frame to start here for indexing purposes. + codestream_bytes_written_end_of_frame += + jxl::DivCeil(writer.BitsWritten(), 8); + + bytes = std::move(writer).TakeBytes(); + + if (MustUseContainer()) { + // Add "JXL " and ftyp box. + output_byte_queue.insert( + output_byte_queue.end(), jxl::kContainerHeader, + jxl::kContainerHeader + sizeof(jxl::kContainerHeader)); + if (codestream_level != 5) { + // Add jxll box directly after the ftyp box to indicate the codestream + // level. + output_byte_queue.insert( + output_byte_queue.end(), jxl::kLevelBoxHeader, + jxl::kLevelBoxHeader + sizeof(jxl::kLevelBoxHeader)); + output_byte_queue.push_back(codestream_level); + } + + // Whether to write the basic info and color profile header of the + // codestream into an early separate jxlp box, so that it comes before + // metadata or jpeg reconstruction boxes. In theory this could simply + // always be done, but there's no reason to add an extra box with box + // header overhead if the codestream will already come immediately after + // the signature and level boxes. 
+ bool partial_header = + store_jpeg_metadata || + (use_boxes && (!input.frame && !input.fast_lossless_frame)); + + if (partial_header) { + jxl::AppendBoxHeader(jxl::MakeBoxType("jxlp"), bytes.size() + 4, + /*unbounded=*/false, &output_byte_queue); + AppendJxlpBoxCounter(jxlp_counter++, /*last=*/false, + &output_byte_queue); + output_byte_queue.insert(output_byte_queue.end(), bytes.data(), + bytes.data() + bytes.size()); + bytes.clear(); + } + + if (store_jpeg_metadata && !jpeg_metadata.empty()) { + jxl::AppendBoxHeader(jxl::MakeBoxType("jbrd"), jpeg_metadata.size(), + false, &output_byte_queue); + output_byte_queue.insert(output_byte_queue.end(), jpeg_metadata.begin(), + jpeg_metadata.end()); + } + } + wrote_bytes = true; + } + + // Choose frame or box processing: exactly one of the two unique pointers (box + // or frame) in the input queue item is non-null. + if (input.frame || input.fast_lossless_frame) { + jxl::MemoryManagerUniquePtr input_frame = + std::move(input.frame); + if (input.fast_lossless_frame) { + output_fast_frame_queue.push_back(std::move(input.fast_lossless_frame)); + } + input_queue.erase(input_queue.begin()); + num_queued_frames--; + if (input_frame) { + for (unsigned idx = 0; idx < input_frame->ec_initialized.size(); idx++) { + if (!input_frame->ec_initialized[idx]) { + return JXL_API_ERROR(this, JXL_ENC_ERR_API_USAGE, + "Extra channel %u is not initialized", idx); + } + } + + // TODO(zond): If the input queue is empty and the frames_closed is true, + // then mark this frame as the last. + + // TODO(zond): Handle progressive mode like EncodeFile does it. + // TODO(zond): Handle animation like EncodeFile does it, by checking if + // JxlEncoderCloseFrames has been called and if the frame + // queue is empty (to see if it's the last animation frame). + + if (metadata.m.xyb_encoded) { + input_frame->option_values.cparams.color_transform = + jxl::ColorTransform::kXYB; + } else { + // TODO(zond): Figure out when to use kYCbCr instead. 
+ input_frame->option_values.cparams.color_transform = + jxl::ColorTransform::kNone; + } + } + + uint32_t duration; + uint32_t timecode; + if (input_frame && metadata.m.have_animation) { + duration = input_frame->option_values.header.duration; + timecode = input_frame->option_values.header.timecode; + } else { + // If have_animation is false, the encoder should ignore the duration and + // timecode values. However, assigning them to ib will cause the encoder + // to write an invalid frame header that can't be decoded so ensure + // they're the default value of 0 here. + duration = 0; + timecode = 0; + } + + bool last_frame = frames_closed && !num_queued_frames; + + size_t codestream_byte_size = 0; + + jxl::BitWriter writer; + + if (input_frame) { + jxl::PassesEncoderState enc_state; + + frame_index_box.AddFrame(codestream_bytes_written_end_of_frame, duration, + input_frame->option_values.frame_index_box); + + // EncodeFrame creates jxl::FrameHeader object internally based on the + // FrameInfo, imagebundle, cparams and metadata. Copy the information to + // these. 
+ jxl::ImageBundle& ib = input_frame->frame; + ib.duration = duration; + ib.timecode = timecode; + ib.name = input_frame->option_values.frame_name; + ib.blendmode = static_cast( + input_frame->option_values.header.layer_info.blend_info.blendmode); + ib.blend = + input_frame->option_values.header.layer_info.blend_info.blendmode != + JXL_BLEND_REPLACE; + + size_t save_as_reference = + input_frame->option_values.header.layer_info.save_as_reference; + if (save_as_reference >= 3) { + return JXL_API_ERROR( + this, JXL_ENC_ERR_API_USAGE, + "Cannot use save_as_reference values >=3 (found: %d)", + (int)save_as_reference); + } + ib.use_for_next_frame = !!save_as_reference; + + jxl::FrameInfo frame_info; + frame_info.is_last = last_frame; + frame_info.save_as_reference = save_as_reference; + frame_info.source = + input_frame->option_values.header.layer_info.blend_info.source; + frame_info.clamp = + input_frame->option_values.header.layer_info.blend_info.clamp; + frame_info.alpha_channel = + input_frame->option_values.header.layer_info.blend_info.alpha; + frame_info.extra_channel_blending_info.resize( + metadata.m.num_extra_channels); + // If extra channel blend info has not been set, use the blend mode from + // the layer_info. + JxlBlendInfo default_blend_info = + input_frame->option_values.header.layer_info.blend_info; + for (size_t i = 0; i < metadata.m.num_extra_channels; ++i) { + auto& to = frame_info.extra_channel_blending_info[i]; + const auto& from = + i < input_frame->option_values.extra_channel_blend_info.size() + ? 
input_frame->option_values.extra_channel_blend_info[i] + : default_blend_info; + to.mode = static_cast(from.blendmode); + to.source = from.source; + to.alpha_channel = from.alpha; + to.clamp = (from.clamp != 0); + } + + if (input_frame->option_values.header.layer_info.have_crop) { + ib.origin.x0 = input_frame->option_values.header.layer_info.crop_x0; + ib.origin.y0 = input_frame->option_values.header.layer_info.crop_y0; + } + JXL_ASSERT(writer.BitsWritten() == 0); + if (!jxl::EncodeFrame(input_frame->option_values.cparams, frame_info, + &metadata, input_frame->frame, &enc_state, cms, + thread_pool.get(), &writer, + input_frame->option_values.aux_out)) { + return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC, + "Failed to encode frame"); + } + codestream_bytes_written_beginning_of_frame = + codestream_bytes_written_end_of_frame; + codestream_bytes_written_end_of_frame += + jxl::DivCeil(writer.BitsWritten(), 8); + + // Possibly bytes already contains the codestream header: in case this is + // the first frame, and the codestream header was not encoded as jxlp + // above. + bytes.append(std::move(writer).TakeBytes()); + codestream_byte_size = bytes.size(); + } else { + JXL_CHECK(!output_fast_frame_queue.empty()); + JxlFastLosslessPrepareHeader(output_fast_frame_queue.front().get(), + /*add_image_header=*/0, last_frame); + codestream_byte_size = + JxlFastLosslessOutputSize(output_fast_frame_queue.front().get()) + + bytes.size(); + } + + if (MustUseContainer()) { + if (last_frame && jxlp_counter == 0) { + // If this is the last frame and no jxlp boxes were used yet, it's + // slighly more efficient to write a jxlc box since it has 4 bytes + // less overhead. 
+ jxl::AppendBoxHeader(jxl::MakeBoxType("jxlc"), codestream_byte_size, + /*unbounded=*/false, &output_byte_queue); + } else { + jxl::AppendBoxHeader(jxl::MakeBoxType("jxlp"), codestream_byte_size + 4, + /*unbounded=*/false, &output_byte_queue); + AppendJxlpBoxCounter(jxlp_counter++, last_frame, &output_byte_queue); + } + } + + output_byte_queue.insert(output_byte_queue.end(), bytes.data(), + bytes.data() + bytes.size()); + + if (input_frame) { + last_used_cparams = input_frame->option_values.cparams; + } + if (last_frame && frame_index_box.StoreFrameIndexBox()) { + bytes.clear(); + EncodeFrameIndexBox(frame_index_box, writer); + jxl::AppendBoxHeader(jxl::MakeBoxType("jxli"), bytes.size(), + /*unbounded=*/false, &output_byte_queue); + } + } else { + // Not a frame, so is a box instead + jxl::MemoryManagerUniquePtr box = + std::move(input.box); + input_queue.erase(input_queue.begin()); + num_queued_boxes--; + + if (box->compress_box) { + jxl::PaddedBytes compressed(4); + // Prepend the original box type in the brob box contents + for (size_t i = 0; i < 4; i++) { + compressed[i] = static_cast(box->type[i]); + } + if (JXL_ENC_SUCCESS != + BrotliCompress((brotli_effort >= 0 ? 
brotli_effort : 4), + box->contents.data(), box->contents.size(), + &compressed)) { + return JXL_API_ERROR(this, JXL_ENC_ERR_GENERIC, + "Brotli compression for brob box failed"); + } + jxl::AppendBoxHeader(jxl::MakeBoxType("brob"), compressed.size(), false, + &output_byte_queue); + output_byte_queue.insert(output_byte_queue.end(), compressed.data(), + compressed.data() + compressed.size()); + } else { + jxl::AppendBoxHeader(box->type, box->contents.size(), false, + &output_byte_queue); + output_byte_queue.insert(output_byte_queue.end(), box->contents.data(), + box->contents.data() + box->contents.size()); + } + } + + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetColorEncoding(JxlEncoder* enc, + const JxlColorEncoding* color) { + if (!enc->basic_info_set) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Basic info not yet set"); + } + if (enc->color_encoding_set) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Color encoding is already set"); + } + if (!jxl::ConvertExternalToInternalColorEncoding( + *color, &enc->metadata.m.color_encoding)) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_GENERIC, "Error in color conversion"); + } + if (enc->metadata.m.color_encoding.GetColorSpace() == + jxl::ColorSpace::kGray) { + if (enc->basic_info.num_color_channels != 1) + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, + "Cannot use grayscale color encoding with num_color_channels != 1"); + } else { + if (enc->basic_info.num_color_channels != 3) + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, + "Cannot use RGB color encoding with num_color_channels != 3"); + } + enc->color_encoding_set = true; + if (!enc->intensity_target_set) { + jxl::SetIntensityTarget(&enc->metadata.m); + } + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetICCProfile(JxlEncoder* enc, + const uint8_t* icc_profile, + size_t size) { + if (!enc->basic_info_set) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Basic info not yet set"); + } + if 
(enc->color_encoding_set) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "ICC profile is already set"); + } + jxl::PaddedBytes icc; + icc.assign(icc_profile, icc_profile + size); + if (!enc->metadata.m.color_encoding.SetICC( + std::move(icc), enc->cms_set ? &enc->cms : nullptr)) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_BAD_INPUT, + "ICC profile could not be set"); + } + if (enc->metadata.m.color_encoding.GetColorSpace() == + jxl::ColorSpace::kGray) { + if (enc->basic_info.num_color_channels != 1) + return JXL_API_ERROR( + enc, JXL_ENC_ERR_BAD_INPUT, + "Cannot use grayscale ICC profile with num_color_channels != 1"); + } else { + if (enc->basic_info.num_color_channels != 3) + return JXL_API_ERROR( + enc, JXL_ENC_ERR_BAD_INPUT, + "Cannot use RGB ICC profile with num_color_channels != 3"); + // TODO(jon): also check that a kBlack extra channel is provided in the CMYK + // case + } + enc->color_encoding_set = true; + if (!enc->intensity_target_set) { + jxl::SetIntensityTarget(&enc->metadata.m); + } + + if (!enc->basic_info.uses_original_profile && enc->cms_set) { + enc->metadata.m.color_encoding.DecideIfWantICC(enc->cms); + } + + return JXL_ENC_SUCCESS; +} + +void JxlEncoderInitBasicInfo(JxlBasicInfo* info) { + info->have_container = JXL_FALSE; + info->xsize = 0; + info->ysize = 0; + info->bits_per_sample = 8; + info->exponent_bits_per_sample = 0; + info->intensity_target = 0.f; + info->min_nits = 0.f; + info->relative_to_max_display = JXL_FALSE; + info->linear_below = 0.f; + info->uses_original_profile = JXL_FALSE; + info->have_preview = JXL_FALSE; + info->have_animation = JXL_FALSE; + info->orientation = JXL_ORIENT_IDENTITY; + info->num_color_channels = 3; + info->num_extra_channels = 0; + info->alpha_bits = 0; + info->alpha_exponent_bits = 0; + info->alpha_premultiplied = JXL_FALSE; + info->preview.xsize = 0; + info->preview.ysize = 0; + info->intrinsic_xsize = 0; + info->intrinsic_ysize = 0; + info->animation.tps_numerator = 10; + 
info->animation.tps_denominator = 1; + info->animation.num_loops = 0; + info->animation.have_timecodes = JXL_FALSE; +} + +void JxlEncoderInitFrameHeader(JxlFrameHeader* frame_header) { + // For each field, the default value of the specification is used. Depending + // on whether an animation frame, or a composite still blending frame, + // is used, different fields have to be set up by the user after initing + // the frame header. + frame_header->duration = 0; + frame_header->timecode = 0; + frame_header->name_length = 0; + // In the specification, the default value of is_last is !frame_type, and the + // default frame_type is kRegularFrame which has value 0, so is_last is true + // by default. However, the encoder does not use this value (the field exists + // for the decoder to set) since last frame is determined by usage of + // JxlEncoderCloseFrames instead. + frame_header->is_last = JXL_TRUE; + frame_header->layer_info.have_crop = JXL_FALSE; + frame_header->layer_info.crop_x0 = 0; + frame_header->layer_info.crop_y0 = 0; + // These must be set if have_crop is enabled, but the default value has + // have_crop false, and these dimensions 0. The user must set these to the + // desired size after enabling have_crop (which is not yet implemented). + frame_header->layer_info.xsize = 0; + frame_header->layer_info.ysize = 0; + JxlEncoderInitBlendInfo(&frame_header->layer_info.blend_info); + frame_header->layer_info.save_as_reference = 0; +} + +void JxlEncoderInitBlendInfo(JxlBlendInfo* blend_info) { + // Default blend mode in the specification is 0. Note that combining + // blend mode of replace with a duration is not useful, but the user has to + // manually set duration in case of animation, or manually change the blend + // mode in case of composite stills, so initing to a combination that is not + // useful on its own is not an issue. 
+ blend_info->blendmode = JXL_BLEND_REPLACE; + blend_info->source = 0; + blend_info->alpha = 0; + blend_info->clamp = 0; +} + +JxlEncoderStatus JxlEncoderSetBasicInfo(JxlEncoder* enc, + const JxlBasicInfo* info) { + if (!enc->metadata.size.Set(info->xsize, info->ysize)) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid dimensions"); + } + if (JXL_ENC_SUCCESS != CheckValidBitdepth(info->bits_per_sample, + info->exponent_bits_per_sample)) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid bit depth"); + } + + enc->metadata.m.bit_depth.bits_per_sample = info->bits_per_sample; + enc->metadata.m.bit_depth.exponent_bits_per_sample = + info->exponent_bits_per_sample; + enc->metadata.m.bit_depth.floating_point_sample = + (info->exponent_bits_per_sample != 0u); + enc->metadata.m.modular_16_bit_buffer_sufficient = + (!info->uses_original_profile || info->bits_per_sample <= 12) && + info->alpha_bits <= 12; + if ((info->intrinsic_xsize > 0 || info->intrinsic_ysize > 0) && + (info->intrinsic_xsize != info->xsize || + info->intrinsic_ysize != info->ysize)) { + if (info->intrinsic_xsize > (1ull << 30ull) || + info->intrinsic_ysize > (1ull << 30ull) || + !enc->metadata.m.intrinsic_size.Set(info->intrinsic_xsize, + info->intrinsic_ysize)) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Invalid intrinsic dimensions"); + } + enc->metadata.m.have_intrinsic_size = true; + } + + // The number of extra channels includes the alpha channel, so for example and + // RGBA with no other extra channels, has exactly num_extra_channels == 1 + enc->metadata.m.num_extra_channels = info->num_extra_channels; + enc->metadata.m.extra_channel_info.resize(enc->metadata.m.num_extra_channels); + if (info->num_extra_channels == 0 && info->alpha_bits) { + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, + "when alpha_bits is non-zero, the number of channels must be at least " + "1"); + } + // If the user provides non-zero alpha_bits, we make the channel info at index + // 
zero the appropriate alpha channel. + if (info->alpha_bits) { + JxlExtraChannelInfo channel_info; + JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &channel_info); + channel_info.bits_per_sample = info->alpha_bits; + channel_info.exponent_bits_per_sample = info->alpha_exponent_bits; + if (JxlEncoderSetExtraChannelInfo(enc, 0, &channel_info)) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Problem setting extra channel info for alpha"); + } + } + + enc->metadata.m.xyb_encoded = !info->uses_original_profile; + if (info->orientation > 0 && info->orientation <= 8) { + enc->metadata.m.orientation = info->orientation; + } else { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Invalid value for orientation field"); + } + if (info->num_color_channels != 1 && info->num_color_channels != 3) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Invalid number of color channels"); + } + if (info->intensity_target != 0) { + enc->metadata.m.SetIntensityTarget(info->intensity_target); + enc->intensity_target_set = true; + } else if (enc->color_encoding_set) { + // If this is false, JxlEncoderSetColorEncoding will be called later and we + // will get one more chance to call jxl::SetIntensityTarget, after the color + // encoding is indeed set. 
+ jxl::SetIntensityTarget(&enc->metadata.m); + enc->intensity_target_set = true; + } + enc->metadata.m.tone_mapping.min_nits = info->min_nits; + enc->metadata.m.tone_mapping.relative_to_max_display = + info->relative_to_max_display; + enc->metadata.m.tone_mapping.linear_below = info->linear_below; + enc->basic_info = *info; + enc->basic_info_set = true; + + enc->metadata.m.have_animation = info->have_animation; + if (info->have_animation) { + if (info->animation.tps_denominator < 1) { + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, + "If animation is used, tps_denominator must be >= 1"); + } + if (info->animation.tps_numerator < 1) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "If animation is used, tps_numerator must be >= 1"); + } + enc->metadata.m.animation.tps_numerator = info->animation.tps_numerator; + enc->metadata.m.animation.tps_denominator = info->animation.tps_denominator; + enc->metadata.m.animation.num_loops = info->animation.num_loops; + enc->metadata.m.animation.have_timecodes = info->animation.have_timecodes; + } + std::string level_message; + int required_level = VerifyLevelSettings(enc, &level_message); + if (required_level == -1 || + (static_cast(enc->codestream_level) < required_level && + enc->codestream_level != -1)) { + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, "%s", + ("Codestream level verification for level " + + std::to_string(enc->codestream_level) + " failed: " + level_message) + .c_str()); + } + return JXL_ENC_SUCCESS; +} + +void JxlEncoderInitExtraChannelInfo(JxlExtraChannelType type, + JxlExtraChannelInfo* info) { + info->type = type; + info->bits_per_sample = 8; + info->exponent_bits_per_sample = 0; + info->dim_shift = 0; + info->name_length = 0; + info->alpha_premultiplied = JXL_FALSE; + info->spot_color[0] = 0; + info->spot_color[1] = 0; + info->spot_color[2] = 0; + info->spot_color[3] = 0; + info->cfa_channel = 0; +} + +JXL_EXPORT JxlEncoderStatus JxlEncoderSetUpsamplingMode(JxlEncoder* enc, + const 
int64_t factor, + const int64_t mode) { + // for convenience, allow calling this with factor 1 and just make it a no-op + if (factor == 1) return JXL_ENC_SUCCESS; + if (factor != 2 && factor != 4 && factor != 8) + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Invalid upsampling factor"); + if (mode < -1) + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid upsampling mode"); + if (mode > 1) + return JXL_API_ERROR(enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Unsupported upsampling mode"); + + const size_t count = (factor == 2 ? 15 : (factor == 4 ? 55 : 210)); + auto& td = enc->metadata.transform_data; + float* weights = (factor == 2 ? td.upsampling2_weights + : (factor == 4 ? td.upsampling4_weights + : td.upsampling8_weights)); + if (mode == -1) { + // Default fancy upsampling: don't signal custom weights + enc->metadata.transform_data.custom_weights_mask &= ~(factor >> 1); + } else if (mode == 0) { + // Nearest neighbor upsampling + enc->metadata.transform_data.custom_weights_mask |= (factor >> 1); + memset(weights, 0, sizeof(float) * count); + if (factor == 2) { + weights[9] = 1.f; + } else if (factor == 4) { + for (int i : {19, 24, 49}) weights[i] = 1.f; + } else if (factor == 8) { + for (int i : {39, 44, 49, 54, 119, 124, 129, 174, 179, 204}) { + weights[i] = 1.f; + } + } + } else if (mode == 1) { + // 'Pixel dots' upsampling (nearest-neighbor with cut corners) + JxlEncoderSetUpsamplingMode(enc, factor, 0); + if (factor == 4) { + weights[19] = 0.f; + weights[24] = 0.5f; + } else if (factor == 8) { + for (int i : {39, 44, 49, 119}) weights[i] = 0.f; + for (int i : {54, 124}) weights[i] = 0.5f; + } + } + return JXL_ENC_SUCCESS; +} + +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelInfo( + JxlEncoder* enc, size_t index, const JxlExtraChannelInfo* info) { + if (index >= enc->metadata.m.num_extra_channels) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Invalid value for the index of extra channel"); + } + if (JXL_ENC_SUCCESS != 
CheckValidBitdepth(info->bits_per_sample, + info->exponent_bits_per_sample)) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, "Invalid bit depth"); + } + + jxl::ExtraChannelInfo& channel = enc->metadata.m.extra_channel_info[index]; + channel.type = static_cast(info->type); + channel.bit_depth.bits_per_sample = info->bits_per_sample; + enc->metadata.m.modular_16_bit_buffer_sufficient &= + info->bits_per_sample <= 12; + channel.bit_depth.exponent_bits_per_sample = info->exponent_bits_per_sample; + channel.bit_depth.floating_point_sample = info->exponent_bits_per_sample != 0; + channel.dim_shift = info->dim_shift; + channel.name = ""; + channel.alpha_associated = (info->alpha_premultiplied != 0); + channel.cfa_channel = info->cfa_channel; + channel.spot_color[0] = info->spot_color[0]; + channel.spot_color[1] = info->spot_color[1]; + channel.spot_color[2] = info->spot_color[2]; + channel.spot_color[3] = info->spot_color[3]; + std::string level_message; + int required_level = VerifyLevelSettings(enc, &level_message); + if (required_level == -1 || + (static_cast(enc->codestream_level) < required_level && + enc->codestream_level != -1)) { + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, "%s", + ("Codestream level verification for level " + + std::to_string(enc->codestream_level) + " failed: " + level_message) + .c_str()); + } + return JXL_ENC_SUCCESS; +} + +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelName(JxlEncoder* enc, + size_t index, + const char* name, + size_t size) { + if (index >= enc->metadata.m.num_extra_channels) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "Invalid value for the index of extra channel"); + } + enc->metadata.m.extra_channel_info[index].name = + std::string(name, name + size); + return JXL_ENC_SUCCESS; +} + +JxlEncoderFrameSettings* JxlEncoderFrameSettingsCreate( + JxlEncoder* enc, const JxlEncoderFrameSettings* source) { + auto opts = jxl::MemoryManagerMakeUnique( + &enc->memory_manager); + if (!opts) return nullptr; 
+ opts->enc = enc; + if (source != nullptr) { + opts->values = source->values; + } else { + opts->values.lossless = false; + } + opts->values.cparams.level = enc->codestream_level; + opts->values.cparams.ec_distance.resize(enc->metadata.m.num_extra_channels, + -1); + + JxlEncoderFrameSettings* ret = opts.get(); + enc->encoder_options.emplace_back(std::move(opts)); + return ret; +} + +JxlEncoderStatus JxlEncoderSetFrameLossless( + JxlEncoderFrameSettings* frame_settings, const JXL_BOOL lossless) { + if (lossless && frame_settings->enc->basic_info_set && + frame_settings->enc->metadata.m.xyb_encoded) { + return JXL_API_ERROR( + frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Set uses_original_profile=true for lossless encoding"); + } + frame_settings->values.lossless = lossless; + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetFrameDistance( + JxlEncoderFrameSettings* frame_settings, float distance) { + if (distance < 0.f || distance > 25.f) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Distance has to be in [0.0..25.0] (corresponding to " + "quality in [0.0..100.0])"); + } + if (distance > 0.f && distance < 0.01f) { + distance = 0.01f; + } + frame_settings->values.cparams.butteraugli_distance = distance; + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetExtraChannelDistance( + JxlEncoderFrameSettings* frame_settings, size_t index, float distance) { + if (index >= frame_settings->enc->metadata.m.num_extra_channels) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Invalid value for the index of extra channel"); + } + if (distance != -1.f && (distance < 0.f || distance > 25.f)) { + return JXL_API_ERROR( + frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Distance has to be -1 or in [0.0..25.0] (corresponding to " + "quality in [0.0..100.0])"); + } + if (distance > 0.f && distance < 0.01f) { + distance = 0.01f; + } + + if (index >= frame_settings->values.cparams.ec_distance.size()) { + // This can 
only happen if JxlEncoderFrameSettingsCreate() was called before + // JxlEncoderSetBasicInfo(). + frame_settings->values.cparams.ec_distance.resize( + frame_settings->enc->metadata.m.num_extra_channels, -1); + } + + frame_settings->values.cparams.ec_distance[index] = distance; + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderFrameSettingsSetOption( + JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option, + int64_t value) { + // check if value is -1, 0 or 1 for Override-type options + switch (option) { + case JXL_ENC_FRAME_SETTING_NOISE: + case JXL_ENC_FRAME_SETTING_DOTS: + case JXL_ENC_FRAME_SETTING_PATCHES: + case JXL_ENC_FRAME_SETTING_GABORISH: + case JXL_ENC_FRAME_SETTING_MODULAR: + case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE: + case JXL_ENC_FRAME_SETTING_GROUP_ORDER: + case JXL_ENC_FRAME_SETTING_RESPONSIVE: + case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC: + case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC: + case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE: + case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL: + case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES: + if (value < -1 || value > 1) { + return JXL_API_ERROR( + frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be -1 (default), 0 (off) or 1 (on)"); + } + break; + default: + break; + } + + switch (option) { + case JXL_ENC_FRAME_SETTING_EFFORT: + if (frame_settings->enc->allow_expert_options) { + if (value < 1 || value > 10) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Encode effort has to be in [1..10]"); + } + } else { + if (value < 1 || value > 9) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Encode effort has to be in [1..9]"); + } + } + frame_settings->values.cparams.speed_tier = + static_cast(10 - value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_BROTLI_EFFORT: + if (value < -1 || value > 11) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Brotli effort has to be in [-1..11]"); + } + // 
set cparams for brotli use in JPEG frames + frame_settings->values.cparams.brotli_effort = value; + // set enc option for brotli use in brob boxes + frame_settings->enc->brotli_effort = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_DECODING_SPEED: + if (value < 0 || value > 4) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Decoding speed has to be in [0..4]"); + } + frame_settings->values.cparams.decoding_speed_tier = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_RESAMPLING: + if (value != -1 && value != 1 && value != 2 && value != 4 && value != 8) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Resampling factor has to be 1, 2, 4 or 8"); + } + frame_settings->values.cparams.resampling = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING: + // TODO(lode): the jxl codestream allows choosing a different resampling + // factor for each extra channel, independently per frame. Move this + // option to a JxlEncoderFrameSettings-option that can be set per extra + // channel, so needs its own function rather than + // JxlEncoderFrameSettingsSetOption due to the extra channel index + // argument required. 
+ if (value != -1 && value != 1 && value != 2 && value != 4 && value != 8) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Resampling factor has to be 1, 2, 4 or 8"); + } + frame_settings->values.cparams.ec_resampling = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED: + if (value < 0 || value > 1) { + return JXL_ENC_ERROR; + } + frame_settings->values.cparams.already_downsampled = (value == 1); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_NOISE: + frame_settings->values.cparams.noise = static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_DOTS: + frame_settings->values.cparams.dots = static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_PATCHES: + frame_settings->values.cparams.patches = + static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_EPF: + if (value < -1 || value > 3) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "EPF value has to be in [-1..3]"); + } + frame_settings->values.cparams.epf = static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_GABORISH: + frame_settings->values.cparams.gaborish = + static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_MODULAR: + frame_settings->values.cparams.modular_mode = (value == 1); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE: + frame_settings->values.cparams.keep_invisible = + static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_GROUP_ORDER: + frame_settings->values.cparams.centerfirst = (value == 1); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X: + if (value < -1) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Center x coordinate has to be -1 or positive"); + } + frame_settings->values.cparams.center_x = static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y: + if (value < -1) { + 
return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Center y coordinate has to be -1 or positive"); + } + frame_settings->values.cparams.center_y = static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_RESPONSIVE: + frame_settings->values.cparams.responsive = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC: + frame_settings->values.cparams.progressive_mode = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC: + frame_settings->values.cparams.qprogressive_mode = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC: + if (value < -1 || value > 2) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Progressive DC has to be in [-1..2]"); + } + frame_settings->values.cparams.progressive_dc = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_PALETTE_COLORS: + if (value < -1 || value > 70913) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..70913]"); + } + if (value == -1) { + frame_settings->values.cparams.palette_colors = 1 << 10; + } else { + frame_settings->values.cparams.palette_colors = value; + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE: + // TODO(lode): the defaults of some palette settings depend on others. + // See the logic in cjxl. Similar for other settings. 
This should be + // handled in the encoder during JxlEncoderProcessOutput (or, + // alternatively, in the cjxl binary like now) + frame_settings->values.cparams.lossy_palette = (value == 1); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM: + if (value < -1 || value > 2) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..2]"); + } + if (value == -1) { + frame_settings->values.cparams.color_transform = + jxl::ColorTransform::kXYB; + } else { + frame_settings->values.cparams.color_transform = + static_cast(value); + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE: + if (value < -1 || value > 41) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..41]"); + } + frame_settings->values.cparams.colorspace = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE: + if (value < -1 || value > 3) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..3]"); + } + frame_settings->values.cparams.modular_group_size_shift = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR: + if (value < -1 || value > 15) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..15]"); + } + frame_settings->values.cparams.options.predictor = + static_cast(value); + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS: + // The max allowed value can in theory be higher. However, it depends on + // the effort setting. 11 is the highest safe value that doesn't cause + // tree_samples to be >= 64 in the encoder. The specification may allow + // more than this. With more fine tuning higher values could be allowed. + // For N-channel images, the largest useful value is N-1. 
+ if (value < -1 || value > 11) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..11]"); + } + if (value == -1) { + frame_settings->values.cparams.options.max_properties = 0; + } else { + frame_settings->values.cparams.options.max_properties = value; + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL: + if (value == -1) { + frame_settings->values.cparams.force_cfl_jpeg_recompression = true; + } else { + frame_settings->values.cparams.force_cfl_jpeg_recompression = value; + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_INDEX_BOX: + frame_settings->values.frame_index_box = true; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_PHOTON_NOISE: + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Float option, try setting it with " + "JxlEncoderFrameSettingsSetFloatOption"); + case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES: + frame_settings->values.cparams.jpeg_compress_boxes = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_BUFFERING: + if (value < 0 || value > 3) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Buffering has to be in [0..3]"); + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF: + frame_settings->values.cparams.jpeg_keep_exif = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP: + frame_settings->values.cparams.jpeg_keep_xmp = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF: + frame_settings->values.cparams.jpeg_keep_jumbf = value; + return JXL_ENC_SUCCESS; + + default: + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Unknown option"); + } +} + +JxlEncoderStatus JxlEncoderFrameSettingsSetFloatOption( + JxlEncoderFrameSettings* frame_settings, JxlEncoderFrameSettingId option, + float value) { + switch (option) { + case JXL_ENC_FRAME_SETTING_PHOTON_NOISE: + if (value < 0) return JXL_ENC_ERROR; + // TODO(lode): 
add encoder setting to set the 8 floating point values of + // the noise synthesis parameters per frame for more fine grained control. + frame_settings->values.cparams.photon_noise_iso = value; + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT: + if (value < -1.f || value > 100.f) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be smaller than 100"); + } + // This value is called "iterations" or "nb_repeats" in cjxl, but is in + // fact a fraction in range 0.0-1.0, with the default value 0.5. + // Convert from floating point percentage to floating point fraction here. + if (value < -.5f) { + // TODO(lode): for this and many other settings (also in + // JxlEncoderFrameSettingsSetOption), avoid duplicating the default + // values here and in enc_params.h and options.h, have one location + // where the defaults are specified. + frame_settings->values.cparams.options.nb_repeats = 0.5f; + } else { + frame_settings->values.cparams.options.nb_repeats = value * 0.01f; + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT: + if (value < -1.f || value > 100.f) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..100]"); + } + if (value < -.5f) { + frame_settings->values.cparams.channel_colors_pre_transform_percent = + 95.0f; + } else { + frame_settings->values.cparams.channel_colors_pre_transform_percent = + value; + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT: + if (value < -1.f || value > 100.f) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Option value has to be in [-1..100]"); + } + if (value < -.5f) { + frame_settings->values.cparams.channel_colors_percent = 80.0f; + } else { + frame_settings->values.cparams.channel_colors_percent = value; + } + return JXL_ENC_SUCCESS; + case JXL_ENC_FRAME_SETTING_EFFORT: + case 
JXL_ENC_FRAME_SETTING_DECODING_SPEED: + case JXL_ENC_FRAME_SETTING_RESAMPLING: + case JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING: + case JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED: + case JXL_ENC_FRAME_SETTING_NOISE: + case JXL_ENC_FRAME_SETTING_DOTS: + case JXL_ENC_FRAME_SETTING_PATCHES: + case JXL_ENC_FRAME_SETTING_EPF: + case JXL_ENC_FRAME_SETTING_GABORISH: + case JXL_ENC_FRAME_SETTING_MODULAR: + case JXL_ENC_FRAME_SETTING_KEEP_INVISIBLE: + case JXL_ENC_FRAME_SETTING_GROUP_ORDER: + case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X: + case JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_Y: + case JXL_ENC_FRAME_SETTING_RESPONSIVE: + case JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC: + case JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC: + case JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC: + case JXL_ENC_FRAME_SETTING_PALETTE_COLORS: + case JXL_ENC_FRAME_SETTING_LOSSY_PALETTE: + case JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM: + case JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE: + case JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE: + case JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR: + case JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS: + case JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL: + case JXL_ENC_FRAME_INDEX_BOX: + case JXL_ENC_FRAME_SETTING_BROTLI_EFFORT: + case JXL_ENC_FRAME_SETTING_FILL_ENUM: + case JXL_ENC_FRAME_SETTING_JPEG_COMPRESS_BOXES: + case JXL_ENC_FRAME_SETTING_BUFFERING: + case JXL_ENC_FRAME_SETTING_JPEG_KEEP_EXIF: + case JXL_ENC_FRAME_SETTING_JPEG_KEEP_XMP: + case JXL_ENC_FRAME_SETTING_JPEG_KEEP_JUMBF: + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Int option, try setting it with " + "JxlEncoderFrameSettingsSetOption"); + default: + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_NOT_SUPPORTED, + "Unknown option"); + } +} +JxlEncoder* JxlEncoderCreate(const JxlMemoryManager* memory_manager) { + JxlMemoryManager local_memory_manager; + if (!jxl::MemoryManagerInit(&local_memory_manager, memory_manager)) { + return nullptr; + } + + void* alloc = + 
jxl::MemoryManagerAlloc(&local_memory_manager, sizeof(JxlEncoder)); + if (!alloc) return nullptr; + JxlEncoder* enc = new (alloc) JxlEncoder(); + enc->memory_manager = local_memory_manager; + // TODO(sboukortt): add an API function to set this. + enc->cms = jxl::GetJxlCms(); + enc->cms_set = true; + + // Initialize all the field values. + JxlEncoderReset(enc); + + return enc; +} + +void JxlEncoderReset(JxlEncoder* enc) { + enc->thread_pool.reset(); + enc->input_queue.clear(); + enc->num_queued_frames = 0; + enc->num_queued_boxes = 0; + enc->encoder_options.clear(); + enc->output_byte_queue.clear(); + enc->output_fast_frame_queue.clear(); + enc->codestream_bytes_written_beginning_of_frame = 0; + enc->codestream_bytes_written_end_of_frame = 0; + enc->wrote_bytes = false; + enc->jxlp_counter = 0; + enc->metadata = jxl::CodecMetadata(); + enc->last_used_cparams = jxl::CompressParams(); + enc->frames_closed = false; + enc->boxes_closed = false; + enc->basic_info_set = false; + enc->color_encoding_set = false; + enc->intensity_target_set = false; + enc->use_container = false; + enc->use_boxes = false; + enc->codestream_level = -1; + JxlEncoderInitBasicInfo(&enc->basic_info); +} + +void JxlEncoderDestroy(JxlEncoder* enc) { + if (enc) { + JxlMemoryManager local_memory_manager = enc->memory_manager; + // Call destructor directly since custom free function is used. 
+ enc->~JxlEncoder(); + jxl::MemoryManagerFree(&local_memory_manager, enc); + } +} + +JxlEncoderError JxlEncoderGetError(JxlEncoder* enc) { return enc->error; } + +JxlEncoderStatus JxlEncoderUseContainer(JxlEncoder* enc, + JXL_BOOL use_container) { + if (enc->wrote_bytes) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "this setting can only be set at the beginning"); + } + enc->use_container = static_cast(use_container); + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderStoreJPEGMetadata(JxlEncoder* enc, + JXL_BOOL store_jpeg_metadata) { + if (enc->wrote_bytes) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "this setting can only be set at the beginning"); + } + enc->store_jpeg_metadata = static_cast(store_jpeg_metadata); + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetCodestreamLevel(JxlEncoder* enc, int level) { + if (level != -1 && level != 5 && level != 10) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_NOT_SUPPORTED, "invalid level"); + } + if (enc->wrote_bytes) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "this setting can only be set at the beginning"); + } + enc->codestream_level = level; + return JXL_ENC_SUCCESS; +} + +int JxlEncoderGetRequiredCodestreamLevel(const JxlEncoder* enc) { + return VerifyLevelSettings(enc, nullptr); +} + +void JxlEncoderSetCms(JxlEncoder* enc, JxlCmsInterface cms) { + jxl::msan::MemoryIsInitialized(&cms, sizeof(cms)); + enc->cms = cms; + enc->cms_set = true; +} + +JxlEncoderStatus JxlEncoderSetParallelRunner(JxlEncoder* enc, + JxlParallelRunner parallel_runner, + void* parallel_runner_opaque) { + if (enc->thread_pool) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "parallel runner already set"); + } + enc->thread_pool = jxl::MemoryManagerMakeUnique( + &enc->memory_manager, parallel_runner, parallel_runner_opaque); + if (!enc->thread_pool) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_GENERIC, + "error setting parallel runner"); + } + return JXL_ENC_SUCCESS; +} + +namespace { 
+JxlEncoderStatus GetCurrentDimensions( + const JxlEncoderFrameSettings* frame_settings, size_t& xsize, + size_t& ysize) { + xsize = frame_settings->enc->metadata.xsize(); + ysize = frame_settings->enc->metadata.ysize(); + if (frame_settings->values.header.layer_info.have_crop) { + xsize = frame_settings->values.header.layer_info.xsize; + ysize = frame_settings->values.header.layer_info.ysize; + } + if (frame_settings->values.cparams.already_downsampled) { + size_t factor = frame_settings->values.cparams.resampling; + xsize = jxl::DivCeil(xsize, factor); + ysize = jxl::DivCeil(ysize, factor); + } + if (xsize == 0 || ysize == 0) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "zero-sized frame is not allowed"); + } + return JXL_ENC_SUCCESS; +} +} // namespace + +JxlEncoderStatus JxlEncoderAddJPEGFrame( + const JxlEncoderFrameSettings* frame_settings, const uint8_t* buffer, + size_t size) { + if (frame_settings->enc->frames_closed) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Frame input is already closed"); + } + + jxl::CodecInOut io; + if (!jxl::jpeg::DecodeImageJPG(jxl::Span(buffer, size), &io)) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_BAD_INPUT, + "Error during decode of input JPEG"); + } + + if (!frame_settings->enc->color_encoding_set) { + if (!SetColorEncodingFromJpegData( + *io.Main().jpeg_data, + &frame_settings->enc->metadata.m.color_encoding)) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_BAD_INPUT, + "Error in input JPEG color space"); + } + } + + if (!frame_settings->enc->basic_info_set) { + JxlBasicInfo basic_info; + JxlEncoderInitBasicInfo(&basic_info); + basic_info.xsize = io.Main().jpeg_data->width; + basic_info.ysize = io.Main().jpeg_data->height; + basic_info.uses_original_profile = true; + if (JxlEncoderSetBasicInfo(frame_settings->enc, &basic_info) != + JXL_ENC_SUCCESS) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "Error setting basic info"); + 
} + } + + if (frame_settings->enc->metadata.m.xyb_encoded) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Can't XYB encode a lossless JPEG"); + } + if (!io.blobs.exif.empty()) { + JxlOrientation orientation = static_cast( + frame_settings->enc->metadata.m.orientation); + jxl::InterpretExif(io.blobs.exif, &orientation); + frame_settings->enc->metadata.m.orientation = orientation; + } + if (!io.blobs.exif.empty() && frame_settings->values.cparams.jpeg_keep_exif) { + size_t exif_size = io.blobs.exif.size(); + // Exif data in JPEG is limited to 64k + if (exif_size > 0xFFFF) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "Exif larger than possible in JPEG?"); + } + exif_size += 4; // prefix 4 zero bytes for tiff offset + std::vector exif(exif_size); + memcpy(exif.data() + 4, io.blobs.exif.data(), io.blobs.exif.size()); + JxlEncoderUseBoxes(frame_settings->enc); + JxlEncoderAddBox(frame_settings->enc, "Exif", exif.data(), exif_size, + frame_settings->values.cparams.jpeg_compress_boxes); + } + if (!io.blobs.xmp.empty() && frame_settings->values.cparams.jpeg_keep_xmp) { + JxlEncoderUseBoxes(frame_settings->enc); + JxlEncoderAddBox(frame_settings->enc, "xml ", io.blobs.xmp.data(), + io.blobs.xmp.size(), + frame_settings->values.cparams.jpeg_compress_boxes); + } + if (!io.blobs.jumbf.empty() && + frame_settings->values.cparams.jpeg_keep_jumbf) { + JxlEncoderUseBoxes(frame_settings->enc); + JxlEncoderAddBox(frame_settings->enc, "jumb", io.blobs.jumbf.data(), + io.blobs.jumbf.size(), + frame_settings->values.cparams.jpeg_compress_boxes); + } + if (frame_settings->enc->store_jpeg_metadata) { + if (!frame_settings->values.cparams.jpeg_keep_exif || + !frame_settings->values.cparams.jpeg_keep_xmp) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Need to preserve EXIF and XMP to allow JPEG " + "bitstream reconstruction"); + } + jxl::jpeg::JPEGData data_in = *io.Main().jpeg_data; + jxl::PaddedBytes jpeg_data; + if 
(!jxl::jpeg::EncodeJPEGData(data_in, &jpeg_data, + frame_settings->values.cparams)) { + return JXL_API_ERROR( + frame_settings->enc, JXL_ENC_ERR_JBRD, + "JPEG bitstream reconstruction data cannot be encoded"); + } + frame_settings->enc->jpeg_metadata = std::vector( + jpeg_data.data(), jpeg_data.data() + jpeg_data.size()); + } + + auto queued_frame = jxl::MemoryManagerMakeUnique( + &frame_settings->enc->memory_manager, + // JxlEncoderQueuedFrame is a struct with no constructors, so we use the + // default move constructor there. + jxl::JxlEncoderQueuedFrame{ + frame_settings->values, + jxl::ImageBundle(&frame_settings->enc->metadata.m), + {}}); + if (!queued_frame) { + // TODO(jon): when can this happen? is this an API usage error? + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "No frame queued?"); + } + queued_frame->frame.SetFromImage(std::move(*io.Main().color()), + io.Main().c_current()); + size_t xsize, ysize; + if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "bad dimensions"); + } + if (xsize != static_cast(io.Main().jpeg_data->width) || + ysize != static_cast(io.Main().jpeg_data->height)) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "JPEG dimensions don't match frame dimensions"); + } + std::vector extra_channels( + frame_settings->enc->metadata.m.num_extra_channels); + for (auto& extra_channel : extra_channels) { + extra_channel = jxl::ImageF(xsize, ysize); + queued_frame->ec_initialized.push_back(0); + } + queued_frame->frame.SetExtraChannels(std::move(extra_channels)); + queued_frame->frame.jpeg_data = std::move(io.Main().jpeg_data); + queued_frame->frame.color_transform = io.Main().color_transform; + queued_frame->frame.chroma_subsampling = io.Main().chroma_subsampling; + + QueueFrame(frame_settings, queued_frame); + return JXL_ENC_SUCCESS; +} + +static bool CanDoFastLossless(const JxlEncoderFrameSettings* 
frame_settings, + const JxlPixelFormat* pixel_format, + bool has_alpha) { + if (!frame_settings->values.lossless) { + return false; + } + // TODO(veluca): many of the following options could be made to work, but are + // just not implemented in FJXL's frame header handling yet. + if (frame_settings->values.frame_index_box) { + return false; + } + if (frame_settings->values.header.layer_info.have_crop) { + return false; + } + if (frame_settings->enc->metadata.m.have_animation) { + return false; + } + if (frame_settings->values.cparams.speed_tier != jxl::SpeedTier::kLightning) { + return false; + } + if (frame_settings->values.image_bit_depth.type == + JxlBitDepthType::JXL_BIT_DEPTH_CUSTOM && + frame_settings->values.image_bit_depth.bits_per_sample != + frame_settings->enc->metadata.m.bit_depth.bits_per_sample) { + return false; + } + // TODO(veluca): implement support for LSB-padded input in fast_lossless. + if (frame_settings->values.image_bit_depth.type == + JxlBitDepthType::JXL_BIT_DEPTH_FROM_PIXEL_FORMAT && + frame_settings->values.image_bit_depth.bits_per_sample % 8 != 0) { + return false; + } + if (!frame_settings->values.frame_name.empty()) { + return false; + } + // No extra channels other than alpha. 
+ if (!(has_alpha && frame_settings->enc->metadata.m.num_extra_channels == 1) && + frame_settings->enc->metadata.m.num_extra_channels != 0) { + return false; + } + if (frame_settings->enc->metadata.m.bit_depth.bits_per_sample > 16) { + return false; + } + if (pixel_format->data_type != JxlDataType::JXL_TYPE_FLOAT16 && + pixel_format->data_type != JxlDataType::JXL_TYPE_UINT16 && + pixel_format->data_type != JxlDataType::JXL_TYPE_UINT8) { + return false; + } + if ((frame_settings->enc->metadata.m.bit_depth.bits_per_sample > 8) != + (pixel_format->data_type == JxlDataType::JXL_TYPE_UINT16 || + pixel_format->data_type == JxlDataType::JXL_TYPE_FLOAT16)) { + return false; + } + if (!((pixel_format->num_channels == 1 || pixel_format->num_channels == 3) && + !has_alpha) && + !((pixel_format->num_channels == 2 || pixel_format->num_channels == 4) && + has_alpha)) { + return false; + } + + return true; +} + +JxlEncoderStatus JxlEncoderAddImageFrame( + const JxlEncoderFrameSettings* frame_settings, + const JxlPixelFormat* pixel_format, const void* buffer, size_t size) { + if (!frame_settings->enc->basic_info_set || + (!frame_settings->enc->color_encoding_set && + !frame_settings->enc->metadata.m.xyb_encoded)) { + // Basic Info must be set, and color encoding must be set directly, + // or set to XYB via JxlBasicInfo.uses_original_profile = JXL_FALSE + // Otherwise, this is an API misuse. 
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Basic info or color encoding not set yet"); + } + + if (frame_settings->enc->frames_closed) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Frame input already closed"); + } + if (pixel_format->num_channels < 3) { + if (frame_settings->enc->basic_info.num_color_channels != 1) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Grayscale pixel format input for an RGB image"); + } + } else { + if (frame_settings->enc->basic_info.num_color_channels != 3) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "RGB pixel format input for a grayscale image"); + } + } + + bool has_alpha = frame_settings->enc->metadata.m.HasAlpha(); + + size_t xsize, ysize; + if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "bad dimensions"); + } + + // All required conditions to do fast-lossless. + if (CanDoFastLossless(frame_settings, pixel_format, has_alpha)) { + const size_t bytes_per_pixel = + pixel_format->data_type == JxlDataType::JXL_TYPE_UINT8 + ? pixel_format->num_channels + : pixel_format->num_channels * 2; + const size_t last_row_size = xsize * bytes_per_pixel; + const size_t align = pixel_format->align; + const size_t row_size = + (align > 1 ? 
jxl::DivCeil(last_row_size, align) * align + : last_row_size); + const size_t bytes_to_read = row_size * (ysize - 1) + last_row_size; + if (bytes_to_read > size) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "provided image buffer too small"); + } + const bool big_endian = + pixel_format->endianness == JXL_BIG_ENDIAN || + (pixel_format->endianness == JXL_NATIVE_ENDIAN && !IsLittleEndian()); + + auto runner = +[](void* void_pool, void* opaque, void fun(void*, size_t), + size_t count) { + auto* pool = reinterpret_cast(void_pool); + JXL_CHECK(jxl::RunOnPool( + pool, 0, count, jxl::ThreadPool::NoInit, + [&](size_t i, size_t) { fun(opaque, i); }, "Encode fast lossless")); + }; + QueueFastLosslessFrame( + frame_settings, + JxlFastLosslessPrepareFrame( + reinterpret_cast(buffer), xsize, row_size, + ysize, pixel_format->num_channels, + frame_settings->enc->metadata.m.bit_depth.bits_per_sample, + big_endian, /*effort=*/2, frame_settings->enc->thread_pool.get(), + runner)); + return JXL_ENC_SUCCESS; + } + + auto queued_frame = jxl::MemoryManagerMakeUnique( + &frame_settings->enc->memory_manager, + // JxlEncoderQueuedFrame is a struct with no constructors, so we use the + // default move constructor there. + jxl::JxlEncoderQueuedFrame{ + frame_settings->values, + jxl::ImageBundle(&frame_settings->enc->metadata.m), + {}}); + + if (!queued_frame) { + // TODO(jon): when can this happen? is this an API usage error? 
+ return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "No frame queued?"); + } + + jxl::ColorEncoding c_current; + if (!frame_settings->enc->color_encoding_set) { + if ((pixel_format->data_type == JXL_TYPE_FLOAT) || + (pixel_format->data_type == JXL_TYPE_FLOAT16)) { + c_current = + jxl::ColorEncoding::LinearSRGB(pixel_format->num_channels < 3); + } else { + c_current = jxl::ColorEncoding::SRGB(pixel_format->num_channels < 3); + } + } else { + c_current = frame_settings->enc->metadata.m.color_encoding; + } + uint32_t num_channels = pixel_format->num_channels; + size_t has_interleaved_alpha = + static_cast(num_channels == 2 || num_channels == 4); + if (has_interleaved_alpha > + frame_settings->enc->metadata.m.num_extra_channels) { + return JXL_API_ERROR( + frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "number of extra channels mismatch (need 1 extra channel for alpha)"); + } + std::vector extra_channels( + frame_settings->enc->metadata.m.num_extra_channels); + for (auto& extra_channel : extra_channels) { + extra_channel = jxl::ImageF(xsize, ysize); + } + queued_frame->frame.SetExtraChannels(std::move(extra_channels)); + for (auto& ec_info : frame_settings->enc->metadata.m.extra_channel_info) { + if (has_interleaved_alpha && ec_info.type == jxl::ExtraChannel::kAlpha) { + queued_frame->ec_initialized.push_back(1); + has_interleaved_alpha = 0; // only first Alpha is initialized + } else { + queued_frame->ec_initialized.push_back(0); + } + } + queued_frame->frame.origin.x0 = + frame_settings->values.header.layer_info.crop_x0; + queued_frame->frame.origin.y0 = + frame_settings->values.header.layer_info.crop_y0; + queued_frame->frame.use_for_next_frame = + (frame_settings->values.header.layer_info.save_as_reference != 0u); + queued_frame->frame.blendmode = + frame_settings->values.header.layer_info.blend_info.blendmode == + JXL_BLEND_REPLACE + ? 
jxl::BlendMode::kReplace + : jxl::BlendMode::kBlend; + queued_frame->frame.blend = + frame_settings->values.header.layer_info.blend_info.source > 0; + + if (JXL_ENC_SUCCESS != + VerifyInputBitDepth(frame_settings->values.image_bit_depth, + *pixel_format)) { + return JXL_API_ERROR_NOSET("Invalid input bit depth"); + } + size_t bits_per_sample = + GetBitDepth(frame_settings->values.image_bit_depth, + frame_settings->enc->metadata.m, *pixel_format); + const uint8_t* uint8_buffer = reinterpret_cast(buffer); + if (!jxl::ConvertFromExternal( + jxl::Span(uint8_buffer, size), xsize, ysize, c_current, + bits_per_sample, *pixel_format, + frame_settings->enc->thread_pool.get(), &(queued_frame->frame))) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Invalid input buffer"); + } + if (frame_settings->values.lossless && + frame_settings->enc->metadata.m.xyb_encoded) { + return JXL_API_ERROR( + frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Set uses_original_profile=true for lossless encoding"); + } + queued_frame->option_values.cparams.level = + frame_settings->enc->codestream_level; + + QueueFrame(frame_settings, queued_frame); + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderUseBoxes(JxlEncoder* enc) { + if (enc->wrote_bytes) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "this setting can only be set at the beginning"); + } + enc->use_boxes = true; + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderAddBox(JxlEncoder* enc, const JxlBoxType type, + const uint8_t* contents, size_t size, + JXL_BOOL compress_box) { + if (!enc->use_boxes) { + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, + "must set JxlEncoderUseBoxes at the beginning to add boxes"); + } + if (compress_box) { + if (memcmp("jxl", type, 3) == 0) { + return JXL_API_ERROR( + enc, JXL_ENC_ERR_API_USAGE, + "brob box may not contain a type starting with \"jxl\""); + } + if (memcmp("jbrd", type, 4) == 0) { + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "jbrd box 
may not be brob compressed"); + } + if (memcmp("brob", type, 4) == 0) { + // The compress_box will compress an existing non-brob box into a brob + // box. If already giving a valid brotli-compressed brob box, set + // compress_box to false since it is already compressed. + return JXL_API_ERROR(enc, JXL_ENC_ERR_API_USAGE, + "a brob box cannot contain another brob box"); + } + } + + auto box = jxl::MemoryManagerMakeUnique( + &enc->memory_manager); + + box->type = jxl::MakeBoxType(type); + box->contents.assign(contents, contents + size); + box->compress_box = !!compress_box; + QueueBox(enc, box); + return JXL_ENC_SUCCESS; +} + +JXL_EXPORT JxlEncoderStatus JxlEncoderSetExtraChannelBuffer( + const JxlEncoderFrameSettings* frame_settings, + const JxlPixelFormat* pixel_format, const void* buffer, size_t size, + uint32_t index) { + if (index >= frame_settings->enc->metadata.m.num_extra_channels) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Invalid value for the index of extra channel"); + } + if (!frame_settings->enc->basic_info_set || + !frame_settings->enc->color_encoding_set) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Basic info has to be set first"); + } + if (frame_settings->enc->input_queue.empty()) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "First add image frame, then extra channels"); + } + if (frame_settings->enc->frames_closed) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Frame input already closed"); + } + size_t xsize, ysize; + if (GetCurrentDimensions(frame_settings, xsize, ysize) != JXL_ENC_SUCCESS) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_GENERIC, + "bad dimensions"); + } + JxlPixelFormat ec_format = *pixel_format; + ec_format.num_channels = 1; + if (JXL_ENC_SUCCESS != + VerifyInputBitDepth(frame_settings->values.image_bit_depth, ec_format)) { + return JXL_API_ERROR_NOSET("Invalid input bit depth"); + } + size_t bits_per_sample = 
GetBitDepth( + frame_settings->values.image_bit_depth, + frame_settings->enc->metadata.m.extra_channel_info[index], ec_format); + const uint8_t* uint8_buffer = reinterpret_cast(buffer); + auto queued_frame = frame_settings->enc->input_queue.back().frame.get(); + if (!jxl::ConvertFromExternal(jxl::Span(uint8_buffer, size), + xsize, ysize, bits_per_sample, ec_format, 0, + frame_settings->enc->thread_pool.get(), + &queued_frame->frame.extra_channels()[index])) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Failed to set buffer for extra channel"); + } + queued_frame->ec_initialized[index] = 1; + + return JXL_ENC_SUCCESS; +} + +void JxlEncoderCloseFrames(JxlEncoder* enc) { enc->frames_closed = true; } + +void JxlEncoderCloseBoxes(JxlEncoder* enc) { enc->boxes_closed = true; } + +void JxlEncoderCloseInput(JxlEncoder* enc) { + JxlEncoderCloseFrames(enc); + JxlEncoderCloseBoxes(enc); +} +JxlEncoderStatus JxlEncoderProcessOutput(JxlEncoder* enc, uint8_t** next_out, + size_t* avail_out) { + while (*avail_out >= 32 && + (!enc->output_byte_queue.empty() || + !enc->output_fast_frame_queue.empty() || !enc->input_queue.empty())) { + if (!enc->output_byte_queue.empty()) { + size_t to_copy = std::min(*avail_out, enc->output_byte_queue.size()); + std::copy_n(enc->output_byte_queue.begin(), to_copy, *next_out); + *next_out += to_copy; + *avail_out -= to_copy; + enc->output_byte_queue.erase(enc->output_byte_queue.begin(), + enc->output_byte_queue.begin() + to_copy); + } else if (!enc->output_fast_frame_queue.empty()) { + size_t count = JxlFastLosslessWriteOutput( + enc->output_fast_frame_queue.front().get(), *next_out, *avail_out); + *next_out += count; + *avail_out -= count; + if (count == 0) { + enc->output_fast_frame_queue.pop_front(); + } + + } else if (!enc->input_queue.empty()) { + if (enc->RefillOutputByteQueue() != JXL_ENC_SUCCESS) { + return JXL_ENC_ERROR; + } + } + } + + if (!enc->output_byte_queue.empty() || + !enc->output_fast_frame_queue.empty() 
|| !enc->input_queue.empty()) { + return JXL_ENC_NEED_MORE_OUTPUT; + } + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetFrameHeader( + JxlEncoderFrameSettings* frame_settings, + const JxlFrameHeader* frame_header) { + if (frame_header->layer_info.blend_info.source > 3) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "invalid blending source index"); + } + // If there are no extra channels, it's ok for the value to be 0. + if (frame_header->layer_info.blend_info.alpha != 0 && + frame_header->layer_info.blend_info.alpha >= + frame_settings->enc->metadata.m.extra_channel_info.size()) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "alpha blend channel index out of bounds"); + } + + frame_settings->values.header = *frame_header; + // Setting the frame header resets the frame name, it must be set again with + // JxlEncoderSetFrameName if desired. + frame_settings->values.frame_name = ""; + + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetExtraChannelBlendInfo( + JxlEncoderFrameSettings* frame_settings, size_t index, + const JxlBlendInfo* blend_info) { + if (index >= frame_settings->enc->metadata.m.num_extra_channels) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "Invalid value for the index of extra channel"); + } + + if (frame_settings->values.extra_channel_blend_info.size() != + frame_settings->enc->metadata.m.num_extra_channels) { + JxlBlendInfo default_blend_info; + JxlEncoderInitBlendInfo(&default_blend_info); + frame_settings->values.extra_channel_blend_info.resize( + frame_settings->enc->metadata.m.num_extra_channels, default_blend_info); + } + frame_settings->values.extra_channel_blend_info[index] = *blend_info; + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetFrameName(JxlEncoderFrameSettings* frame_settings, + const char* frame_name) { + std::string str = frame_name ? 
frame_name : ""; + if (str.size() > 1071) { + return JXL_API_ERROR(frame_settings->enc, JXL_ENC_ERR_API_USAGE, + "frame name can be max 1071 bytes long"); + } + frame_settings->values.frame_name = str; + frame_settings->values.header.name_length = str.size(); + return JXL_ENC_SUCCESS; +} + +JxlEncoderStatus JxlEncoderSetFrameBitDepth( + JxlEncoderFrameSettings* frame_settings, const JxlBitDepth* bit_depth) { + if (bit_depth->type != JXL_BIT_DEPTH_FROM_PIXEL_FORMAT && + bit_depth->type != JXL_BIT_DEPTH_FROM_CODESTREAM) { + return JXL_API_ERROR_NOSET( + "Only JXL_BIT_DEPTH_FROM_PIXEL_FORMAT and " + "JXL_BIT_DEPTH_FROM_CODESTREAM is implemented " + "for input buffers."); + } + frame_settings->values.image_bit_depth = *bit_depth; + return JXL_ENC_SUCCESS; +} + +void JxlColorEncodingSetToSRGB(JxlColorEncoding* color_encoding, + JXL_BOOL is_gray) { + ConvertInternalToExternalColorEncoding(jxl::ColorEncoding::SRGB(is_gray), + color_encoding); +} + +void JxlColorEncodingSetToLinearSRGB(JxlColorEncoding* color_encoding, + JXL_BOOL is_gray) { + ConvertInternalToExternalColorEncoding( + jxl::ColorEncoding::LinearSRGB(is_gray), color_encoding); +} + +void JxlEncoderAllowExpertOptions(JxlEncoder* enc) { + enc->allow_expert_options = true; +} + +JXL_EXPORT void JxlEncoderSetDebugImageCallback( + JxlEncoderFrameSettings* frame_settings, JxlDebugImageCallback callback, + void* opaque) { + frame_settings->values.cparams.debug_image = callback; + frame_settings->values.cparams.debug_image_opaque = opaque; +} + +JXL_EXPORT JxlEncoderStats* JxlEncoderStatsCreate() { + return new JxlEncoderStats(); +} + +JXL_EXPORT void JxlEncoderStatsDestroy(JxlEncoderStats* stats) { + if (stats) delete stats; +} + +JXL_EXPORT void JxlEncoderCollectStats(JxlEncoderFrameSettings* frame_settings, + JxlEncoderStats* stats) { + if (!stats) return; + frame_settings->values.aux_out = &stats->aux_out; +} + +JXL_EXPORT size_t JxlEncoderStatsGet(const JxlEncoderStats* stats, + JxlEncoderStatsKey key) { + if 
(!stats) return 0; + const jxl::AuxOut& aux_out = stats->aux_out; + switch (key) { + case JXL_ENC_STAT_HEADER_BITS: + return aux_out.layers[jxl::kLayerHeader].total_bits; + case JXL_ENC_STAT_TOC_BITS: + return aux_out.layers[jxl::kLayerTOC].total_bits; + case JXL_ENC_STAT_DICTIONARY_BITS: + return aux_out.layers[jxl::kLayerDictionary].total_bits; + case JXL_ENC_STAT_SPLINES_BITS: + return aux_out.layers[jxl::kLayerSplines].total_bits; + case JXL_ENC_STAT_NOISE_BITS: + return aux_out.layers[jxl::kLayerNoise].total_bits; + case JXL_ENC_STAT_QUANT_BITS: + return aux_out.layers[jxl::kLayerQuant].total_bits; + case JXL_ENC_STAT_MODULAR_TREE_BITS: + return aux_out.layers[jxl::kLayerModularTree].total_bits; + case JXL_ENC_STAT_MODULAR_GLOBAL_BITS: + return aux_out.layers[jxl::kLayerModularGlobal].total_bits; + case JXL_ENC_STAT_DC_BITS: + return aux_out.layers[jxl::kLayerDC].total_bits; + case JXL_ENC_STAT_MODULAR_DC_GROUP_BITS: + return aux_out.layers[jxl::kLayerModularDcGroup].total_bits; + case JXL_ENC_STAT_CONTROL_FIELDS_BITS: + return aux_out.layers[jxl::kLayerControlFields].total_bits; + case JXL_ENC_STAT_COEF_ORDER_BITS: + return aux_out.layers[jxl::kLayerOrder].total_bits; + case JXL_ENC_STAT_AC_HISTOGRAM_BITS: + return aux_out.layers[jxl::kLayerAC].total_bits; + case JXL_ENC_STAT_AC_BITS: + return aux_out.layers[jxl::kLayerACTokens].total_bits; + case JXL_ENC_STAT_MODULAR_AC_GROUP_BITS: + return aux_out.layers[jxl::kLayerModularAcGroup].total_bits; + case JXL_ENC_STAT_NUM_SMALL_BLOCKS: + return aux_out.num_small_blocks; + case JXL_ENC_STAT_NUM_DCT4X8_BLOCKS: + return aux_out.num_dct4x8_blocks; + case JXL_ENC_STAT_NUM_AFV_BLOCKS: + return aux_out.num_afv_blocks; + case JXL_ENC_STAT_NUM_DCT8_BLOCKS: + return aux_out.num_dct8_blocks; + case JXL_ENC_STAT_NUM_DCT8X32_BLOCKS: + return aux_out.num_dct16_blocks; + case JXL_ENC_STAT_NUM_DCT16_BLOCKS: + return aux_out.num_dct16x32_blocks; + case JXL_ENC_STAT_NUM_DCT16X32_BLOCKS: + return aux_out.num_dct32_blocks; + case 
JXL_ENC_STAT_NUM_DCT32_BLOCKS: + return aux_out.num_dct32x64_blocks; + case JXL_ENC_STAT_NUM_DCT32X64_BLOCKS: + return aux_out.num_dct32x64_blocks; + case JXL_ENC_STAT_NUM_DCT64_BLOCKS: + return aux_out.num_dct64_blocks; + case JXL_ENC_STAT_NUM_BUTTERAUGLI_ITERS: + return aux_out.num_butteraugli_iters; + default: + return 0; + } +} + +JXL_EXPORT void JxlEncoderStatsMerge(JxlEncoderStats* stats, + const JxlEncoderStats* other) { + if (!stats || !other) return; + stats->aux_out.Assimilate(other->aux_out); +} diff --git a/third-party/libjxl/libjxl/lib/jxl/encode_internal.h b/third-party/libjxl/libjxl/lib/jxl/encode_internal.h new file mode 100644 index 0000000000..62096345d7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/encode_internal.h @@ -0,0 +1,282 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +#ifndef LIB_JXL_ENCODE_INTERNAL_H_ +#define LIB_JXL_ENCODE_INTERNAL_H_ + +#include +#include +#include +#include + +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_fast_lossless.h" +#include "lib/jxl/enc_frame.h" +#include "lib/jxl/memory_manager_internal.h" + +namespace jxl { + +/* Frame index box 'jxli' will start with Varint() for +NF: has type Varint(): number of frames listed in the index. +TNUM: has type u32: numerator of tick unit. +TDEN: has type u32: denominator of tick unit. Value 0 means the file is +ill-formed. per frame i listed: OFFi: has type Varint(): offset of start byte of +this frame compared to start byte of previous frame from this index in the JPEG +XL codestream. For the first frame, this is the offset from the first byte of +the JPEG XL codestream. Ti: has type Varint(): duration in ticks between the +start of this frame and the start of the next frame in the index. 
If this is the +last frame in the index, this is the duration in ticks between the start of this +frame and the end of the stream. A tick lasts TNUM / TDEN seconds. Fi: has type +Varint(): amount of frames the next frame in the index occurs after this frame. +If this is the last frame in the index, this is the amount of frames after this +frame in the remainder of the stream. Only frames that are presented by the +decoder are counted for this purpose, this excludes frames that are not intended +for display but for compositing with other frames, such as frames that aren't +the last frame with a duration of 0 ticks. + +All the frames listed in jxli are keyframes and the first frame is +present in the list. +There shall be either zero or one Frame Index boxes in a JPEG XL file. +The offsets OFFi per frame are given as bytes in the codestream, not as +bytes in the file format using the box structure. This means if JPEG XL Partial +Codestream boxes are used, the offset is counted within the concatenated +codestream, bytes from box headers or non-codestream boxes are not counted. +*/ + +typedef struct JxlEncoderFrameIndexBoxEntryStruct { + bool to_be_indexed; + uint32_t duration; + uint64_t OFFi; +} JxlEncoderFrameIndexBoxEntry; + +typedef struct JxlEncoderFrameIndexBoxStruct { + // We always need to record the first frame entry, so presence of the + // first entry alone is not an indication if it was requested to be + // stored. + bool index_box_requested_through_api = false; + + int64_t NF() const { return entries.size(); } + bool StoreFrameIndexBox() { + for (auto e : entries) { + if (e.to_be_indexed) { + return true; + } + } + return false; + } + int32_t TNUM = 1; + int32_t TDEN = 1000; + + std::vector entries; + + // That way we can ensure that every index box will have the first frame. + // If the API user decides to mark it as an indexed frame, we call + // the AddFrame again, this time with requested. 
+ void AddFrame(uint64_t OFFi, uint32_t duration, bool to_be_indexed) { + // We call AddFrame to every frame. + // Recording the first frame is required by the standard. + // Knowing the last frame is required, since the last indexed frame + // needs to know how many frames until the end. + // To be able to tell how many frames there are between each index + // entry we just record every frame here. + if (entries.size() == 1) { + if (OFFi == entries[0].OFFi) { + // API use for the first frame, let's clear the already recorded first + // frame. + entries.clear(); + } + } + JxlEncoderFrameIndexBoxEntry e; + e.to_be_indexed = to_be_indexed; + e.OFFi = OFFi; + e.duration = duration; + entries.push_back(e); + } +} JxlEncoderFrameIndexBox; + +// The encoder options (such as quality, compression speed, ...) for a single +// frame, but not encoder-wide options such as box-related options. +typedef struct JxlEncoderFrameSettingsValuesStruct { + // lossless is a separate setting from cparams because it is a combination + // setting that overrides multiple settings inside of cparams. + bool lossless; + CompressParams cparams; + JxlFrameHeader header; + std::vector extra_channel_blend_info; + std::string frame_name; + JxlBitDepth image_bit_depth; + bool frame_index_box = false; + jxl::AuxOut* aux_out = nullptr; +} JxlEncoderFrameSettingsValues; + +typedef std::array BoxType; + +// Utility function that makes a BoxType from a string literal. The string must +// have 4 characters, a 5th null termination character is optional. 
+constexpr BoxType MakeBoxType(const char* type) { + return BoxType( + {{static_cast(type[0]), static_cast(type[1]), + static_cast(type[2]), static_cast(type[3])}}); +} + +constexpr unsigned char kContainerHeader[] = { + 0, 0, 0, 0xc, 'J', 'X', 'L', ' ', 0xd, 0xa, 0x87, + 0xa, 0, 0, 0, 0x14, 'f', 't', 'y', 'p', 'j', 'x', + 'l', ' ', 0, 0, 0, 0, 'j', 'x', 'l', ' '}; + +constexpr unsigned char kLevelBoxHeader[] = {0, 0, 0, 0x9, 'j', 'x', 'l', 'l'}; + +struct JxlEncoderQueuedFrame { + JxlEncoderFrameSettingsValues option_values; + ImageBundle frame; + std::vector ec_initialized; +}; + +struct JxlEncoderQueuedBox { + BoxType type; + std::vector contents; + bool compress_box; +}; + +using FJXLFrameUniquePtr = + std::unique_ptr; + +// Either a frame, or a box, not both. +// Can also be a FJXL frame. +struct JxlEncoderQueuedInput { + explicit JxlEncoderQueuedInput(const JxlMemoryManager& memory_manager) + : frame(nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)), + box(nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)) {} + MemoryManagerUniquePtr frame; + MemoryManagerUniquePtr box; + FJXLFrameUniquePtr fast_lossless_frame = {nullptr, + JxlFastLosslessFreeFrameState}; +}; + +// Appends a JXL container box header with given type, size, and unbounded +// properties to output. +template +void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded, + T* output) { + uint64_t box_size = 0; + bool large_size = false; + if (!unbounded) { + box_size = size + 8; + if (box_size >= 0x100000000ull) { + large_size = true; + } + } + + { + const uint64_t store = large_size ? 
1 : box_size; + for (size_t i = 0; i < 4; i++) { + output->push_back(store >> (8 * (3 - i)) & 0xff); + } + } + for (size_t i = 0; i < 4; i++) { + output->push_back(type[i]); + } + + if (large_size) { + for (size_t i = 0; i < 8; i++) { + output->push_back(box_size >> (8 * (7 - i)) & 0xff); + } + } +} + +} // namespace jxl + +// Internal use only struct, can only be initialized correctly by +// JxlEncoderCreate. +struct JxlEncoderStruct { + JxlEncoderError error = JxlEncoderError::JXL_ENC_ERR_OK; + JxlMemoryManager memory_manager; + jxl::MemoryManagerUniquePtr thread_pool{ + nullptr, jxl::MemoryManagerDeleteHelper(&memory_manager)}; + JxlCmsInterface cms; + bool cms_set; + std::vector> + encoder_options; + + size_t num_queued_frames; + size_t num_queued_boxes; + std::vector input_queue; + std::deque output_byte_queue; + std::deque output_fast_frame_queue; + + // How many codestream bytes have been written, i.e., + // content of jxlc and jxlp boxes. Frame index box jxli + // requires position indices to point to codestream bytes, + // so we need to keep track of the total of flushed or queue + // codestream bytes. These bytes may be in a single jxlc box + // or across multiple jxlp boxes. + size_t codestream_bytes_written_beginning_of_frame; + size_t codestream_bytes_written_end_of_frame; + jxl::JxlEncoderFrameIndexBox frame_index_box; + + // Force using the container even if not needed + bool use_container; + // User declared they will add metadata boxes + bool use_boxes; + + // TODO(lode): move level into jxl::CompressParams since some C++ + // implementation decisions should be based on it: level 10 allows more + // features to be used. + int32_t codestream_level; + bool store_jpeg_metadata; + jxl::CodecMetadata metadata; + std::vector jpeg_metadata; + + // Wrote any output at all, so wrote the data before the first user added + // frame or box, such as signature, basic info, ICC profile or jpeg + // reconstruction box. 
+ bool wrote_bytes; + jxl::CompressParams last_used_cparams; + JxlBasicInfo basic_info; + + // Encoder wrote a jxlp (partial codestream) box, so any next codestream + // parts must also be written in jxlp boxes, a single jxlc box cannot be + // used. The counter is used for the 4-byte jxlp box index header. + size_t jxlp_counter; + + bool frames_closed; + bool boxes_closed; + bool basic_info_set; + bool color_encoding_set; + bool intensity_target_set; + bool allow_expert_options = false; + int brotli_effort = -1; + + // Takes the first frame in the input_queue, encodes it, and appends + // the bytes to the output_byte_queue. + JxlEncoderStatus RefillOutputByteQueue(); + + bool MustUseContainer() const { + return use_container || (codestream_level != 5 && codestream_level != -1) || + store_jpeg_metadata || use_boxes; + } + + // Appends the bytes of a JXL box header with the provided type and size to + // the end of the output_byte_queue. If unbounded is true, the size won't be + // added to the header and the box will be assumed to continue until EOF. + void AppendBoxHeader(const jxl::BoxType& type, size_t size, bool unbounded); +}; + +struct JxlEncoderFrameSettingsStruct { + JxlEncoder* enc; + jxl::JxlEncoderFrameSettingsValues values; +}; + +struct JxlEncoderStatsStruct { + jxl::AuxOut aux_out; +}; + +#endif // LIB_JXL_ENCODE_INTERNAL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/encode_test.cc b/third-party/libjxl/libjxl/lib/jxl/encode_test.cc new file mode 100644 index 0000000000..8aac853321 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/encode_test.cc @@ -0,0 +1,1406 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include +#include +#include +#include + +#include "lib/extras/codec.h" +#include "lib/extras/dec/jxl.h" +#include "lib/extras/metrics.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/encode_internal.h" +#include "lib/jxl/jpeg/dec_jpeg_data.h" +#include "lib/jxl/jpeg/dec_jpeg_data_writer.h" +#include "lib/jxl/test_image.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +TEST(EncodeTest, AddFrameAfterCloseInputTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + JxlEncoderCloseInput(enc.get()); + + size_t xsize = 64; + size_t ysize = 64; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + + jxl::CodecInOut input_io = + jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize); + + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = false; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/pixel_format.num_channels < 3); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); +} + +TEST(EncodeTest, AddJPEGAfterCloseTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + JxlEncoderCloseInput(enc.get()); + + const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg"; + const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path); + + JxlEncoderFrameSettings* frame_settings = + 
JxlEncoderFrameSettingsCreate(enc.get(), NULL); + + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size())); +} + +TEST(EncodeTest, AddFrameBeforeColorEncodingTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + size_t xsize = 64; + size_t ysize = 64; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + + jxl::CodecInOut input_io = + jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize); + + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = true; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); +} + +TEST(EncodeTest, AddFrameBeforeBasicInfoTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + size_t xsize = 64; + size_t ysize = 64; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + + jxl::CodecInOut input_io = + jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize); + + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/pixel_format.num_channels < 3); + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); +} + 
+TEST(EncodeTest, DefaultAllocTest) { + JxlEncoder* enc = JxlEncoderCreate(nullptr); + EXPECT_NE(nullptr, enc); + JxlEncoderDestroy(enc); +} + +TEST(EncodeTest, CustomAllocTest) { + struct CalledCounters { + int allocs = 0; + int frees = 0; + } counters; + + JxlMemoryManager mm; + mm.opaque = &counters; + mm.alloc = [](void* opaque, size_t size) { + reinterpret_cast(opaque)->allocs++; + return malloc(size); + }; + mm.free = [](void* opaque, void* address) { + reinterpret_cast(opaque)->frees++; + free(address); + }; + + { + JxlEncoderPtr enc = JxlEncoderMake(&mm); + EXPECT_NE(nullptr, enc.get()); + EXPECT_LE(1, counters.allocs); + EXPECT_EQ(0, counters.frees); + } + EXPECT_LE(1, counters.frees); +} + +TEST(EncodeTest, DefaultParallelRunnerTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetParallelRunner(enc.get(), nullptr, nullptr)); +} + +void VerifyFrameEncoding(size_t xsize, size_t ysize, JxlEncoder* enc, + const JxlEncoderFrameSettings* frame_settings, + size_t max_compressed_size, + bool lossy_use_original_profile) { + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + + jxl::CodecInOut input_io = + jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize); + + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + if (frame_settings->values.lossless || lossy_use_original_profile) { + basic_info.uses_original_profile = true; + } else { + basic_info.uses_original_profile = false; + } + // 16-bit alpha means this requires level 10 + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, true); + EXPECT_EQ(JXL_ENC_ERROR, 
JxlEncoderSetColorEncoding(enc, &color_encoding)); + JxlColorEncodingSetToSRGB(&color_encoding, false); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding)); + pixel_format.num_channels = 1; + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + pixel_format.num_channels = 4; + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + JxlEncoderCloseInput(enc); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() * 2); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + compressed.resize(next_out - compressed.data()); + EXPECT_LE(compressed.size(), max_compressed_size); + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); + jxl::CodecInOut decoded_io; + EXPECT_TRUE(jxl::test::DecodeFile( + {}, jxl::Span(compressed.data(), compressed.size()), + &decoded_io)); + + EXPECT_LE( + ComputeDistance2(input_io.Main(), decoded_io.Main(), jxl::GetJxlCms()), +#if JXL_HIGH_PRECISION + 1.84); +#else + 8.7); +#endif +} + +void VerifyFrameEncoding(JxlEncoder* enc, + const JxlEncoderFrameSettings* frame_settings) { + VerifyFrameEncoding(63, 129, enc, frame_settings, 2700, + /*lossy_use_original_profile=*/false); +} + +TEST(EncodeTest, FrameEncodingTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + VerifyFrameEncoding(enc.get(), + JxlEncoderFrameSettingsCreate(enc.get(), nullptr)); +} + +TEST(EncodeTest, EncoderResetTest) { + 
JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + VerifyFrameEncoding(50, 200, enc.get(), + JxlEncoderFrameSettingsCreate(enc.get(), nullptr), 4300, + false); + // Encoder should become reusable for a new image from scratch after using + // reset. + JxlEncoderReset(enc.get()); + VerifyFrameEncoding(157, 77, enc.get(), + JxlEncoderFrameSettingsCreate(enc.get(), nullptr), 2300, + false); +} + +TEST(EncodeTest, CmsTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + bool cms_called = false; + JxlCmsInterface cms = jxl::GetJxlCms(); + struct InitData { + void* original_init_data; + jpegxl_cms_init_func original_init; + bool* cms_called; + }; + InitData init_data = {/*original_init_data=*/cms.init_data, + /*original_init=*/cms.init, + /*cms_called=*/&cms_called}; + cms.init_data = &init_data; + cms.init = +[](void* raw_init_data, size_t num_threads, + size_t pixels_per_thread, const JxlColorProfile* input_profile, + const JxlColorProfile* output_profile, + float intensity_target) { + const InitData* init_data = static_cast(raw_init_data); + *init_data->cms_called = true; + return init_data->original_init(init_data->original_init_data, num_threads, + pixels_per_thread, input_profile, + output_profile, intensity_target); + }; + JxlEncoderSetCms(enc.get(), cms); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), nullptr); + JxlEncoderSetFrameLossless(frame_settings, false); + ASSERT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption(frame_settings, + JXL_ENC_FRAME_SETTING_EFFORT, 8)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_TRUE(cms_called); +} + +TEST(EncodeTest, frame_settingsTest) { + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, 
JXL_ENC_FRAME_SETTING_EFFORT, 5)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_EQ(jxl::SpeedTier::kHare, enc->last_used_cparams.speed_tier); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + // Lower than currently supported values + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 0)); + // Higher than currently supported values + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 11)); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetFrameLossless(frame_settings, JXL_TRUE)); + VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 3000, false); + EXPECT_EQ(true, enc->last_used_cparams.IsLossless()); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetFrameDistance(frame_settings, 0.5)); + VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 3030, false); + EXPECT_EQ(0.5, enc->last_used_cparams.butteraugli_distance); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + // Disallowed negative distance + EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetFrameDistance(frame_settings, -1)); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, 
JXL_ENC_FRAME_SETTING_DECODING_SPEED, 2)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_EQ(2u, enc->last_used_cparams.decoding_speed_tier); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_ERROR, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER, 100)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER, 1)); + EXPECT_EQ( + JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_GROUP_ORDER_CENTER_X, 5)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_EQ(true, enc->last_used_cparams.centerfirst); + EXPECT_EQ(5, enc->last_used_cparams.center_x); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_RESPONSIVE, 0)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_QPROGRESSIVE_AC, -1)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2)); + VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 2830, + /*lossy_use_original_profile=*/false); + EXPECT_EQ(false, enc->last_used_cparams.responsive); + EXPECT_EQ(true, enc->last_used_cparams.progressive_mode); + EXPECT_EQ(2, enc->last_used_cparams.progressive_dc); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + 
JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ( + JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetFloatOption( + frame_settings, JXL_ENC_FRAME_SETTING_PHOTON_NOISE, 1777.777)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_NEAR(1777.777f, enc->last_used_cparams.photon_noise_iso, 1E-4); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetFloatOption( + frame_settings, + JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GLOBAL_PERCENT, 55.0f)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetFloatOption( + frame_settings, + JXL_ENC_FRAME_SETTING_CHANNEL_COLORS_GROUP_PERCENT, 25.0f)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_PALETTE_COLORS, 70000)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_LOSSY_PALETTE, 1)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_NEAR(55.0f, + enc->last_used_cparams.channel_colors_pre_transform_percent, + 1E-6); + EXPECT_NEAR(25.0f, enc->last_used_cparams.channel_colors_percent, 1E-6); + EXPECT_EQ(70000, enc->last_used_cparams.palette_colors); + EXPECT_EQ(true, enc->last_used_cparams.lossy_palette); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ( + JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_COLOR_SPACE, 30)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_GROUP_SIZE, 2)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 14)); + EXPECT_EQ( + JXL_ENC_SUCCESS, + 
JxlEncoderFrameSettingsSetFloatOption( + frame_settings, + JXL_ENC_FRAME_SETTING_MODULAR_MA_TREE_LEARNING_PERCENT, 77.0f)); + EXPECT_EQ( + JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_MODULAR_NB_PREV_CHANNELS, 7)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_EQ(30, enc->last_used_cparams.colorspace); + EXPECT_EQ(2, enc->last_used_cparams.modular_group_size_shift); + EXPECT_EQ(jxl::Predictor::Best, enc->last_used_cparams.options.predictor); + EXPECT_NEAR(0.77f, enc->last_used_cparams.options.nb_repeats, 1E-6); + EXPECT_EQ(7, enc->last_used_cparams.options.max_properties); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL, 0)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_EQ(false, enc->last_used_cparams.force_cfl_jpeg_recompression); + } + + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_JPEG_RECON_CFL, 1)); + VerifyFrameEncoding(enc.get(), frame_settings); + EXPECT_EQ(true, enc->last_used_cparams.force_cfl_jpeg_recompression); + } +} + +TEST(EncodeTest, LossyEncoderUseOriginalProfileTest) { + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + ASSERT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 7897, true); + } + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + ASSERT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), 
NULL); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2)); + VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 8310, true); + } + { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + ASSERT_NE(nullptr, enc.get()); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + ASSERT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, 8)); + VerifyFrameEncoding(63, 129, enc.get(), frame_settings, 7228, true); + } +} + +namespace { +// Returns a copy of buf from offset to offset+size, or a new zeroed vector if +// the result would have been out of bounds taking integer overflow into +// account. +std::vector SliceSpan(const jxl::Span& buf, + size_t offset, size_t size) { + if (offset + size >= buf.size()) { + return std::vector(size, 0); + } + if (offset + size < offset) { + return std::vector(size, 0); + } + return std::vector(buf.data() + offset, buf.data() + offset + size); +} + +struct Box { + // The type of the box. + // If "uuid", use extended_type instead + char type[4] = {0, 0, 0, 0}; + + // The extended_type is only used when type == "uuid". + // Extended types are not used in JXL. However, the box format itself + // supports this so they are handled correctly. + char extended_type[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + + // Box data. + jxl::Span data = jxl::Span(nullptr, 0); + + // If the size is not given, the datasize extends to the end of the file. + // If this field is false, the size field is not encoded when the box is + // serialized. + bool data_size_given = true; + + // If successful, returns true and sets `in` to be the rest data (if any). + // If `in` contains a box with a size larger than `in.size()`, will not + // modify `in`, and will return true but the data `Span` will + // remain set to nullptr. + // If unsuccessful, returns error and doesn't modify `in`. 
+ jxl::Status Decode(jxl::Span* in) { + // Total box_size including this header itself. + uint64_t box_size = LoadBE32(SliceSpan(*in, 0, 4).data()); + size_t pos = 4; + + memcpy(type, SliceSpan(*in, pos, 4).data(), 4); + pos += 4; + + if (box_size == 1) { + // If the size is 1, it indicates extended size read from 64-bit integer. + box_size = LoadBE64(SliceSpan(*in, pos, 8).data()); + pos += 8; + } + + if (!memcmp("uuid", type, 4)) { + memcpy(extended_type, SliceSpan(*in, pos, 16).data(), 16); + pos += 16; + } + + // This is the end of the box header, the box data begins here. Handle + // the data size now. + const size_t header_size = pos; + + if (box_size != 0) { + if (box_size < header_size) { + return JXL_FAILURE("Invalid box size"); + } + if (box_size > in->size()) { + // The box is fine, but the input is too short. + return true; + } + data_size_given = true; + data = jxl::Span(in->data() + header_size, + box_size - header_size); + } else { + data_size_given = false; + data = jxl::Span(in->data() + header_size, + in->size() - header_size); + } + + *in = jxl::Span(in->data() + header_size + data.size(), + in->size() - header_size - data.size()); + return true; + } +}; + +struct Container { + std::vector boxes; + + // If successful, returns true and sets `in` to be the rest data (if any). + // If unsuccessful, returns error and doesn't modify `in`. 
+ jxl::Status Decode(jxl::Span* in) { + boxes.clear(); + + Box signature_box; + JXL_RETURN_IF_ERROR(signature_box.Decode(in)); + if (memcmp("JXL ", signature_box.type, 4) != 0) { + return JXL_FAILURE("Invalid magic signature"); + } + if (signature_box.data.size() != 4) + return JXL_FAILURE("Invalid magic signature"); + if (signature_box.data[0] != 0xd || signature_box.data[1] != 0xa || + signature_box.data[2] != 0x87 || signature_box.data[3] != 0xa) { + return JXL_FAILURE("Invalid magic signature"); + } + + Box ftyp_box; + JXL_RETURN_IF_ERROR(ftyp_box.Decode(in)); + if (memcmp("ftyp", ftyp_box.type, 4) != 0) { + return JXL_FAILURE("Invalid ftyp"); + } + if (ftyp_box.data.size() != 12) return JXL_FAILURE("Invalid ftyp"); + const char* expected = "jxl \0\0\0\0jxl "; + if (memcmp(expected, ftyp_box.data.data(), 12) != 0) + return JXL_FAILURE("Invalid ftyp"); + + while (!in->empty()) { + Box box = {}; + JXL_RETURN_IF_ERROR(box.Decode(in)); + if (box.data.data() == nullptr) { + // The decoding encountered a box, but not enough data yet. 
+ return true; + } + boxes.emplace_back(box); + } + + return true; + } +}; + +} // namespace + +TEST(EncodeTest, SingleFrameBoundedJXLCTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), true)); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + + size_t xsize = 71; + size_t ysize = 23; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = false; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/false); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + JxlEncoderCloseInput(enc.get()); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() * 2); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + compressed.resize(next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); + + Container container = {}; + 
jxl::Span encoded_span = + jxl::Span(compressed.data(), compressed.size()); + EXPECT_TRUE(container.Decode(&encoded_span)); + EXPECT_EQ(0u, encoded_span.size()); + bool found_jxlc = false; + bool found_jxlp = false; + // The encoder is allowed to either emit a jxlc or one or more jxlp. + for (size_t i = 0; i < container.boxes.size(); ++i) { + if (memcmp("jxlc", container.boxes[i].type, 4) == 0) { + EXPECT_EQ(false, found_jxlc); // Max 1 jxlc + EXPECT_EQ(false, found_jxlp); // Can't mix jxlc and jxlp + found_jxlc = true; + } + if (memcmp("jxlp", container.boxes[i].type, 4) == 0) { + EXPECT_EQ(false, found_jxlc); // Can't mix jxlc and jxlp + found_jxlp = true; + } + // The encoder shouldn't create an unbounded box in this case, with the + // single frame it knows the full size in time, so can help make decoding + // more efficient by giving the full box size of the final box. + EXPECT_EQ(true, container.boxes[i].data_size_given); + } + EXPECT_EQ(true, found_jxlc || found_jxlp); +} + +TEST(EncodeTest, CodestreamLevelTest) { + size_t xsize = 64; + size_t ysize = 64; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + + jxl::CodecInOut input_io = + jxl::test::SomeTestImageToCodecInOut(pixels, 4, xsize, ysize); + + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = false; + + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/pixel_format.num_channels < 3); + EXPECT_EQ(JXL_ENC_SUCCESS, + 
JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + JxlEncoderCloseInput(enc.get()); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() * 2); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + compressed.resize(next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); + + Container container = {}; + jxl::Span encoded_span = + jxl::Span(compressed.data(), compressed.size()); + EXPECT_TRUE(container.Decode(&encoded_span)); + EXPECT_EQ(0u, encoded_span.size()); + EXPECT_EQ(0, memcmp("jxll", container.boxes[0].type, 4)); +} + +TEST(EncodeTest, CodestreamLevelVerificationTest) { + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT8, JXL_BIG_ENDIAN, 0}; + + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = 64; + basic_info.ysize = 64; + basic_info.uses_original_profile = false; + + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + + EXPECT_EQ(5, JxlEncoderGetRequiredCodestreamLevel(enc.get())); + + // Set an image dimension that is too large for level 5, but fits in level 10 + + basic_info.xsize = 1ull << 30ull; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 5)); + EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + EXPECT_EQ(JXL_ENC_SUCCESS, 
JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + EXPECT_EQ(10, JxlEncoderGetRequiredCodestreamLevel(enc.get())); + + // Set an image dimension that is too large even for level 10 + + basic_info.xsize = 1ull << 31ull; + EXPECT_EQ(JXL_ENC_ERROR, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); +} + +TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGReconstructionTest)) { + const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg"; + const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path); + + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size())); + JxlEncoderCloseInput(enc.get()); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() * 2); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + compressed.resize(next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); + + jxl::extras::JXLDecompressParams dparams; + jxl::test::DefaultAcceptedFormats(dparams); + std::vector decoded_jpeg_bytes; + jxl::extras::PackedPixelFile ppf; + EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams, + nullptr, &ppf, &decoded_jpeg_bytes)); + + EXPECT_EQ(decoded_jpeg_bytes.size(), orig.size()); + EXPECT_EQ(0, 
memcmp(decoded_jpeg_bytes.data(), orig.data(), orig.size())); +} + +TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(ProgressiveJPEGReconstructionTest)) { + const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg"; + const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path); + + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + + frame_settings->values.cparams.progressive_mode = true; + + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size())); + JxlEncoderCloseInput(enc.get()); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() * 2); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + compressed.resize(next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); + + jxl::extras::JXLDecompressParams dparams; + jxl::test::DefaultAcceptedFormats(dparams); + std::vector decoded_jpeg_bytes; + jxl::extras::PackedPixelFile ppf; + EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams, + nullptr, &ppf, &decoded_jpeg_bytes)); + + EXPECT_EQ(decoded_jpeg_bytes.size(), orig.size()); + EXPECT_EQ(0, memcmp(decoded_jpeg_bytes.data(), orig.data(), orig.size())); +} + +static void ProcessEncoder(JxlEncoder* enc, std::vector& compressed, + uint8_t*& next_out, size_t& avail_out) { + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while 
(process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() * 2); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + size_t offset = next_out - compressed.data(); + compressed.resize(next_out - compressed.data()); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); +} + +TEST(EncodeTest, BasicInfoTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + size_t xsize = 1; + size_t ysize = 1; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = false; + basic_info.have_animation = true; + basic_info.intensity_target = 123.4; + basic_info.min_nits = 5.0; + basic_info.linear_below = 12.7; + basic_info.orientation = JXL_ORIENT_ROTATE_90_CW; + basic_info.intrinsic_xsize = 88; + basic_info.intrinsic_ysize = 99; + basic_info.animation.tps_numerator = 55; + basic_info.animation.tps_denominator = 77; + basic_info.animation.num_loops = 10; + basic_info.animation.have_timecodes = JXL_TRUE; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + + std::vector compressed = 
std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + JxlEncoderCloseFrames(enc.get()); + ProcessEncoder(enc.get(), compressed, next_out, avail_out); + + // Decode to verify the boxes, we don't decode to pixels, only the boxes. + JxlDecoderPtr dec = JxlDecoderMake(nullptr); + EXPECT_NE(nullptr, dec.get()); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_BASIC_INFO)); + // Allow testing the orientation field, without this setting it will be + // overridden to identity. + JxlDecoderSetKeepOrientation(dec.get(), JXL_TRUE); + JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size()); + JxlDecoderCloseInput(dec.get()); + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + if (status == JXL_DEC_ERROR) { + FAIL(); + } else if (status == JXL_DEC_SUCCESS) { + break; + } else if (status == JXL_DEC_BASIC_INFO) { + JxlBasicInfo basic_info2; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetBasicInfo(dec.get(), &basic_info2)); + EXPECT_EQ(basic_info.xsize, basic_info2.xsize); + EXPECT_EQ(basic_info.ysize, basic_info2.ysize); + EXPECT_EQ(basic_info.bits_per_sample, basic_info2.bits_per_sample); + EXPECT_EQ(basic_info.exponent_bits_per_sample, + basic_info2.exponent_bits_per_sample); + EXPECT_NEAR(basic_info.intensity_target, basic_info2.intensity_target, + 0.5); + EXPECT_NEAR(basic_info.min_nits, basic_info2.min_nits, 0.5); + EXPECT_NEAR(basic_info.linear_below, basic_info2.linear_below, 0.5); + EXPECT_EQ(basic_info.relative_to_max_display, + basic_info2.relative_to_max_display); + EXPECT_EQ(basic_info.uses_original_profile, + basic_info2.uses_original_profile); + EXPECT_EQ(basic_info.orientation, basic_info2.orientation); + EXPECT_EQ(basic_info.intrinsic_xsize, basic_info2.intrinsic_xsize); + 
EXPECT_EQ(basic_info.intrinsic_ysize, basic_info2.intrinsic_ysize); + EXPECT_EQ(basic_info.num_color_channels, basic_info2.num_color_channels); + // TODO(lode): also test num_extra_channels, but currently there may be a + // mismatch between 0 and 1 if there is alpha, until encoder support for + // extra channels is fully implemented. + EXPECT_EQ(basic_info.alpha_bits, basic_info2.alpha_bits); + EXPECT_EQ(basic_info.alpha_exponent_bits, + basic_info2.alpha_exponent_bits); + EXPECT_EQ(basic_info.alpha_premultiplied, + basic_info2.alpha_premultiplied); + + EXPECT_EQ(basic_info.have_preview, basic_info2.have_preview); + if (basic_info.have_preview) { + EXPECT_EQ(basic_info.preview.xsize, basic_info2.preview.xsize); + EXPECT_EQ(basic_info.preview.ysize, basic_info2.preview.ysize); + } + + EXPECT_EQ(basic_info.have_animation, basic_info2.have_animation); + if (basic_info.have_animation) { + EXPECT_EQ(basic_info.animation.tps_numerator, + basic_info2.animation.tps_numerator); + EXPECT_EQ(basic_info.animation.tps_denominator, + basic_info2.animation.tps_denominator); + EXPECT_EQ(basic_info.animation.num_loops, + basic_info2.animation.num_loops); + EXPECT_EQ(basic_info.animation.have_timecodes, + basic_info2.animation.have_timecodes); + } + } else { + FAIL(); // unexpected status + } + } +} + +TEST(EncodeTest, AnimationHeaderTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + size_t xsize = 1; + size_t ysize = 1; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.have_animation = true; + basic_info.animation.tps_numerator = 1000; + basic_info.animation.tps_denominator = 1; + 
basic_info.animation.have_timecodes = JXL_TRUE; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + + std::string frame_name = "test frame"; + JxlFrameHeader header; + JxlEncoderInitFrameHeader(&header); + header.duration = 50; + header.timecode = 800; + header.layer_info.blend_info.blendmode = JXL_BLEND_BLEND; + header.layer_info.blend_info.source = 2; + header.layer_info.blend_info.clamp = 1; + JxlBlendInfo extra_channel_blend_info; + JxlEncoderInitBlendInfo(&extra_channel_blend_info); + extra_channel_blend_info.blendmode = JXL_BLEND_MULADD; + JxlEncoderSetFrameHeader(frame_settings, &header); + JxlEncoderSetExtraChannelBlendInfo(frame_settings, 0, + &extra_channel_blend_info); + JxlEncoderSetFrameName(frame_settings, frame_name.c_str()); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + JxlEncoderCloseFrames(enc.get()); + ProcessEncoder(enc.get(), compressed, next_out, avail_out); + + // Decode to verify the boxes, we don't decode to pixels, only the boxes. + JxlDecoderPtr dec = JxlDecoderMake(nullptr); + EXPECT_NE(nullptr, dec.get()); + + // To test the blend_info fields, coalescing must be set to false in the + // decoder. 
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec.get(), JXL_FALSE)); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_FRAME)); + JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size()); + JxlDecoderCloseInput(dec.get()); + + bool seen_frame = false; + + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + if (status == JXL_DEC_ERROR) { + FAIL(); + } else if (status == JXL_DEC_SUCCESS) { + break; + } else if (status == JXL_DEC_FRAME) { + seen_frame = true; + JxlFrameHeader header2; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec.get(), &header2)); + EXPECT_EQ(header.duration, header2.duration); + EXPECT_EQ(header.timecode, header2.timecode); + EXPECT_EQ(header.layer_info.blend_info.blendmode, + header2.layer_info.blend_info.blendmode); + EXPECT_EQ(header.layer_info.blend_info.clamp, + header2.layer_info.blend_info.clamp); + EXPECT_EQ(header.layer_info.blend_info.source, + header2.layer_info.blend_info.source); + EXPECT_EQ(frame_name.size(), header2.name_length); + JxlBlendInfo extra_channel_blend_info2; + JxlDecoderGetExtraChannelBlendInfo(dec.get(), 0, + &extra_channel_blend_info2); + EXPECT_EQ(extra_channel_blend_info.blendmode, + extra_channel_blend_info2.blendmode); + if (header2.name_length > 0) { + std::string frame_name2(header2.name_length + 1, '\0'); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetFrameName(dec.get(), &frame_name2.front(), + frame_name2.size())); + frame_name2.resize(header2.name_length); + EXPECT_EQ(frame_name, frame_name2); + } + } else { + FAIL(); // unexpected status + } + } + + EXPECT_EQ(true, seen_frame); +} +TEST(EncodeTest, CroppedFrameTest) { + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + size_t xsize = 300; + size_t ysize = 300; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = 
jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + std::vector pixels2(pixels.size()); + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + // Encoding a 300x300 frame in an image that is only 100x100 + basic_info.xsize = 100; + basic_info.ysize = 100; + basic_info.uses_original_profile = JXL_TRUE; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + + JxlFrameHeader header; + JxlEncoderInitFrameHeader(&header); + header.layer_info.have_crop = JXL_TRUE; + header.layer_info.xsize = xsize; + header.layer_info.ysize = ysize; + header.layer_info.crop_x0 = -50; + header.layer_info.crop_y0 = -250; + JxlEncoderSetFrameLossless(frame_settings, JXL_TRUE); + JxlEncoderSetFrameHeader(frame_settings, &header); + JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, + 1); + + std::vector compressed = std::vector(100); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + JxlEncoderCloseFrames(enc.get()); + ProcessEncoder(enc.get(), compressed, next_out, avail_out); + + JxlDecoderPtr dec = JxlDecoderMake(nullptr); + EXPECT_NE(nullptr, dec.get()); + // Non-coalesced decoding so we can get the full uncropped frame + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetCoalescing(dec.get(), JXL_FALSE)); + EXPECT_EQ( + JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec.get(), JXL_DEC_FRAME | JXL_DEC_FULL_IMAGE)); + JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size()); + JxlDecoderCloseInput(dec.get()); + + bool seen_frame = false; + bool 
checked_frame = false; + for (;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + if (status == JXL_DEC_ERROR) { + FAIL(); + } else if (status == JXL_DEC_SUCCESS) { + break; + } else if (status == JXL_DEC_FRAME) { + seen_frame = true; + JxlFrameHeader header2; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetFrameHeader(dec.get(), &header2)); + EXPECT_EQ(header.layer_info.xsize, header2.layer_info.xsize); + EXPECT_EQ(header.layer_info.ysize, header2.layer_info.ysize); + EXPECT_EQ(header.layer_info.crop_x0, header2.layer_info.crop_x0); + EXPECT_EQ(header.layer_info.crop_y0, header2.layer_info.crop_y0); + } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec.get(), &pixel_format, + pixels2.data(), pixels2.size())); + } else if (status == JXL_DEC_FULL_IMAGE) { + EXPECT_EQ(0, memcmp(pixels.data(), pixels2.data(), pixels.size())); + checked_frame = true; + } else { + FAIL(); // unexpected status + } + } + EXPECT_EQ(true, checked_frame); + EXPECT_EQ(true, seen_frame); +} + +TEST(EncodeTest, JXL_BOXES_TEST(BoxTest)) { + // Test with uncompressed boxes and with brob boxes + for (int compress_box = 0; compress_box <= 1; ++compress_box) { + // Tests adding two metadata boxes with the encoder: an exif box before the + // image frame, and an xml box after the image frame. Then verifies the + // decoder can decode them, they are in the expected place, and have the + // correct content after decoding. 
+ JxlEncoderPtr enc = JxlEncoderMake(nullptr); + EXPECT_NE(nullptr, enc.get()); + + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseBoxes(enc.get())); + + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + size_t xsize = 50; + size_t ysize = 17; + JxlPixelFormat pixel_format = {4, JXL_TYPE_UINT16, JXL_BIG_ENDIAN, 0}; + std::vector pixels = + jxl::test::GetSomeTestImage(xsize, ysize, 4, 0); + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = false; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc.get(), 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/false); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + + // Add an early metadata box. Also add a valid 4-byte TIFF offset header + // before the fake exif data of these box contents. 
+ constexpr const char* exif_test_string = "\0\0\0\0exif test data"; + const uint8_t* exif_data = + reinterpret_cast(exif_test_string); + // Skip the 4 zeroes for strlen + const size_t exif_size = 4 + strlen(exif_test_string + 4); + JxlEncoderAddBox(enc.get(), "Exif", exif_data, exif_size, compress_box); + + // Write to output + ProcessEncoder(enc.get(), compressed, next_out, avail_out); + + // Add image frame + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + pixels.data(), pixels.size())); + // Indicate this is the last frame + JxlEncoderCloseFrames(enc.get()); + + // Write to output + ProcessEncoder(enc.get(), compressed, next_out, avail_out); + + // Add a late metadata box + constexpr const char* xml_test_string = ""; + const uint8_t* xml_data = reinterpret_cast(xml_test_string); + size_t xml_size = strlen(xml_test_string); + JxlEncoderAddBox(enc.get(), "XML ", xml_data, xml_size, compress_box); + + // Indicate this is the last box + JxlEncoderCloseBoxes(enc.get()); + + // Write to output + ProcessEncoder(enc.get(), compressed, next_out, avail_out); + + // Decode to verify the boxes, we don't decode to pixels, only the boxes. 
+ JxlDecoderPtr dec = JxlDecoderMake(nullptr); + EXPECT_NE(nullptr, dec.get()); + + if (compress_box) { + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetDecompressBoxes(dec.get(), JXL_TRUE)); + } + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSubscribeEvents( + dec.get(), JXL_DEC_FRAME | JXL_DEC_BOX)); + + JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size()); + JxlDecoderCloseInput(dec.get()); + + std::vector dec_exif_box(exif_size); + std::vector dec_xml_box(xml_size); + + for (bool post_frame = false;;) { + JxlDecoderStatus status = JxlDecoderProcessInput(dec.get()); + if (status == JXL_DEC_ERROR) { + FAIL(); + } else if (status == JXL_DEC_SUCCESS) { + EXPECT_EQ(0, JxlDecoderReleaseBoxBuffer(dec.get())); + break; + } else if (status == JXL_DEC_FRAME) { + post_frame = true; + } else if (status == JXL_DEC_BOX) { + // Since we gave the exif/xml box output buffer of the exact known + // correct size, 0 bytes should be released. Same when no buffer was + // set. + EXPECT_EQ(0, JxlDecoderReleaseBoxBuffer(dec.get())); + JxlBoxType type; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBoxType(dec.get(), type, true)); + if (!memcmp(type, "Exif", 4)) { + // This box should have been encoded before the image frame + EXPECT_EQ(false, post_frame); + JxlDecoderSetBoxBuffer(dec.get(), dec_exif_box.data(), + dec_exif_box.size()); + } else if (!memcmp(type, "XML ", 4)) { + // This box should have been encoded after the image frame + EXPECT_EQ(true, post_frame); + JxlDecoderSetBoxBuffer(dec.get(), dec_xml_box.data(), + dec_xml_box.size()); + } + } else { + FAIL(); // unexpected status + } + } + + EXPECT_EQ(0, memcmp(exif_data, dec_exif_box.data(), exif_size)); + EXPECT_EQ(0, memcmp(xml_data, dec_xml_box.data(), xml_size)); + } +} + +TEST(EncodeTest, JXL_TRANSCODE_JPEG_TEST(JPEGFrameTest)) { + TEST_LIBJPEG_SUPPORT(); + for (int skip_basic_info = 0; skip_basic_info < 2; skip_basic_info++) { + for (int skip_color_encoding = 0; skip_color_encoding < 2; + skip_color_encoding++) { + // 
cannot set color encoding if basic info is not set + if (skip_basic_info && !skip_color_encoding) continue; + const std::string jpeg_path = "jxl/flower/flower_cropped.jpg"; + const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path); + jxl::CodecInOut orig_io; + ASSERT_TRUE(SetFromBytes(jxl::Span(orig), &orig_io, + /*pool=*/nullptr)); + + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + JxlEncoderFrameSettingsSetOption(frame_settings, + JXL_ENC_FRAME_SETTING_EFFORT, 1); + if (!skip_basic_info) { + JxlBasicInfo basic_info; + JxlEncoderInitBasicInfo(&basic_info); + basic_info.xsize = orig_io.xsize(); + basic_info.ysize = orig_io.ysize(); + basic_info.uses_original_profile = true; + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetBasicInfo(enc.get(), &basic_info)); + } + if (!skip_color_encoding) { + JxlColorEncoding color_encoding; + JxlColorEncodingSetToSRGB(&color_encoding, /*is_gray=*/false); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)); + } + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderAddJPEGFrame( + frame_settings, orig.data(), orig.size())); + JxlEncoderCloseInput(enc.get()); + + std::vector compressed = std::vector(64); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size() - (next_out - compressed.data()); + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = + JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() * 2); + next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + compressed.resize(next_out - compressed.data()); + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); + + jxl::CodecInOut decoded_io; + EXPECT_TRUE(jxl::test::DecodeFile( + {}, 
jxl::Span(compressed.data(), compressed.size()), + &decoded_io)); + + EXPECT_LE( + ComputeDistance2(orig_io.Main(), decoded_io.Main(), jxl::GetJxlCms()), + 3.5); + } + } +} diff --git a/third-party/libjxl/libjxl/lib/jxl/entropy_coder.cc b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.cc new file mode 100644 index 0000000000..62ca1becf8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.cc @@ -0,0 +1,69 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/entropy_coder.h" + +#include +#include + +#include +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_context_map.h" +#include "lib/jxl/epf.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { + +Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map) { + auto& dct = block_ctx_map->dc_thresholds; + auto& qft = block_ctx_map->qf_thresholds; + auto& ctx_map = block_ctx_map->ctx_map; + bool is_default = br->ReadFixedBits<1>(); + if (is_default) { + *block_ctx_map = BlockCtxMap(); + return true; + } + block_ctx_map->num_dc_ctxs = 1; + for (int j : {0, 1, 2}) { + dct[j].resize(br->ReadFixedBits<4>()); + block_ctx_map->num_dc_ctxs *= dct[j].size() + 1; + for (int& i : dct[j]) { + i = UnpackSigned(U32Coder::Read(kDCThresholdDist, br)); + } + } + qft.resize(br->ReadFixedBits<4>()); + for (uint32_t& i : qft) { + i = U32Coder::Read(kQFThresholdDist, br) + 1; + } + + if (block_ctx_map->num_dc_ctxs * (qft.size() + 1) > 64) { + return JXL_FAILURE("Invalid block context map: too big"); + } 
+ + ctx_map.resize(3 * kNumOrders * block_ctx_map->num_dc_ctxs * + (qft.size() + 1)); + JXL_RETURN_IF_ERROR(DecodeContextMap(&ctx_map, &block_ctx_map->num_ctxs, br)); + if (block_ctx_map->num_ctxs > 16) { + return JXL_FAILURE("Invalid block context map: too many distinct contexts"); + } + return true; +} + +constexpr uint8_t BlockCtxMap::kDefaultCtxMap[]; // from ac_context.h + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/entropy_coder.h b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.h new file mode 100644 index 0000000000..e4afa7a631 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/entropy_coder.h @@ -0,0 +1,45 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ENTROPY_CODER_H_ +#define LIB_JXL_ENTROPY_CODER_H_ + +#include +#include + +#include "lib/jxl/ac_context.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/field_encodings.h" + +// Entropy coding and context modeling of DC and AC coefficients, as well as AC +// strategy and quantization field. + +namespace jxl { + +static JXL_INLINE int32_t PredictFromTopAndLeft( + const int32_t* const JXL_RESTRICT row_top, + const int32_t* const JXL_RESTRICT row, size_t x, int32_t default_val) { + if (x == 0) { + return row_top == nullptr ? 
default_val : row_top[x]; + } + if (row_top == nullptr) { + return row[x - 1]; + } + return (row_top[x] + row[x - 1] + 1) / 2; +} + +static constexpr U32Enc kDCThresholdDist(Bits(4), BitsOffset(8, 16), + BitsOffset(16, 272), + BitsOffset(32, 65808)); + +static constexpr U32Enc kQFThresholdDist(Bits(2), BitsOffset(3, 4), + BitsOffset(5, 12), BitsOffset(8, 44)); + +Status DecodeBlockCtxMap(BitReader* br, BlockCtxMap* block_ctx_map); + +} // namespace jxl + +#endif // LIB_JXL_ENTROPY_CODER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/entropy_coder_test.cc b/third-party/libjxl/libjxl/lib/jxl/entropy_coder_test.cc new file mode 100644 index 0000000000..9dbeb137af --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/entropy_coder_test.cc @@ -0,0 +1,68 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// TODO(deymo): Move these tests to dec_ans.h and common.h + +#include + +#include "lib/jxl/base/random.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(EntropyCoderTest, PackUnpack) { + for (int32_t i = -31; i < 32; ++i) { + uint32_t packed = PackSigned(i); + EXPECT_LT(packed, 63u); + int32_t unpacked = UnpackSigned(packed); + EXPECT_EQ(i, unpacked); + } +} + +struct DummyBitReader { + uint32_t nbits, bits; + void Consume(uint32_t nbits) {} + uint32_t PeekBits(uint32_t n) { + EXPECT_EQ(n, nbits); + return bits; + } +}; + +void HybridUintRoundtrip(HybridUintConfig config, size_t limit = 1 << 24) { + Rng rng(0); + constexpr size_t kNumIntegers = 1 << 20; + std::vector integers(kNumIntegers); + std::vector token(kNumIntegers); + std::vector nbits(kNumIntegers); + std::vector bits(kNumIntegers); + for (size_t i = 0; i < kNumIntegers; i++) { + integers[i] = rng.UniformU(0, limit + 1); + config.Encode(integers[i], &token[i], &nbits[i], &bits[i]); + } + for 
(size_t i = 0; i < kNumIntegers; i++) { + DummyBitReader br{nbits[i], bits[i]}; + EXPECT_EQ(integers[i], + ANSSymbolReader::ReadHybridUintConfig(config, token[i], &br)); + } +} + +TEST(HybridUintTest, Test000) { + HybridUintRoundtrip(HybridUintConfig{0, 0, 0}); +} +TEST(HybridUintTest, Test411) { + HybridUintRoundtrip(HybridUintConfig{4, 1, 1}); +} +TEST(HybridUintTest, Test420) { + HybridUintRoundtrip(HybridUintConfig{4, 2, 0}); +} +TEST(HybridUintTest, Test421) { + HybridUintRoundtrip(HybridUintConfig{4, 2, 1}, 256); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/epf.cc b/third-party/libjxl/libjxl/lib/jxl/epf.cc new file mode 100644 index 0000000000..7288ed9ca6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/epf.cc @@ -0,0 +1,146 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Edge-preserving smoothing: weighted average based on L1 patch similarity. + +#include "lib/jxl/epf.h" + +#include +#include +#include +#include +#include + +#include +#include +#include // std::accumulate +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/quant_weights.h" +#include "lib/jxl/quantizer.h" + +namespace jxl { + +// Mirror n floats starting at *p and store them before p. +JXL_INLINE void LeftMirror(float* p, size_t n) { + for (size_t i = 0; i < n; i++) { + *(p - 1 - i) = p[i]; + } +} + +// Mirror n floats starting at *(p - n) and store them at *p. 
+JXL_INLINE void RightMirror(float* p, size_t n) { + for (size_t i = 0; i < n; i++) { + p[i] = *(p - 1 - i); + } +} + +// Writes 1 / sigma into `state->sigma` for every block covered by an AC +// strategy entry whose first block lies inside `block_rect`, then mirrors the +// written values into the padding at the frame edges. +// Requires `lf.epf_iters > 0` (enforced with JXL_CHECK). +void ComputeSigma(const Rect& block_rect, PassesDecoderState* state) { + const LoopFilter& lf = state->shared->frame_header.loop_filter; + JXL_CHECK(lf.epf_iters > 0); + const AcStrategyImage& ac_strategy = state->shared->ac_strategy; + const float quant_scale = state->shared->quantizer.Scale(); + + const size_t sigma_stride = state->sigma.PixelsPerRow(); + const size_t sharpness_stride = state->shared->epf_sharpness.PixelsPerRow(); + + for (size_t by = 0; by < block_rect.ysize(); ++by) { + float* JXL_RESTRICT sigma_row = block_rect.Row(&state->sigma, by); + const uint8_t* JXL_RESTRICT sharpness_row = + block_rect.ConstRow(state->shared->epf_sharpness, by); + AcStrategyRow acs_row = ac_strategy.ConstRow(block_rect, by); + const int32_t* const JXL_RESTRICT row_quant = + block_rect.ConstRow(state->shared->raw_quant_field, by); + + for (size_t bx = 0; bx < block_rect.xsize(); bx++) { + AcStrategy acs = acs_row[bx]; + size_t llf_x = acs.covered_blocks_x(); + // Only the first block of a strategy does the work for all the blocks + // it covers. + if (!acs.IsFirstBlock()) continue; + // quant_scale is smaller for low quality. + // quant_scale is roughly 0.08 / butteraugli score. + // + // row_quant is smaller for low quality. + // row_quant is a quantization multiplier of form 1.0 / + // row_quant[bx] + // + // lf.epf_quant_mul is a parameter in the format + // kInvSigmaNum is a constant + float sigma_quant = + lf.epf_quant_mul / (quant_scale * row_quant[bx] * kInvSigmaNum); + for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + for (size_t ix = 0; ix < acs.covered_blocks_x(); ix++) { + float sigma = + sigma_quant * + lf.epf_sharp_lut[sharpness_row[bx + ix + iy * sharpness_stride]]; + // Avoid infinities. + sigma = std::min(-1e-4f, sigma); // TODO(veluca): remove this. + // The stored value is the reciprocal, at a kSigmaPadding offset in + // both directions. + sigma_row[bx + ix + kSigmaPadding + + (iy + kSigmaPadding) * sigma_stride] = 1.0f / sigma; + } + } + // TODO(veluca): remove this padding. 
+ // Left padding with mirroring. + if (bx + block_rect.x0() == 0) { + for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + LeftMirror( + sigma_row + kSigmaPadding + (iy + kSigmaPadding) * sigma_stride, + kSigmaBorder); + } + } + // Right padding with mirroring. + if (bx + block_rect.x0() + llf_x == + state->shared->frame_dim.xsize_blocks) { + for (size_t iy = 0; iy < acs.covered_blocks_y(); iy++) { + RightMirror(sigma_row + kSigmaPadding + bx + llf_x + + (iy + kSigmaPadding) * sigma_stride, + kSigmaBorder); + } + } + // Offsets for row copying, in blocks. + // NOTE(review): the literal 1 presumably equals + // kSigmaPadding - kSigmaBorder (start of the left border) - confirm. + size_t offset_before = bx + block_rect.x0() == 0 ? 1 : bx + kSigmaPadding; + size_t offset_after = + bx + block_rect.x0() + llf_x == state->shared->frame_dim.xsize_blocks + ? kSigmaPadding + llf_x + bx + kSigmaBorder + : kSigmaPadding + llf_x + bx; + size_t num = offset_after - offset_before; + // Above + // Copies row (kSigmaPadding + iy) into row (kSigmaPadding - 1 - iy), i.e. + // mirrors the first kSigmaBorder rows upwards. + if (by + block_rect.y0() == 0) { + for (size_t iy = 0; iy < kSigmaBorder; iy++) { + memcpy( + sigma_row + offset_before + + (kSigmaPadding - 1 - iy) * sigma_stride, + sigma_row + offset_before + (kSigmaPadding + iy) * sigma_stride, + num * sizeof(*sigma_row)); + } + } + // Below + // Mirrors the last kSigmaBorder covered rows downwards, past the last + // covered row. + if (by + block_rect.y0() + acs.covered_blocks_y() == + state->shared->frame_dim.ysize_blocks) { + for (size_t iy = 0; iy < kSigmaBorder; iy++) { + memcpy( + sigma_row + offset_before + + sigma_stride * (acs.covered_blocks_y() + kSigmaPadding + iy), + sigma_row + offset_before + + sigma_stride * + (acs.covered_blocks_y() + kSigmaPadding - 1 - iy), + num * sizeof(*sigma_row)); + } + } + } + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/epf.h b/third-party/libjxl/libjxl/lib/jxl/epf.h new file mode 100644 index 0000000000..7a0834ed97 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/epf.h @@ -0,0 +1,33 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_EPF_H_ +#define LIB_JXL_EPF_H_ + +// Fast SIMD "in-loop" edge preserving filter (adaptive, nonlinear). + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/passes_state.h" + +namespace jxl { + +// 4 * (sqrt(0.5)-1), so that Weight(sigma) = 0.5. +static constexpr float kInvSigmaNum = -1.1715728752538099024f; + +// kInvSigmaNum / 0.3 +constexpr float kMinSigma = -3.90524291751269967465540850526868f; + +// Fills the `state->sigma` image (the implementation stores 1 / sigma) for +// the area inside `block_rect`. Accesses the AC strategy, quant field +// and epf_sharpness fields in the corresponding positions. +// The frame's loop filter must have epf_iters > 0 (checked with JXL_CHECK). +void ComputeSigma(const Rect& block_rect, PassesDecoderState* state); + +} // namespace jxl + +#endif // LIB_JXL_EPF_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/exif.h b/third-party/libjxl/libjxl/lib/jxl/exif.h new file mode 100644 index 0000000000..0cf493fc71 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/exif.h @@ -0,0 +1,87 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_EXIF_H_ +#define LIB_JXL_EXIF_H_ + +// Basic parsing of Exif (just enough for the render-impacting things +// like orientation) + +#include + +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/image_metadata.h" + +namespace jxl { + +// Exif tag id of the orientation entry. +constexpr uint16_t kExifOrientationTag = 274; + +// Checks if a blob looks like Exif, and if so, sets bigendian +// according to the tiff endianness. +// Returns false, leaving *bigendian untouched, when the blob is shorter than +// 12 bytes or its first four bytes match neither header constant below. +inline bool IsExif(const std::vector& exif, bool* bigendian) { + if (exif.size() < 12) return false; // not enough bytes for a valid exif blob + const uint8_t* t = exif.data(); + // 0x2A004D4D corresponds to the bytes 4D 4D 00 2A ("MM", 0, 42) and + // 0x002A4949 to 49 49 2A 00 ("II", 42, 0), presumably combined + // little-endian by LoadLE32. + if (LoadLE32(t) == 0x2A004D4D) { + *bigendian = true; + return true; + } else if (LoadLE32(t) == 0x002A4949) { + *bigendian = false; + return true; + } + return false; // not a valid tiff header +} + +// Finds the position of an Exif tag, or 0 if it is not found. +// The returned position is the byte offset, from the start of `exif`, of the +// data immediately following the matching 2-byte tag id (the id itself is +// skipped before returning). 0 is also returned when IsExif rejects the blob +// or one of the bounds checks fails. +inline size_t FindExifTagPosition(const std::vector& exif, + uint16_t tagname) { + bool bigendian; + if (!IsExif(exif, &bigendian)) return 0; + // The 32-bit value at byte 4 is the offset at which the tag count starts. + const uint8_t* t = exif.data() + 4; + uint64_t offset = (bigendian ? LoadBE32(t) : LoadLE32(t)); + if (exif.size() < 12 + offset + 2 || offset < 8) return 0; + // t already points at byte 4, so this moves it to exif.data() + offset. + t += offset - 4; + if (offset + 2 >= exif.size()) return 0; + uint16_t nb_tags = (bigendian ? LoadBE16(t) : LoadLE16(t)); + t += 2; + // Every entry is stepped over in 12 bytes: 2 for the tag id plus 10 more. + while (nb_tags > 0) { + if (t + 12 >= exif.data() + exif.size()) return 0; + uint16_t tag = (bigendian ? LoadBE16(t) : LoadLE16(t)); + t += 2; + if (tag == tagname) return static_cast(t - exif.data()); + t += 10; + nb_tags--; + } + return 0; +} + +// TODO (jon): tag 1 can be used to represent Adobe RGB 1998 if it has value +// "R03" +// TODO (jon): set intrinsic dimensions according to +// https://discourse.wicg.io/t/proposal-exif-image-resolution-auto-and-from-image/4326/24 +// Parses the Exif data just enough to extract any render-impacting info. +// If the Exif data is invalid or could not be parsed, then it is treated +// as a no-op. 
+inline void InterpretExif(const std::vector& exif, + JxlOrientation* orientation) { + bool bigendian; + if (!IsExif(exif, &bigendian)) return; + // o_pos is 0 when the orientation tag is absent; otherwise it is the offset + // of the bytes that follow the tag id. + size_t o_pos = FindExifTagPosition(exif, kExifOrientationTag); + if (o_pos) { + const uint8_t* t = exif.data() + o_pos; + // The entry is read as a 16-bit type, a 32-bit count and a 16-bit value. + uint16_t type = (bigendian ? LoadBE16(t) : LoadLE16(t)); + t += 2; + uint32_t count = (bigendian ? LoadBE32(t) : LoadLE32(t)); + t += 4; + uint16_t value = (bigendian ? LoadBE16(t) : LoadLE16(t)); + t += 4; + // *orientation is only written for a single value of type 3 in the range + // 1..8; in every other case it is left unchanged. + if (type == 3 && count == 1 && value >= 1 && value <= 8) { + *orientation = static_cast(value); + } + } +} + +} // namespace jxl + +#endif // LIB_JXL_EXIF_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/fake_parallel_runner_testonly.h b/third-party/libjxl/libjxl/lib/jxl/fake_parallel_runner_testonly.h new file mode 100644 index 0000000000..508d808cc5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fake_parallel_runner_testonly.h @@ -0,0 +1,79 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_ +#define LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_ + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/random.h" + +namespace jxl { + +// A parallel runner implementation that runs all the jobs in a single thread +// (the caller thread) but runs them pretending to use multiple threads and +// potentially out of order. This is useful for testing conditions that only +// occur under heavy load where the order of operations is different. 
+class FakeParallelRunner { + public: + // `order_seed` selects the execution order (0 means sequential) and also + // seeds the RNG; `num_threads` is clamped to at least 1. + FakeParallelRunner(uint32_t order_seed, uint32_t num_threads) + : order_seed_(order_seed), rng_(order_seed), num_threads_(num_threads) { + if (num_threads_ < 1) num_threads_ = 1; + } + + // Calls `init` once with the fake thread count, then calls `func` once for + // every index in [start, end), all on the calling thread. If `init` returns + // a non-zero code no task is run and that code is returned; otherwise the + // (zero) code from `init` is returned after all tasks have run. + JxlParallelRetCode Run(void* jxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start, + uint32_t end) { + JxlParallelRetCode ret = init(jxl_opaque, num_threads_); + if (ret != 0) return ret; + + if (order_seed_ == 0) { + for (uint32_t i = start; i < end; i++) { + func(jxl_opaque, i, i % num_threads_); + } + } else { + // NOTE(review): `end - start` is unsigned, so this assumes start <= end; + // confirm that callers guarantee it. + std::vector order(end - start); + for (uint32_t i = start; i < end; i++) { + order[i - start] = i; + } + rng_.Shuffle(order.data(), order.size()); + // The task index comes from the shuffled list, while the fake thread id + // is still derived from the loop counter. + for (uint32_t i = start; i < end; i++) { + func(jxl_opaque, order[i - start], i % num_threads_); + } + } + return ret; + } + + private: + // Seed for the RNG for defining the execution order. A value of 0 means + // sequential order from start to end. + uint32_t order_seed_; + + // The PRNG object, initialized with the order_seed_. Only used if the seed is + // not 0. + Rng rng_; + + // Number of fake threads. All the tasks are run on the same thread, but using + // different thread_id values based on this num_threads. + uint32_t num_threads_; +}; + +} // namespace jxl + +extern "C" { +// Function to pass as the parallel runner. 
+JXL_INLINE JxlParallelRetCode JxlFakeParallelRunner( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) { + return static_cast(runner_opaque) + ->Run(jpegxl_opaque, init, func, start_range, end_range); +} +} + +#endif // LIB_JXL_FAKE_PARALLEL_RUNNER_TESTONLY_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct-inl.h new file mode 100644 index 0000000000..e359c6ab71 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct-inl.h @@ -0,0 +1,237 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JXL_FAST_DCT_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_FAST_DCT_INL_H_ +#undef LIB_JXL_FAST_DCT_INL_H_ +#else +#define LIB_JXL_FAST_DCT_INL_H_ +#endif + +#include +#include +#include + +#include "lib/jxl/base/status.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +#if HWY_TARGET == HWY_NEON +HWY_NOINLINE void FastTransposeBlock(const int16_t* JXL_RESTRICT data_in, + size_t stride_in, size_t N, size_t M, + int16_t* JXL_RESTRICT data_out, + size_t stride_out) { + JXL_DASSERT(N % 8 == 0); + JXL_DASSERT(M % 8 == 0); + for (size_t i = 0; i < N; i += 8) { + for (size_t j = 0; j < M; j += 8) { + // TODO(veluca): one could optimize the M==8, stride_in==8 case further + // with vld4. + // This code is about 40% faster for N == M == stride_in == + // stride_out == 8 + // Using loads + stores to reshuffle things to be able to + // use vld4 doesn't help. 
+ /* + auto a0 = vld4q_s16(data_in); auto a1 = vld4q_s16(data_in + 32); + int16x8x4_t out0; + int16x8x4_t out1; + out0.val[0] = vuzp1q_s16(a0.val[0], a1.val[0]); + out0.val[1] = vuzp1q_s16(a0.val[1], a1.val[1]); + out0.val[2] = vuzp1q_s16(a0.val[2], a1.val[2]); + out0.val[3] = vuzp1q_s16(a0.val[3], a1.val[3]); + out1.val[0] = vuzp2q_s16(a0.val[0], a1.val[0]); + out1.val[1] = vuzp2q_s16(a0.val[1], a1.val[1]); + out1.val[2] = vuzp2q_s16(a0.val[2], a1.val[2]); + out1.val[3] = vuzp2q_s16(a0.val[3], a1.val[3]); + vst1q_s16_x4(data_out, out0); + vst1q_s16_x4(data_out + 32, out1); + */ + auto a0 = vld1q_s16(data_in + i * stride_in + j); + auto a1 = vld1q_s16(data_in + (i + 1) * stride_in + j); + auto a2 = vld1q_s16(data_in + (i + 2) * stride_in + j); + auto a3 = vld1q_s16(data_in + (i + 3) * stride_in + j); + + auto a01 = vtrnq_s16(a0, a1); + auto a23 = vtrnq_s16(a2, a3); + + auto four0 = vtrnq_s32(vreinterpretq_s32_s16(a01.val[0]), + vreinterpretq_s32_s16(a23.val[0])); + auto four1 = vtrnq_s32(vreinterpretq_s32_s16(a01.val[1]), + vreinterpretq_s32_s16(a23.val[1])); + + auto a4 = vld1q_s16(data_in + (i + 4) * stride_in + j); + auto a5 = vld1q_s16(data_in + (i + 5) * stride_in + j); + auto a6 = vld1q_s16(data_in + (i + 6) * stride_in + j); + auto a7 = vld1q_s16(data_in + (i + 7) * stride_in + j); + + auto a45 = vtrnq_s16(a4, a5); + auto a67 = vtrnq_s16(a6, a7); + + auto four2 = vtrnq_s32(vreinterpretq_s32_s16(a45.val[0]), + vreinterpretq_s32_s16(a67.val[0])); + auto four3 = vtrnq_s32(vreinterpretq_s32_s16(a45.val[1]), + vreinterpretq_s32_s16(a67.val[1])); + + auto out0 = + vcombine_s32(vget_low_s32(four0.val[0]), vget_low_s32(four2.val[0])); + auto out1 = + vcombine_s32(vget_low_s32(four1.val[0]), vget_low_s32(four3.val[0])); + auto out2 = + vcombine_s32(vget_low_s32(four0.val[1]), vget_low_s32(four2.val[1])); + auto out3 = + vcombine_s32(vget_low_s32(four1.val[1]), vget_low_s32(four3.val[1])); + auto out4 = vcombine_s32(vget_high_s32(four0.val[0]), + 
vget_high_s32(four2.val[0])); + auto out5 = vcombine_s32(vget_high_s32(four1.val[0]), + vget_high_s32(four3.val[0])); + auto out6 = vcombine_s32(vget_high_s32(four0.val[1]), + vget_high_s32(four2.val[1])); + auto out7 = vcombine_s32(vget_high_s32(four1.val[1]), + vget_high_s32(four3.val[1])); + vst1q_s16(data_out + j * stride_out + i, vreinterpretq_s16_s32(out0)); + vst1q_s16(data_out + (j + 1) * stride_out + i, + vreinterpretq_s16_s32(out1)); + vst1q_s16(data_out + (j + 2) * stride_out + i, + vreinterpretq_s16_s32(out2)); + vst1q_s16(data_out + (j + 3) * stride_out + i, + vreinterpretq_s16_s32(out3)); + vst1q_s16(data_out + (j + 4) * stride_out + i, + vreinterpretq_s16_s32(out4)); + vst1q_s16(data_out + (j + 5) * stride_out + i, + vreinterpretq_s16_s32(out5)); + vst1q_s16(data_out + (j + 6) * stride_out + i, + vreinterpretq_s16_s32(out6)); + vst1q_s16(data_out + (j + 7) * stride_out + i, + vreinterpretq_s16_s32(out7)); + } + } +} + +template +struct FastDCTTag {}; + +#include "lib/jxl/fast_dct128-inl.h" +#include "lib/jxl/fast_dct16-inl.h" +#include "lib/jxl/fast_dct256-inl.h" +#include "lib/jxl/fast_dct32-inl.h" +#include "lib/jxl/fast_dct64-inl.h" +#include "lib/jxl/fast_dct8-inl.h" + +template +struct ComputeFastScaledIDCT { + // scratch_space must be aligned, and should have space for ROWS*COLS + // int16_ts. + HWY_MAYBE_UNUSED void operator()(int16_t* JXL_RESTRICT from, int16_t* to, + size_t to_stride, + int16_t* JXL_RESTRICT scratch_space) { + // Reverse the steps done in ComputeScaledDCT. 
+ if (ROWS < COLS) { + FastTransposeBlock(from, COLS, ROWS, COLS, scratch_space, ROWS); + FastIDCT(FastDCTTag(), scratch_space, ROWS, from, ROWS, ROWS); + FastTransposeBlock(from, ROWS, COLS, ROWS, scratch_space, COLS); + FastIDCT(FastDCTTag(), scratch_space, COLS, to, to_stride, COLS); + } else { + FastIDCT(FastDCTTag(), from, ROWS, scratch_space, ROWS, ROWS); + FastTransposeBlock(scratch_space, ROWS, COLS, ROWS, from, COLS); + FastIDCT(FastDCTTag(), from, COLS, to, to_stride, COLS); + } + } +}; +#endif + +template +HWY_NOINLINE void TestFastIDCT() { +#if HWY_TARGET == HWY_NEON + auto pixels_mem = hwy::AllocateAligned(N * M); + float* pixels = pixels_mem.get(); + auto dct_mem = hwy::AllocateAligned(N * M); + float* dct = dct_mem.get(); + auto dct_i_mem = hwy::AllocateAligned(N * M); + int16_t* dct_i = dct_i_mem.get(); + auto dct_in_mem = hwy::AllocateAligned(N * M); + int16_t* dct_in = dct_in_mem.get(); + auto idct_mem = hwy::AllocateAligned(N * M); + int16_t* idct = idct_mem.get(); + + auto scratch_space_mem = hwy::AllocateAligned(N * M * 2); + float* scratch_space = scratch_space_mem.get(); + auto scratch_space_i_mem = hwy::AllocateAligned(N * M * 2); + int16_t* scratch_space_i = scratch_space_i_mem.get(); + + Rng rng(0); + for (size_t i = 0; i < N * M; i++) { + pixels[i] = rng.UniformF(-1, 1); + } + ComputeScaledDCT()(DCTFrom(pixels, N), dct, scratch_space); + size_t integer_bits = std::max(FastIDCTIntegerBits(FastDCTTag()), + FastIDCTIntegerBits(FastDCTTag())); + // Enough range for [-2, 2] output values. 
+ JXL_ASSERT(integer_bits <= 14); + float scale = (1 << (14 - integer_bits)); + for (size_t i = 0; i < N * M; i++) { + dct_i[i] = std::round(dct[i] * scale); + } + + for (size_t j = 0; j < 40000000 / (M * N); j++) { + memcpy(dct_in, dct_i, sizeof(*dct_i) * N * M); + ComputeFastScaledIDCT()(dct_in, idct, N, scratch_space_i); + } + float max_error = 0; + for (size_t i = 0; i < M * N; i++) { + float err = std::abs(idct[i] * (1.0f / scale) - pixels[i]); + if (std::abs(err) > max_error) { + max_error = std::abs(err); + } + } + printf("max error: %f mantissa bits: %d\n", max_error, + 14 - (int)integer_bits); +#endif +} + +template +HWY_NOINLINE void TestFloatIDCT() { + auto pixels_mem = hwy::AllocateAligned(N * M); + float* pixels = pixels_mem.get(); + auto dct_mem = hwy::AllocateAligned(N * M); + float* dct = dct_mem.get(); + auto idct_mem = hwy::AllocateAligned(N * M); + float* idct = idct_mem.get(); + + auto dct_in_mem = hwy::AllocateAligned(N * M); + float* dct_in = dct_mem.get(); + + auto scratch_space_mem = hwy::AllocateAligned(N * M * 2); + float* scratch_space = scratch_space_mem.get(); + + Rng rng(0); + for (size_t i = 0; i < N * M; i++) { + pixels[i] = rng.UniformF(-1, 1); + } + ComputeScaledDCT()(DCTFrom(pixels, N), dct, scratch_space); + + for (size_t j = 0; j < 40000000 / (M * N); j++) { + memcpy(dct_in, dct, sizeof(*dct) * N * M); + ComputeScaledIDCT()(dct_in, DCTTo(idct, N), scratch_space); + } + float max_error = 0; + for (size_t i = 0; i < M * N; i++) { + float err = std::abs(idct[i] - pixels[i]); + if (std::abs(err) > max_error) { + max_error = std::abs(err); + } + } + printf("max error: %e\n", max_error); +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_FAST_DCT_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct.cc b/third-party/libjxl/libjxl/lib/jxl/fast_dct.cc new file mode 100644 index 0000000000..d796018fd0 --- 
/dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct.cc @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/fast_dct.cc" +#include +#include + +#include "lib/jxl/base/random.h" +#include "lib/jxl/dct-inl.h" +#include "lib/jxl/fast_dct-inl.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { +void BenchmarkFloatIDCT32x32() { TestFloatIDCT<32, 32>(); } +void BenchmarkFastIDCT32x32() { TestFastIDCT<32, 32>(); } +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(BenchmarkFloatIDCT32x32); +HWY_EXPORT(BenchmarkFastIDCT32x32); +void BenchmarkFloatIDCT32x32() { + HWY_DYNAMIC_DISPATCH(BenchmarkFloatIDCT32x32)(); +} +void BenchmarkFastIDCT32x32() { + HWY_DYNAMIC_DISPATCH(BenchmarkFastIDCT32x32)(); +} +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct.h new file mode 100644 index 0000000000..641933d8a0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct.h @@ -0,0 +1,9 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +namespace jxl { +void BenchmarkFloatIDCT32x32(); +void BenchmarkFastIDCT32x32(); +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct128-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct128-inl.h new file mode 100644 index 0000000000..1a94d3ee92 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct128-inl.h @@ -0,0 +1,2137 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* This file is automatically generated. Do not modify it directly. */ +#if HWY_TARGET != HWY_NEON +#error "only include this file from fast_dct-inl.h" +#endif + +constexpr size_t FastIDCTIntegerBits(FastDCTTag<128>) { return 2; } + +void FastIDCT(FastDCTTag<128>, const int16_t* in, size_t in_stride, + int16_t* out, size_t out_stride, size_t count) { + JXL_ASSERT(count % 8 == 0); + for (size_t i = 0; i < count; i += 8) { + int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i); + int16x8_t v1 = vld1q_s16(in + in_stride * 64 + i); + int16x8_t v2 = vaddq_s16(v0, v1); + int16x8_t v3 = vld1q_s16(in + in_stride * 32 + i); + int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573); + int16x8_t v4 = vaddq_s16(v4_tmp, v3); + int16x8_t v5 = vld1q_s16(in + in_stride * 96 + i); + int16x8_t v6 = vaddq_s16(v5, v3); + int16x8_t v7 = vaddq_s16(v4, v6); + int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734); + int16x8_t v9 = vaddq_s16(v2, v8); + int16x8_t v10 = vld1q_s16(in + in_stride * 16 + i); + int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573); + int16x8_t v11 = vaddq_s16(v11_tmp, v10); + int16x8_t v12 = vld1q_s16(in + in_stride * 80 + i); + int16x8_t v13 = vld1q_s16(in + in_stride * 48 + i); + int16x8_t v14 = vaddq_s16(v12, v13); + int16x8_t v15 = vaddq_s16(v11, v14); + int16x8_t v16 = vaddq_s16(v13, v10); + int16x8_t v17_tmp = vqrdmulhq_n_s16(v16, 13573); + int16x8_t v17 = vaddq_s16(v17_tmp, v16); + int16x8_t v18 = vld1q_s16(in + in_stride * 112 + i); + int16x8_t v19 = vaddq_s16(v18, v12); + int16x8_t v20 = vaddq_s16(v19, v16); + int16x8_t v21 = vaddq_s16(v17, v20); + int16x8_t v22 = vqrdmulhq_n_s16(v21, 17734); + int16x8_t v23 = vaddq_s16(v15, v22); + int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705); + int16x8_t v25 = vaddq_s16(v9, v24); + int16x8_t v26 = vld1q_s16(in + in_stride * 8 + i); + int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573); + int16x8_t v27 = vaddq_s16(v27_tmp, v26); + int16x8_t v28 
= vld1q_s16(in + in_stride * 72 + i); + int16x8_t v29 = vld1q_s16(in + in_stride * 56 + i); + int16x8_t v30 = vaddq_s16(v28, v29); + int16x8_t v31 = vaddq_s16(v27, v30); + int16x8_t v32 = vld1q_s16(in + in_stride * 40 + i); + int16x8_t v33 = vld1q_s16(in + in_stride * 24 + i); + int16x8_t v34 = vaddq_s16(v32, v33); + int16x8_t v35_tmp = vqrdmulhq_n_s16(v34, 13573); + int16x8_t v35 = vaddq_s16(v35_tmp, v34); + int16x8_t v36 = vld1q_s16(in + in_stride * 104 + i); + int16x8_t v37 = vld1q_s16(in + in_stride * 88 + i); + int16x8_t v38 = vaddq_s16(v36, v37); + int16x8_t v39 = vaddq_s16(v38, v34); + int16x8_t v40 = vaddq_s16(v35, v39); + int16x8_t v41 = vqrdmulhq_n_s16(v40, 17734); + int16x8_t v42 = vaddq_s16(v31, v41); + int16x8_t v43 = vaddq_s16(v33, v26); + int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573); + int16x8_t v44 = vaddq_s16(v44_tmp, v43); + int16x8_t v45 = vaddq_s16(v37, v28); + int16x8_t v46 = vaddq_s16(v29, v32); + int16x8_t v47 = vaddq_s16(v45, v46); + int16x8_t v48 = vaddq_s16(v44, v47); + int16x8_t v49 = vaddq_s16(v46, v43); + int16x8_t v50_tmp = vqrdmulhq_n_s16(v49, 13573); + int16x8_t v50 = vaddq_s16(v50_tmp, v49); + int16x8_t v51 = vld1q_s16(in + in_stride * 120 + i); + int16x8_t v52 = vaddq_s16(v51, v36); + int16x8_t v53 = vaddq_s16(v52, v45); + int16x8_t v54 = vaddq_s16(v53, v49); + int16x8_t v55 = vaddq_s16(v50, v54); + int16x8_t v56 = vqrdmulhq_n_s16(v55, 17734); + int16x8_t v57 = vaddq_s16(v48, v56); + int16x8_t v58 = vqrdmulhq_n_s16(v57, 16705); + int16x8_t v59 = vaddq_s16(v42, v58); + int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463); + int16x8_t v61 = vaddq_s16(v25, v60); + int16x8_t v62 = vld1q_s16(in + in_stride * 4 + i); + int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573); + int16x8_t v63 = vaddq_s16(v63_tmp, v62); + int16x8_t v64 = vld1q_s16(in + in_stride * 68 + i); + int16x8_t v65 = vld1q_s16(in + in_stride * 60 + i); + int16x8_t v66 = vaddq_s16(v64, v65); + int16x8_t v67 = vaddq_s16(v63, v66); + int16x8_t v68 = vld1q_s16(in + in_stride * 36 + 
i); + int16x8_t v69 = vld1q_s16(in + in_stride * 28 + i); + int16x8_t v70 = vaddq_s16(v68, v69); + int16x8_t v71_tmp = vqrdmulhq_n_s16(v70, 13573); + int16x8_t v71 = vaddq_s16(v71_tmp, v70); + int16x8_t v72 = vld1q_s16(in + in_stride * 100 + i); + int16x8_t v73 = vld1q_s16(in + in_stride * 92 + i); + int16x8_t v74 = vaddq_s16(v72, v73); + int16x8_t v75 = vaddq_s16(v74, v70); + int16x8_t v76 = vaddq_s16(v71, v75); + int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734); + int16x8_t v78 = vaddq_s16(v67, v77); + int16x8_t v79 = vld1q_s16(in + in_stride * 20 + i); + int16x8_t v80 = vld1q_s16(in + in_stride * 12 + i); + int16x8_t v81 = vaddq_s16(v79, v80); + int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573); + int16x8_t v82 = vaddq_s16(v82_tmp, v81); + int16x8_t v83 = vld1q_s16(in + in_stride * 84 + i); + int16x8_t v84 = vld1q_s16(in + in_stride * 76 + i); + int16x8_t v85 = vaddq_s16(v83, v84); + int16x8_t v86 = vld1q_s16(in + in_stride * 52 + i); + int16x8_t v87 = vld1q_s16(in + in_stride * 44 + i); + int16x8_t v88 = vaddq_s16(v86, v87); + int16x8_t v89 = vaddq_s16(v85, v88); + int16x8_t v90 = vaddq_s16(v82, v89); + int16x8_t v91 = vaddq_s16(v88, v81); + int16x8_t v92_tmp = vqrdmulhq_n_s16(v91, 13573); + int16x8_t v92 = vaddq_s16(v92_tmp, v91); + int16x8_t v93 = vld1q_s16(in + in_stride * 116 + i); + int16x8_t v94 = vld1q_s16(in + in_stride * 108 + i); + int16x8_t v95 = vaddq_s16(v93, v94); + int16x8_t v96 = vaddq_s16(v95, v85); + int16x8_t v97 = vaddq_s16(v96, v91); + int16x8_t v98 = vaddq_s16(v92, v97); + int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734); + int16x8_t v100 = vaddq_s16(v90, v99); + int16x8_t v101 = vqrdmulhq_n_s16(v100, 16705); + int16x8_t v102 = vaddq_s16(v78, v101); + int16x8_t v103 = vaddq_s16(v80, v62); + int16x8_t v104_tmp = vqrdmulhq_n_s16(v103, 13573); + int16x8_t v104 = vaddq_s16(v104_tmp, v103); + int16x8_t v105 = vaddq_s16(v84, v64); + int16x8_t v106 = vaddq_s16(v65, v86); + int16x8_t v107 = vaddq_s16(v105, v106); + int16x8_t v108 = vaddq_s16(v104, v107); + 
int16x8_t v109 = vaddq_s16(v87, v68); + int16x8_t v110 = vaddq_s16(v69, v79); + int16x8_t v111 = vaddq_s16(v109, v110); + int16x8_t v112_tmp = vqrdmulhq_n_s16(v111, 13573); + int16x8_t v112 = vaddq_s16(v112_tmp, v111); + int16x8_t v113 = vaddq_s16(v94, v72); + int16x8_t v114 = vaddq_s16(v73, v83); + int16x8_t v115 = vaddq_s16(v113, v114); + int16x8_t v116 = vaddq_s16(v115, v111); + int16x8_t v117 = vaddq_s16(v112, v116); + int16x8_t v118 = vqrdmulhq_n_s16(v117, 17734); + int16x8_t v119 = vaddq_s16(v108, v118); + int16x8_t v120 = vaddq_s16(v110, v103); + int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 13573); + int16x8_t v121 = vaddq_s16(v121_tmp, v120); + int16x8_t v122 = vaddq_s16(v114, v105); + int16x8_t v123 = vaddq_s16(v106, v109); + int16x8_t v124 = vaddq_s16(v122, v123); + int16x8_t v125 = vaddq_s16(v121, v124); + int16x8_t v126 = vaddq_s16(v123, v120); + int16x8_t v127_tmp = vqrdmulhq_n_s16(v126, 13573); + int16x8_t v127 = vaddq_s16(v127_tmp, v126); + int16x8_t v128 = vld1q_s16(in + in_stride * 124 + i); + int16x8_t v129 = vaddq_s16(v128, v93); + int16x8_t v130 = vaddq_s16(v129, v113); + int16x8_t v131 = vaddq_s16(v130, v122); + int16x8_t v132 = vaddq_s16(v131, v126); + int16x8_t v133 = vaddq_s16(v127, v132); + int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734); + int16x8_t v135 = vaddq_s16(v125, v134); + int16x8_t v136 = vqrdmulhq_n_s16(v135, 16705); + int16x8_t v137 = vaddq_s16(v119, v136); + int16x8_t v138 = vqrdmulhq_n_s16(v137, 16463); + int16x8_t v139 = vaddq_s16(v102, v138); + int16x8_t v140 = vqrdmulhq_n_s16(v139, 16404); + int16x8_t v141 = vaddq_s16(v61, v140); + int16x8_t v142 = vld1q_s16(in + in_stride * 2 + i); + int16x8_t v143_tmp = vqrdmulhq_n_s16(v142, 13573); + int16x8_t v143 = vaddq_s16(v143_tmp, v142); + int16x8_t v144 = vld1q_s16(in + in_stride * 66 + i); + int16x8_t v145 = vld1q_s16(in + in_stride * 62 + i); + int16x8_t v146 = vaddq_s16(v144, v145); + int16x8_t v147 = vaddq_s16(v143, v146); + int16x8_t v148 = vld1q_s16(in + in_stride * 34 + i); + 
int16x8_t v149 = vld1q_s16(in + in_stride * 30 + i); + int16x8_t v150 = vaddq_s16(v148, v149); + int16x8_t v151_tmp = vqrdmulhq_n_s16(v150, 13573); + int16x8_t v151 = vaddq_s16(v151_tmp, v150); + int16x8_t v152 = vld1q_s16(in + in_stride * 98 + i); + int16x8_t v153 = vld1q_s16(in + in_stride * 94 + i); + int16x8_t v154 = vaddq_s16(v152, v153); + int16x8_t v155 = vaddq_s16(v154, v150); + int16x8_t v156 = vaddq_s16(v151, v155); + int16x8_t v157 = vqrdmulhq_n_s16(v156, 17734); + int16x8_t v158 = vaddq_s16(v147, v157); + int16x8_t v159 = vld1q_s16(in + in_stride * 18 + i); + int16x8_t v160 = vld1q_s16(in + in_stride * 14 + i); + int16x8_t v161 = vaddq_s16(v159, v160); + int16x8_t v162_tmp = vqrdmulhq_n_s16(v161, 13573); + int16x8_t v162 = vaddq_s16(v162_tmp, v161); + int16x8_t v163 = vld1q_s16(in + in_stride * 82 + i); + int16x8_t v164 = vld1q_s16(in + in_stride * 78 + i); + int16x8_t v165 = vaddq_s16(v163, v164); + int16x8_t v166 = vld1q_s16(in + in_stride * 50 + i); + int16x8_t v167 = vld1q_s16(in + in_stride * 46 + i); + int16x8_t v168 = vaddq_s16(v166, v167); + int16x8_t v169 = vaddq_s16(v165, v168); + int16x8_t v170 = vaddq_s16(v162, v169); + int16x8_t v171 = vaddq_s16(v168, v161); + int16x8_t v172_tmp = vqrdmulhq_n_s16(v171, 13573); + int16x8_t v172 = vaddq_s16(v172_tmp, v171); + int16x8_t v173 = vld1q_s16(in + in_stride * 114 + i); + int16x8_t v174 = vld1q_s16(in + in_stride * 110 + i); + int16x8_t v175 = vaddq_s16(v173, v174); + int16x8_t v176 = vaddq_s16(v175, v165); + int16x8_t v177 = vaddq_s16(v176, v171); + int16x8_t v178 = vaddq_s16(v172, v177); + int16x8_t v179 = vqrdmulhq_n_s16(v178, 17734); + int16x8_t v180 = vaddq_s16(v170, v179); + int16x8_t v181 = vqrdmulhq_n_s16(v180, 16705); + int16x8_t v182 = vaddq_s16(v158, v181); + int16x8_t v183 = vld1q_s16(in + in_stride * 10 + i); + int16x8_t v184 = vld1q_s16(in + in_stride * 6 + i); + int16x8_t v185 = vaddq_s16(v183, v184); + int16x8_t v186_tmp = vqrdmulhq_n_s16(v185, 13573); + int16x8_t v186 = 
vaddq_s16(v186_tmp, v185); + int16x8_t v187 = vld1q_s16(in + in_stride * 74 + i); + int16x8_t v188 = vld1q_s16(in + in_stride * 70 + i); + int16x8_t v189 = vaddq_s16(v187, v188); + int16x8_t v190 = vld1q_s16(in + in_stride * 58 + i); + int16x8_t v191 = vld1q_s16(in + in_stride * 54 + i); + int16x8_t v192 = vaddq_s16(v190, v191); + int16x8_t v193 = vaddq_s16(v189, v192); + int16x8_t v194 = vaddq_s16(v186, v193); + int16x8_t v195 = vld1q_s16(in + in_stride * 42 + i); + int16x8_t v196 = vld1q_s16(in + in_stride * 38 + i); + int16x8_t v197 = vaddq_s16(v195, v196); + int16x8_t v198 = vld1q_s16(in + in_stride * 26 + i); + int16x8_t v199 = vld1q_s16(in + in_stride * 22 + i); + int16x8_t v200 = vaddq_s16(v198, v199); + int16x8_t v201 = vaddq_s16(v197, v200); + int16x8_t v202_tmp = vqrdmulhq_n_s16(v201, 13573); + int16x8_t v202 = vaddq_s16(v202_tmp, v201); + int16x8_t v203 = vld1q_s16(in + in_stride * 106 + i); + int16x8_t v204 = vld1q_s16(in + in_stride * 102 + i); + int16x8_t v205 = vaddq_s16(v203, v204); + int16x8_t v206 = vld1q_s16(in + in_stride * 90 + i); + int16x8_t v207 = vld1q_s16(in + in_stride * 86 + i); + int16x8_t v208 = vaddq_s16(v206, v207); + int16x8_t v209 = vaddq_s16(v205, v208); + int16x8_t v210 = vaddq_s16(v209, v201); + int16x8_t v211 = vaddq_s16(v202, v210); + int16x8_t v212 = vqrdmulhq_n_s16(v211, 17734); + int16x8_t v213 = vaddq_s16(v194, v212); + int16x8_t v214 = vaddq_s16(v200, v185); + int16x8_t v215_tmp = vqrdmulhq_n_s16(v214, 13573); + int16x8_t v215 = vaddq_s16(v215_tmp, v214); + int16x8_t v216 = vaddq_s16(v208, v189); + int16x8_t v217 = vaddq_s16(v192, v197); + int16x8_t v218 = vaddq_s16(v216, v217); + int16x8_t v219 = vaddq_s16(v215, v218); + int16x8_t v220 = vaddq_s16(v217, v214); + int16x8_t v221_tmp = vqrdmulhq_n_s16(v220, 13573); + int16x8_t v221 = vaddq_s16(v221_tmp, v220); + int16x8_t v222 = vld1q_s16(in + in_stride * 122 + i); + int16x8_t v223 = vld1q_s16(in + in_stride * 118 + i); + int16x8_t v224 = vaddq_s16(v222, v223); + int16x8_t 
v225 = vaddq_s16(v224, v205); + int16x8_t v226 = vaddq_s16(v225, v216); + int16x8_t v227 = vaddq_s16(v226, v220); + int16x8_t v228 = vaddq_s16(v221, v227); + int16x8_t v229 = vqrdmulhq_n_s16(v228, 17734); + int16x8_t v230 = vaddq_s16(v219, v229); + int16x8_t v231 = vqrdmulhq_n_s16(v230, 16705); + int16x8_t v232 = vaddq_s16(v213, v231); + int16x8_t v233 = vqrdmulhq_n_s16(v232, 16463); + int16x8_t v234 = vaddq_s16(v182, v233); + int16x8_t v235 = vaddq_s16(v184, v142); + int16x8_t v236_tmp = vqrdmulhq_n_s16(v235, 13573); + int16x8_t v236 = vaddq_s16(v236_tmp, v235); + int16x8_t v237 = vaddq_s16(v188, v144); + int16x8_t v238 = vaddq_s16(v145, v190); + int16x8_t v239 = vaddq_s16(v237, v238); + int16x8_t v240 = vaddq_s16(v236, v239); + int16x8_t v241 = vaddq_s16(v196, v148); + int16x8_t v242 = vaddq_s16(v149, v198); + int16x8_t v243 = vaddq_s16(v241, v242); + int16x8_t v244_tmp = vqrdmulhq_n_s16(v243, 13573); + int16x8_t v244 = vaddq_s16(v244_tmp, v243); + int16x8_t v245 = vaddq_s16(v204, v152); + int16x8_t v246 = vaddq_s16(v153, v206); + int16x8_t v247 = vaddq_s16(v245, v246); + int16x8_t v248 = vaddq_s16(v247, v243); + int16x8_t v249 = vaddq_s16(v244, v248); + int16x8_t v250 = vqrdmulhq_n_s16(v249, 17734); + int16x8_t v251 = vaddq_s16(v240, v250); + int16x8_t v252 = vaddq_s16(v199, v159); + int16x8_t v253 = vaddq_s16(v160, v183); + int16x8_t v254 = vaddq_s16(v252, v253); + int16x8_t v255_tmp = vqrdmulhq_n_s16(v254, 13573); + int16x8_t v255 = vaddq_s16(v255_tmp, v254); + int16x8_t v256 = vaddq_s16(v207, v163); + int16x8_t v257 = vaddq_s16(v164, v187); + int16x8_t v258 = vaddq_s16(v256, v257); + int16x8_t v259 = vaddq_s16(v191, v166); + int16x8_t v260 = vaddq_s16(v167, v195); + int16x8_t v261 = vaddq_s16(v259, v260); + int16x8_t v262 = vaddq_s16(v258, v261); + int16x8_t v263 = vaddq_s16(v255, v262); + int16x8_t v264 = vaddq_s16(v261, v254); + int16x8_t v265_tmp = vqrdmulhq_n_s16(v264, 13573); + int16x8_t v265 = vaddq_s16(v265_tmp, v264); + int16x8_t v266 = 
vaddq_s16(v223, v173); + int16x8_t v267 = vaddq_s16(v174, v203); + int16x8_t v268 = vaddq_s16(v266, v267); + int16x8_t v269 = vaddq_s16(v268, v258); + int16x8_t v270 = vaddq_s16(v269, v264); + int16x8_t v271 = vaddq_s16(v265, v270); + int16x8_t v272 = vqrdmulhq_n_s16(v271, 17734); + int16x8_t v273 = vaddq_s16(v263, v272); + int16x8_t v274 = vqrdmulhq_n_s16(v273, 16705); + int16x8_t v275 = vaddq_s16(v251, v274); + int16x8_t v276 = vaddq_s16(v253, v235); + int16x8_t v277_tmp = vqrdmulhq_n_s16(v276, 13573); + int16x8_t v277 = vaddq_s16(v277_tmp, v276); + int16x8_t v278 = vaddq_s16(v257, v237); + int16x8_t v279 = vaddq_s16(v238, v259); + int16x8_t v280 = vaddq_s16(v278, v279); + int16x8_t v281 = vaddq_s16(v277, v280); + int16x8_t v282 = vaddq_s16(v260, v241); + int16x8_t v283 = vaddq_s16(v242, v252); + int16x8_t v284 = vaddq_s16(v282, v283); + int16x8_t v285_tmp = vqrdmulhq_n_s16(v284, 13573); + int16x8_t v285 = vaddq_s16(v285_tmp, v284); + int16x8_t v286 = vaddq_s16(v267, v245); + int16x8_t v287 = vaddq_s16(v246, v256); + int16x8_t v288 = vaddq_s16(v286, v287); + int16x8_t v289 = vaddq_s16(v288, v284); + int16x8_t v290 = vaddq_s16(v285, v289); + int16x8_t v291 = vqrdmulhq_n_s16(v290, 17734); + int16x8_t v292 = vaddq_s16(v281, v291); + int16x8_t v293 = vaddq_s16(v283, v276); + int16x8_t v294_tmp = vqrdmulhq_n_s16(v293, 13573); + int16x8_t v294 = vaddq_s16(v294_tmp, v293); + int16x8_t v295 = vaddq_s16(v287, v278); + int16x8_t v296 = vaddq_s16(v279, v282); + int16x8_t v297 = vaddq_s16(v295, v296); + int16x8_t v298 = vaddq_s16(v294, v297); + int16x8_t v299 = vaddq_s16(v296, v293); + int16x8_t v300_tmp = vqrdmulhq_n_s16(v299, 13573); + int16x8_t v300 = vaddq_s16(v300_tmp, v299); + int16x8_t v301 = vld1q_s16(in + in_stride * 126 + i); + int16x8_t v302 = vaddq_s16(v301, v222); + int16x8_t v303 = vaddq_s16(v302, v266); + int16x8_t v304 = vaddq_s16(v303, v286); + int16x8_t v305 = vaddq_s16(v304, v295); + int16x8_t v306 = vaddq_s16(v305, v299); + int16x8_t v307 = 
vaddq_s16(v300, v306); + int16x8_t v308 = vqrdmulhq_n_s16(v307, 17734); + int16x8_t v309 = vaddq_s16(v298, v308); + int16x8_t v310 = vqrdmulhq_n_s16(v309, 16705); + int16x8_t v311 = vaddq_s16(v292, v310); + int16x8_t v312 = vqrdmulhq_n_s16(v311, 16463); + int16x8_t v313 = vaddq_s16(v275, v312); + int16x8_t v314 = vqrdmulhq_n_s16(v313, 16404); + int16x8_t v315 = vaddq_s16(v234, v314); + int16x8_t v316 = vqrdmulhq_n_s16(v315, 16389); + int16x8_t v317 = vaddq_s16(v141, v316); + int16x8_t v318 = vld1q_s16(in + in_stride * 1 + i); + int16x8_t v319_tmp = vqrdmulhq_n_s16(v318, 13573); + int16x8_t v319 = vaddq_s16(v319_tmp, v318); + int16x8_t v320 = vld1q_s16(in + in_stride * 65 + i); + int16x8_t v321 = vld1q_s16(in + in_stride * 63 + i); + int16x8_t v322 = vaddq_s16(v320, v321); + int16x8_t v323 = vaddq_s16(v319, v322); + int16x8_t v324 = vld1q_s16(in + in_stride * 33 + i); + int16x8_t v325 = vld1q_s16(in + in_stride * 31 + i); + int16x8_t v326 = vaddq_s16(v324, v325); + int16x8_t v327_tmp = vqrdmulhq_n_s16(v326, 13573); + int16x8_t v327 = vaddq_s16(v327_tmp, v326); + int16x8_t v328 = vld1q_s16(in + in_stride * 97 + i); + int16x8_t v329 = vld1q_s16(in + in_stride * 95 + i); + int16x8_t v330 = vaddq_s16(v328, v329); + int16x8_t v331 = vaddq_s16(v330, v326); + int16x8_t v332 = vaddq_s16(v327, v331); + int16x8_t v333 = vqrdmulhq_n_s16(v332, 17734); + int16x8_t v334 = vaddq_s16(v323, v333); + int16x8_t v335 = vld1q_s16(in + in_stride * 17 + i); + int16x8_t v336 = vld1q_s16(in + in_stride * 15 + i); + int16x8_t v337 = vaddq_s16(v335, v336); + int16x8_t v338_tmp = vqrdmulhq_n_s16(v337, 13573); + int16x8_t v338 = vaddq_s16(v338_tmp, v337); + int16x8_t v339 = vld1q_s16(in + in_stride * 81 + i); + int16x8_t v340 = vld1q_s16(in + in_stride * 79 + i); + int16x8_t v341 = vaddq_s16(v339, v340); + int16x8_t v342 = vld1q_s16(in + in_stride * 49 + i); + int16x8_t v343 = vld1q_s16(in + in_stride * 47 + i); + int16x8_t v344 = vaddq_s16(v342, v343); + int16x8_t v345 = vaddq_s16(v341, v344); 
+ int16x8_t v346 = vaddq_s16(v338, v345); + int16x8_t v347 = vaddq_s16(v344, v337); + int16x8_t v348_tmp = vqrdmulhq_n_s16(v347, 13573); + int16x8_t v348 = vaddq_s16(v348_tmp, v347); + int16x8_t v349 = vld1q_s16(in + in_stride * 113 + i); + int16x8_t v350 = vld1q_s16(in + in_stride * 111 + i); + int16x8_t v351 = vaddq_s16(v349, v350); + int16x8_t v352 = vaddq_s16(v351, v341); + int16x8_t v353 = vaddq_s16(v352, v347); + int16x8_t v354 = vaddq_s16(v348, v353); + int16x8_t v355 = vqrdmulhq_n_s16(v354, 17734); + int16x8_t v356 = vaddq_s16(v346, v355); + int16x8_t v357 = vqrdmulhq_n_s16(v356, 16705); + int16x8_t v358 = vaddq_s16(v334, v357); + int16x8_t v359 = vld1q_s16(in + in_stride * 9 + i); + int16x8_t v360 = vld1q_s16(in + in_stride * 7 + i); + int16x8_t v361 = vaddq_s16(v359, v360); + int16x8_t v362_tmp = vqrdmulhq_n_s16(v361, 13573); + int16x8_t v362 = vaddq_s16(v362_tmp, v361); + int16x8_t v363 = vld1q_s16(in + in_stride * 73 + i); + int16x8_t v364 = vld1q_s16(in + in_stride * 71 + i); + int16x8_t v365 = vaddq_s16(v363, v364); + int16x8_t v366 = vld1q_s16(in + in_stride * 57 + i); + int16x8_t v367 = vld1q_s16(in + in_stride * 55 + i); + int16x8_t v368 = vaddq_s16(v366, v367); + int16x8_t v369 = vaddq_s16(v365, v368); + int16x8_t v370 = vaddq_s16(v362, v369); + int16x8_t v371 = vld1q_s16(in + in_stride * 41 + i); + int16x8_t v372 = vld1q_s16(in + in_stride * 39 + i); + int16x8_t v373 = vaddq_s16(v371, v372); + int16x8_t v374 = vld1q_s16(in + in_stride * 25 + i); + int16x8_t v375 = vld1q_s16(in + in_stride * 23 + i); + int16x8_t v376 = vaddq_s16(v374, v375); + int16x8_t v377 = vaddq_s16(v373, v376); + int16x8_t v378_tmp = vqrdmulhq_n_s16(v377, 13573); + int16x8_t v378 = vaddq_s16(v378_tmp, v377); + int16x8_t v379 = vld1q_s16(in + in_stride * 105 + i); + int16x8_t v380 = vld1q_s16(in + in_stride * 103 + i); + int16x8_t v381 = vaddq_s16(v379, v380); + int16x8_t v382 = vld1q_s16(in + in_stride * 89 + i); + int16x8_t v383 = vld1q_s16(in + in_stride * 87 + i); + 
int16x8_t v384 = vaddq_s16(v382, v383); + int16x8_t v385 = vaddq_s16(v381, v384); + int16x8_t v386 = vaddq_s16(v385, v377); + int16x8_t v387 = vaddq_s16(v378, v386); + int16x8_t v388 = vqrdmulhq_n_s16(v387, 17734); + int16x8_t v389 = vaddq_s16(v370, v388); + int16x8_t v390 = vaddq_s16(v376, v361); + int16x8_t v391_tmp = vqrdmulhq_n_s16(v390, 13573); + int16x8_t v391 = vaddq_s16(v391_tmp, v390); + int16x8_t v392 = vaddq_s16(v384, v365); + int16x8_t v393 = vaddq_s16(v368, v373); + int16x8_t v394 = vaddq_s16(v392, v393); + int16x8_t v395 = vaddq_s16(v391, v394); + int16x8_t v396 = vaddq_s16(v393, v390); + int16x8_t v397_tmp = vqrdmulhq_n_s16(v396, 13573); + int16x8_t v397 = vaddq_s16(v397_tmp, v396); + int16x8_t v398 = vld1q_s16(in + in_stride * 121 + i); + int16x8_t v399 = vld1q_s16(in + in_stride * 119 + i); + int16x8_t v400 = vaddq_s16(v398, v399); + int16x8_t v401 = vaddq_s16(v400, v381); + int16x8_t v402 = vaddq_s16(v401, v392); + int16x8_t v403 = vaddq_s16(v402, v396); + int16x8_t v404 = vaddq_s16(v397, v403); + int16x8_t v405 = vqrdmulhq_n_s16(v404, 17734); + int16x8_t v406 = vaddq_s16(v395, v405); + int16x8_t v407 = vqrdmulhq_n_s16(v406, 16705); + int16x8_t v408 = vaddq_s16(v389, v407); + int16x8_t v409 = vqrdmulhq_n_s16(v408, 16463); + int16x8_t v410 = vaddq_s16(v358, v409); + int16x8_t v411 = vld1q_s16(in + in_stride * 5 + i); + int16x8_t v412 = vld1q_s16(in + in_stride * 3 + i); + int16x8_t v413 = vaddq_s16(v411, v412); + int16x8_t v414_tmp = vqrdmulhq_n_s16(v413, 13573); + int16x8_t v414 = vaddq_s16(v414_tmp, v413); + int16x8_t v415 = vld1q_s16(in + in_stride * 69 + i); + int16x8_t v416 = vld1q_s16(in + in_stride * 67 + i); + int16x8_t v417 = vaddq_s16(v415, v416); + int16x8_t v418 = vld1q_s16(in + in_stride * 61 + i); + int16x8_t v419 = vld1q_s16(in + in_stride * 59 + i); + int16x8_t v420 = vaddq_s16(v418, v419); + int16x8_t v421 = vaddq_s16(v417, v420); + int16x8_t v422 = vaddq_s16(v414, v421); + int16x8_t v423 = vld1q_s16(in + in_stride * 37 + i); + 
int16x8_t v424 = vld1q_s16(in + in_stride * 35 + i); + int16x8_t v425 = vaddq_s16(v423, v424); + int16x8_t v426 = vld1q_s16(in + in_stride * 29 + i); + int16x8_t v427 = vld1q_s16(in + in_stride * 27 + i); + int16x8_t v428 = vaddq_s16(v426, v427); + int16x8_t v429 = vaddq_s16(v425, v428); + int16x8_t v430_tmp = vqrdmulhq_n_s16(v429, 13573); + int16x8_t v430 = vaddq_s16(v430_tmp, v429); + int16x8_t v431 = vld1q_s16(in + in_stride * 101 + i); + int16x8_t v432 = vld1q_s16(in + in_stride * 99 + i); + int16x8_t v433 = vaddq_s16(v431, v432); + int16x8_t v434 = vld1q_s16(in + in_stride * 93 + i); + int16x8_t v435 = vld1q_s16(in + in_stride * 91 + i); + int16x8_t v436 = vaddq_s16(v434, v435); + int16x8_t v437 = vaddq_s16(v433, v436); + int16x8_t v438 = vaddq_s16(v437, v429); + int16x8_t v439 = vaddq_s16(v430, v438); + int16x8_t v440 = vqrdmulhq_n_s16(v439, 17734); + int16x8_t v441 = vaddq_s16(v422, v440); + int16x8_t v442 = vld1q_s16(in + in_stride * 21 + i); + int16x8_t v443 = vld1q_s16(in + in_stride * 19 + i); + int16x8_t v444 = vaddq_s16(v442, v443); + int16x8_t v445 = vld1q_s16(in + in_stride * 13 + i); + int16x8_t v446 = vld1q_s16(in + in_stride * 11 + i); + int16x8_t v447 = vaddq_s16(v445, v446); + int16x8_t v448 = vaddq_s16(v444, v447); + int16x8_t v449_tmp = vqrdmulhq_n_s16(v448, 13573); + int16x8_t v449 = vaddq_s16(v449_tmp, v448); + int16x8_t v450 = vld1q_s16(in + in_stride * 85 + i); + int16x8_t v451 = vld1q_s16(in + in_stride * 83 + i); + int16x8_t v452 = vaddq_s16(v450, v451); + int16x8_t v453 = vld1q_s16(in + in_stride * 77 + i); + int16x8_t v454 = vld1q_s16(in + in_stride * 75 + i); + int16x8_t v455 = vaddq_s16(v453, v454); + int16x8_t v456 = vaddq_s16(v452, v455); + int16x8_t v457 = vld1q_s16(in + in_stride * 53 + i); + int16x8_t v458 = vld1q_s16(in + in_stride * 51 + i); + int16x8_t v459 = vaddq_s16(v457, v458); + int16x8_t v460 = vld1q_s16(in + in_stride * 45 + i); + int16x8_t v461 = vld1q_s16(in + in_stride * 43 + i); + int16x8_t v462 = vaddq_s16(v460, 
v461); + int16x8_t v463 = vaddq_s16(v459, v462); + int16x8_t v464 = vaddq_s16(v456, v463); + int16x8_t v465 = vaddq_s16(v449, v464); + int16x8_t v466 = vaddq_s16(v463, v448); + int16x8_t v467_tmp = vqrdmulhq_n_s16(v466, 13573); + int16x8_t v467 = vaddq_s16(v467_tmp, v466); + int16x8_t v468 = vld1q_s16(in + in_stride * 117 + i); + int16x8_t v469 = vld1q_s16(in + in_stride * 115 + i); + int16x8_t v470 = vaddq_s16(v468, v469); + int16x8_t v471 = vld1q_s16(in + in_stride * 109 + i); + int16x8_t v472 = vld1q_s16(in + in_stride * 107 + i); + int16x8_t v473 = vaddq_s16(v471, v472); + int16x8_t v474 = vaddq_s16(v470, v473); + int16x8_t v475 = vaddq_s16(v474, v456); + int16x8_t v476 = vaddq_s16(v475, v466); + int16x8_t v477 = vaddq_s16(v467, v476); + int16x8_t v478 = vqrdmulhq_n_s16(v477, 17734); + int16x8_t v479 = vaddq_s16(v465, v478); + int16x8_t v480 = vqrdmulhq_n_s16(v479, 16705); + int16x8_t v481 = vaddq_s16(v441, v480); + int16x8_t v482 = vaddq_s16(v447, v413); + int16x8_t v483_tmp = vqrdmulhq_n_s16(v482, 13573); + int16x8_t v483 = vaddq_s16(v483_tmp, v482); + int16x8_t v484 = vaddq_s16(v455, v417); + int16x8_t v485 = vaddq_s16(v420, v459); + int16x8_t v486 = vaddq_s16(v484, v485); + int16x8_t v487 = vaddq_s16(v483, v486); + int16x8_t v488 = vaddq_s16(v462, v425); + int16x8_t v489 = vaddq_s16(v428, v444); + int16x8_t v490 = vaddq_s16(v488, v489); + int16x8_t v491_tmp = vqrdmulhq_n_s16(v490, 13573); + int16x8_t v491 = vaddq_s16(v491_tmp, v490); + int16x8_t v492 = vaddq_s16(v473, v433); + int16x8_t v493 = vaddq_s16(v436, v452); + int16x8_t v494 = vaddq_s16(v492, v493); + int16x8_t v495 = vaddq_s16(v494, v490); + int16x8_t v496 = vaddq_s16(v491, v495); + int16x8_t v497 = vqrdmulhq_n_s16(v496, 17734); + int16x8_t v498 = vaddq_s16(v487, v497); + int16x8_t v499 = vaddq_s16(v489, v482); + int16x8_t v500_tmp = vqrdmulhq_n_s16(v499, 13573); + int16x8_t v500 = vaddq_s16(v500_tmp, v499); + int16x8_t v501 = vaddq_s16(v493, v484); + int16x8_t v502 = vaddq_s16(v485, v488); + 
int16x8_t v503 = vaddq_s16(v501, v502); + int16x8_t v504 = vaddq_s16(v500, v503); + int16x8_t v505 = vaddq_s16(v502, v499); + int16x8_t v506_tmp = vqrdmulhq_n_s16(v505, 13573); + int16x8_t v506 = vaddq_s16(v506_tmp, v505); + int16x8_t v507 = vld1q_s16(in + in_stride * 125 + i); + int16x8_t v508 = vld1q_s16(in + in_stride * 123 + i); + int16x8_t v509 = vaddq_s16(v507, v508); + int16x8_t v510 = vaddq_s16(v509, v470); + int16x8_t v511 = vaddq_s16(v510, v492); + int16x8_t v512 = vaddq_s16(v511, v501); + int16x8_t v513 = vaddq_s16(v512, v505); + int16x8_t v514 = vaddq_s16(v506, v513); + int16x8_t v515 = vqrdmulhq_n_s16(v514, 17734); + int16x8_t v516 = vaddq_s16(v504, v515); + int16x8_t v517 = vqrdmulhq_n_s16(v516, 16705); + int16x8_t v518 = vaddq_s16(v498, v517); + int16x8_t v519 = vqrdmulhq_n_s16(v518, 16463); + int16x8_t v520 = vaddq_s16(v481, v519); + int16x8_t v521 = vqrdmulhq_n_s16(v520, 16404); + int16x8_t v522 = vaddq_s16(v410, v521); + int16x8_t v523 = vaddq_s16(v412, v318); + int16x8_t v524_tmp = vqrdmulhq_n_s16(v523, 13573); + int16x8_t v524 = vaddq_s16(v524_tmp, v523); + int16x8_t v525 = vaddq_s16(v416, v320); + int16x8_t v526 = vaddq_s16(v321, v418); + int16x8_t v527 = vaddq_s16(v525, v526); + int16x8_t v528 = vaddq_s16(v524, v527); + int16x8_t v529 = vaddq_s16(v424, v324); + int16x8_t v530 = vaddq_s16(v325, v426); + int16x8_t v531 = vaddq_s16(v529, v530); + int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 13573); + int16x8_t v532 = vaddq_s16(v532_tmp, v531); + int16x8_t v533 = vaddq_s16(v432, v328); + int16x8_t v534 = vaddq_s16(v329, v434); + int16x8_t v535 = vaddq_s16(v533, v534); + int16x8_t v536 = vaddq_s16(v535, v531); + int16x8_t v537 = vaddq_s16(v532, v536); + int16x8_t v538 = vqrdmulhq_n_s16(v537, 17734); + int16x8_t v539 = vaddq_s16(v528, v538); + int16x8_t v540 = vaddq_s16(v443, v335); + int16x8_t v541 = vaddq_s16(v336, v445); + int16x8_t v542 = vaddq_s16(v540, v541); + int16x8_t v543_tmp = vqrdmulhq_n_s16(v542, 13573); + int16x8_t v543 = 
vaddq_s16(v543_tmp, v542); + int16x8_t v544 = vaddq_s16(v451, v339); + int16x8_t v545 = vaddq_s16(v340, v453); + int16x8_t v546 = vaddq_s16(v544, v545); + int16x8_t v547 = vaddq_s16(v458, v342); + int16x8_t v548 = vaddq_s16(v343, v460); + int16x8_t v549 = vaddq_s16(v547, v548); + int16x8_t v550 = vaddq_s16(v546, v549); + int16x8_t v551 = vaddq_s16(v543, v550); + int16x8_t v552 = vaddq_s16(v549, v542); + int16x8_t v553_tmp = vqrdmulhq_n_s16(v552, 13573); + int16x8_t v553 = vaddq_s16(v553_tmp, v552); + int16x8_t v554 = vaddq_s16(v469, v349); + int16x8_t v555 = vaddq_s16(v350, v471); + int16x8_t v556 = vaddq_s16(v554, v555); + int16x8_t v557 = vaddq_s16(v556, v546); + int16x8_t v558 = vaddq_s16(v557, v552); + int16x8_t v559 = vaddq_s16(v553, v558); + int16x8_t v560 = vqrdmulhq_n_s16(v559, 17734); + int16x8_t v561 = vaddq_s16(v551, v560); + int16x8_t v562 = vqrdmulhq_n_s16(v561, 16705); + int16x8_t v563 = vaddq_s16(v539, v562); + int16x8_t v564 = vaddq_s16(v446, v359); + int16x8_t v565 = vaddq_s16(v360, v411); + int16x8_t v566 = vaddq_s16(v564, v565); + int16x8_t v567_tmp = vqrdmulhq_n_s16(v566, 13573); + int16x8_t v567 = vaddq_s16(v567_tmp, v566); + int16x8_t v568 = vaddq_s16(v454, v363); + int16x8_t v569 = vaddq_s16(v364, v415); + int16x8_t v570 = vaddq_s16(v568, v569); + int16x8_t v571 = vaddq_s16(v419, v366); + int16x8_t v572 = vaddq_s16(v367, v457); + int16x8_t v573 = vaddq_s16(v571, v572); + int16x8_t v574 = vaddq_s16(v570, v573); + int16x8_t v575 = vaddq_s16(v567, v574); + int16x8_t v576 = vaddq_s16(v461, v371); + int16x8_t v577 = vaddq_s16(v372, v423); + int16x8_t v578 = vaddq_s16(v576, v577); + int16x8_t v579 = vaddq_s16(v427, v374); + int16x8_t v580 = vaddq_s16(v375, v442); + int16x8_t v581 = vaddq_s16(v579, v580); + int16x8_t v582 = vaddq_s16(v578, v581); + int16x8_t v583_tmp = vqrdmulhq_n_s16(v582, 13573); + int16x8_t v583 = vaddq_s16(v583_tmp, v582); + int16x8_t v584 = vaddq_s16(v472, v379); + int16x8_t v585 = vaddq_s16(v380, v431); + int16x8_t v586 = 
vaddq_s16(v584, v585); + int16x8_t v587 = vaddq_s16(v435, v382); + int16x8_t v588 = vaddq_s16(v383, v450); + int16x8_t v589 = vaddq_s16(v587, v588); + int16x8_t v590 = vaddq_s16(v586, v589); + int16x8_t v591 = vaddq_s16(v590, v582); + int16x8_t v592 = vaddq_s16(v583, v591); + int16x8_t v593 = vqrdmulhq_n_s16(v592, 17734); + int16x8_t v594 = vaddq_s16(v575, v593); + int16x8_t v595 = vaddq_s16(v581, v566); + int16x8_t v596_tmp = vqrdmulhq_n_s16(v595, 13573); + int16x8_t v596 = vaddq_s16(v596_tmp, v595); + int16x8_t v597 = vaddq_s16(v589, v570); + int16x8_t v598 = vaddq_s16(v573, v578); + int16x8_t v599 = vaddq_s16(v597, v598); + int16x8_t v600 = vaddq_s16(v596, v599); + int16x8_t v601 = vaddq_s16(v598, v595); + int16x8_t v602_tmp = vqrdmulhq_n_s16(v601, 13573); + int16x8_t v602 = vaddq_s16(v602_tmp, v601); + int16x8_t v603 = vaddq_s16(v508, v398); + int16x8_t v604 = vaddq_s16(v399, v468); + int16x8_t v605 = vaddq_s16(v603, v604); + int16x8_t v606 = vaddq_s16(v605, v586); + int16x8_t v607 = vaddq_s16(v606, v597); + int16x8_t v608 = vaddq_s16(v607, v601); + int16x8_t v609 = vaddq_s16(v602, v608); + int16x8_t v610 = vqrdmulhq_n_s16(v609, 17734); + int16x8_t v611 = vaddq_s16(v600, v610); + int16x8_t v612 = vqrdmulhq_n_s16(v611, 16705); + int16x8_t v613 = vaddq_s16(v594, v612); + int16x8_t v614 = vqrdmulhq_n_s16(v613, 16463); + int16x8_t v615 = vaddq_s16(v563, v614); + int16x8_t v616 = vaddq_s16(v565, v523); + int16x8_t v617_tmp = vqrdmulhq_n_s16(v616, 13573); + int16x8_t v617 = vaddq_s16(v617_tmp, v616); + int16x8_t v618 = vaddq_s16(v569, v525); + int16x8_t v619 = vaddq_s16(v526, v571); + int16x8_t v620 = vaddq_s16(v618, v619); + int16x8_t v621 = vaddq_s16(v617, v620); + int16x8_t v622 = vaddq_s16(v577, v529); + int16x8_t v623 = vaddq_s16(v530, v579); + int16x8_t v624 = vaddq_s16(v622, v623); + int16x8_t v625_tmp = vqrdmulhq_n_s16(v624, 13573); + int16x8_t v625 = vaddq_s16(v625_tmp, v624); + int16x8_t v626 = vaddq_s16(v585, v533); + int16x8_t v627 = vaddq_s16(v534, 
v587); + int16x8_t v628 = vaddq_s16(v626, v627); + int16x8_t v629 = vaddq_s16(v628, v624); + int16x8_t v630 = vaddq_s16(v625, v629); + int16x8_t v631 = vqrdmulhq_n_s16(v630, 17734); + int16x8_t v632 = vaddq_s16(v621, v631); + int16x8_t v633 = vaddq_s16(v580, v540); + int16x8_t v634 = vaddq_s16(v541, v564); + int16x8_t v635 = vaddq_s16(v633, v634); + int16x8_t v636_tmp = vqrdmulhq_n_s16(v635, 13573); + int16x8_t v636 = vaddq_s16(v636_tmp, v635); + int16x8_t v637 = vaddq_s16(v588, v544); + int16x8_t v638 = vaddq_s16(v545, v568); + int16x8_t v639 = vaddq_s16(v637, v638); + int16x8_t v640 = vaddq_s16(v572, v547); + int16x8_t v641 = vaddq_s16(v548, v576); + int16x8_t v642 = vaddq_s16(v640, v641); + int16x8_t v643 = vaddq_s16(v639, v642); + int16x8_t v644 = vaddq_s16(v636, v643); + int16x8_t v645 = vaddq_s16(v642, v635); + int16x8_t v646_tmp = vqrdmulhq_n_s16(v645, 13573); + int16x8_t v646 = vaddq_s16(v646_tmp, v645); + int16x8_t v647 = vaddq_s16(v604, v554); + int16x8_t v648 = vaddq_s16(v555, v584); + int16x8_t v649 = vaddq_s16(v647, v648); + int16x8_t v650 = vaddq_s16(v649, v639); + int16x8_t v651 = vaddq_s16(v650, v645); + int16x8_t v652 = vaddq_s16(v646, v651); + int16x8_t v653 = vqrdmulhq_n_s16(v652, 17734); + int16x8_t v654 = vaddq_s16(v644, v653); + int16x8_t v655 = vqrdmulhq_n_s16(v654, 16705); + int16x8_t v656 = vaddq_s16(v632, v655); + int16x8_t v657 = vaddq_s16(v634, v616); + int16x8_t v658_tmp = vqrdmulhq_n_s16(v657, 13573); + int16x8_t v658 = vaddq_s16(v658_tmp, v657); + int16x8_t v659 = vaddq_s16(v638, v618); + int16x8_t v660 = vaddq_s16(v619, v640); + int16x8_t v661 = vaddq_s16(v659, v660); + int16x8_t v662 = vaddq_s16(v658, v661); + int16x8_t v663 = vaddq_s16(v641, v622); + int16x8_t v664 = vaddq_s16(v623, v633); + int16x8_t v665 = vaddq_s16(v663, v664); + int16x8_t v666_tmp = vqrdmulhq_n_s16(v665, 13573); + int16x8_t v666 = vaddq_s16(v666_tmp, v665); + int16x8_t v667 = vaddq_s16(v648, v626); + int16x8_t v668 = vaddq_s16(v627, v637); + int16x8_t v669 = 
vaddq_s16(v667, v668); + int16x8_t v670 = vaddq_s16(v669, v665); + int16x8_t v671 = vaddq_s16(v666, v670); + int16x8_t v672 = vqrdmulhq_n_s16(v671, 17734); + int16x8_t v673 = vaddq_s16(v662, v672); + int16x8_t v674 = vaddq_s16(v664, v657); + int16x8_t v675_tmp = vqrdmulhq_n_s16(v674, 13573); + int16x8_t v675 = vaddq_s16(v675_tmp, v674); + int16x8_t v676 = vaddq_s16(v668, v659); + int16x8_t v677 = vaddq_s16(v660, v663); + int16x8_t v678 = vaddq_s16(v676, v677); + int16x8_t v679 = vaddq_s16(v675, v678); + int16x8_t v680 = vaddq_s16(v677, v674); + int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 13573); + int16x8_t v681 = vaddq_s16(v681_tmp, v680); + int16x8_t v682 = vld1q_s16(in + in_stride * 127 + i); + int16x8_t v683 = vaddq_s16(v682, v507); + int16x8_t v684 = vaddq_s16(v683, v603); + int16x8_t v685 = vaddq_s16(v684, v647); + int16x8_t v686 = vaddq_s16(v685, v667); + int16x8_t v687 = vaddq_s16(v686, v676); + int16x8_t v688 = vaddq_s16(v687, v680); + int16x8_t v689 = vaddq_s16(v681, v688); + int16x8_t v690 = vqrdmulhq_n_s16(v689, 17734); + int16x8_t v691 = vaddq_s16(v679, v690); + int16x8_t v692 = vqrdmulhq_n_s16(v691, 16705); + int16x8_t v693 = vaddq_s16(v673, v692); + int16x8_t v694 = vqrdmulhq_n_s16(v693, 16463); + int16x8_t v695 = vaddq_s16(v656, v694); + int16x8_t v696 = vqrdmulhq_n_s16(v695, 16404); + int16x8_t v697 = vaddq_s16(v615, v696); + int16x8_t v698 = vqrdmulhq_n_s16(v697, 16389); + int16x8_t v699 = vaddq_s16(v522, v698); + int16x8_t v700 = vqrdmulhq_n_s16(v699, 16385); + int16x8_t v701 = vaddq_s16(v317, v700); + int16x8_t v702 = vsubq_s16(v0, v1); + int16x8_t v703 = vsubq_s16(v4, v6); + int16x8_t v704_tmp = vqrdmulhq_n_s16(v703, 10045); + int16x8_t v704 = vaddq_s16(v704_tmp, v703); + int16x8_t v705 = vaddq_s16(v702, v704); + int16x8_t v706 = vsubq_s16(v11, v14); + int16x8_t v707 = vsubq_s16(v17, v20); + int16x8_t v708_tmp = vqrdmulhq_n_s16(v707, 10045); + int16x8_t v708 = vaddq_s16(v708_tmp, v707); + int16x8_t v709 = vaddq_s16(v706, v708); + int16x8_t v710 
= vqrdmulhq_n_s16(v709, 19705); + int16x8_t v711 = vaddq_s16(v705, v710); + int16x8_t v712 = vsubq_s16(v27, v30); + int16x8_t v713 = vsubq_s16(v35, v39); + int16x8_t v714_tmp = vqrdmulhq_n_s16(v713, 10045); + int16x8_t v714 = vaddq_s16(v714_tmp, v713); + int16x8_t v715 = vaddq_s16(v712, v714); + int16x8_t v716 = vsubq_s16(v44, v47); + int16x8_t v717 = vsubq_s16(v50, v54); + int16x8_t v718_tmp = vqrdmulhq_n_s16(v717, 10045); + int16x8_t v718 = vaddq_s16(v718_tmp, v717); + int16x8_t v719 = vaddq_s16(v716, v718); + int16x8_t v720 = vqrdmulhq_n_s16(v719, 19705); + int16x8_t v721 = vaddq_s16(v715, v720); + int16x8_t v722 = vqrdmulhq_n_s16(v721, 17121); + int16x8_t v723 = vaddq_s16(v711, v722); + int16x8_t v724 = vsubq_s16(v63, v66); + int16x8_t v725 = vsubq_s16(v71, v75); + int16x8_t v726_tmp = vqrdmulhq_n_s16(v725, 10045); + int16x8_t v726 = vaddq_s16(v726_tmp, v725); + int16x8_t v727 = vaddq_s16(v724, v726); + int16x8_t v728 = vsubq_s16(v82, v89); + int16x8_t v729 = vsubq_s16(v92, v97); + int16x8_t v730_tmp = vqrdmulhq_n_s16(v729, 10045); + int16x8_t v730 = vaddq_s16(v730_tmp, v729); + int16x8_t v731 = vaddq_s16(v728, v730); + int16x8_t v732 = vqrdmulhq_n_s16(v731, 19705); + int16x8_t v733 = vaddq_s16(v727, v732); + int16x8_t v734 = vsubq_s16(v104, v107); + int16x8_t v735 = vsubq_s16(v112, v116); + int16x8_t v736_tmp = vqrdmulhq_n_s16(v735, 10045); + int16x8_t v736 = vaddq_s16(v736_tmp, v735); + int16x8_t v737 = vaddq_s16(v734, v736); + int16x8_t v738 = vsubq_s16(v121, v124); + int16x8_t v739 = vsubq_s16(v127, v132); + int16x8_t v740_tmp = vqrdmulhq_n_s16(v739, 10045); + int16x8_t v740 = vaddq_s16(v740_tmp, v739); + int16x8_t v741 = vaddq_s16(v738, v740); + int16x8_t v742 = vqrdmulhq_n_s16(v741, 19705); + int16x8_t v743 = vaddq_s16(v737, v742); + int16x8_t v744 = vqrdmulhq_n_s16(v743, 17121); + int16x8_t v745 = vaddq_s16(v733, v744); + int16x8_t v746 = vqrdmulhq_n_s16(v745, 16563); + int16x8_t v747 = vaddq_s16(v723, v746); + int16x8_t v748 = vsubq_s16(v143, v146); + 
int16x8_t v749 = vsubq_s16(v151, v155); + int16x8_t v750_tmp = vqrdmulhq_n_s16(v749, 10045); + int16x8_t v750 = vaddq_s16(v750_tmp, v749); + int16x8_t v751 = vaddq_s16(v748, v750); + int16x8_t v752 = vsubq_s16(v162, v169); + int16x8_t v753 = vqrdmulhq_n_s16(v752, 19705); + int16x8_t v754 = vsubq_s16(v172, v177); + int16x8_t v755 = vqrdmulhq_n_s16(v754, 25746); + int16x8_t v756 = vaddq_s16(v753, v755); + int16x8_t v757 = vaddq_s16(v751, v756); + int16x8_t v758 = vsubq_s16(v186, v193); + int16x8_t v759 = vsubq_s16(v202, v210); + int16x8_t v760_tmp = vqrdmulhq_n_s16(v759, 10045); + int16x8_t v760 = vaddq_s16(v760_tmp, v759); + int16x8_t v761 = vaddq_s16(v758, v760); + int16x8_t v762 = vsubq_s16(v215, v218); + int16x8_t v763 = vsubq_s16(v221, v227); + int16x8_t v764_tmp = vqrdmulhq_n_s16(v763, 10045); + int16x8_t v764 = vaddq_s16(v764_tmp, v763); + int16x8_t v765 = vaddq_s16(v762, v764); + int16x8_t v766 = vqrdmulhq_n_s16(v765, 19705); + int16x8_t v767 = vaddq_s16(v761, v766); + int16x8_t v768 = vqrdmulhq_n_s16(v767, 17121); + int16x8_t v769 = vaddq_s16(v757, v768); + int16x8_t v770 = vsubq_s16(v236, v239); + int16x8_t v771 = vsubq_s16(v244, v248); + int16x8_t v772_tmp = vqrdmulhq_n_s16(v771, 10045); + int16x8_t v772 = vaddq_s16(v772_tmp, v771); + int16x8_t v773 = vaddq_s16(v770, v772); + int16x8_t v774 = vsubq_s16(v255, v262); + int16x8_t v775 = vsubq_s16(v265, v270); + int16x8_t v776_tmp = vqrdmulhq_n_s16(v775, 10045); + int16x8_t v776 = vaddq_s16(v776_tmp, v775); + int16x8_t v777 = vaddq_s16(v774, v776); + int16x8_t v778 = vqrdmulhq_n_s16(v777, 19705); + int16x8_t v779 = vaddq_s16(v773, v778); + int16x8_t v780 = vsubq_s16(v277, v280); + int16x8_t v781 = vsubq_s16(v285, v289); + int16x8_t v782_tmp = vqrdmulhq_n_s16(v781, 10045); + int16x8_t v782 = vaddq_s16(v782_tmp, v781); + int16x8_t v783 = vaddq_s16(v780, v782); + int16x8_t v784 = vsubq_s16(v294, v297); + int16x8_t v785 = vsubq_s16(v300, v306); + int16x8_t v786_tmp = vqrdmulhq_n_s16(v785, 10045); + int16x8_t v786 
= vaddq_s16(v786_tmp, v785); + int16x8_t v787 = vaddq_s16(v784, v786); + int16x8_t v788 = vqrdmulhq_n_s16(v787, 19705); + int16x8_t v789 = vaddq_s16(v783, v788); + int16x8_t v790 = vqrdmulhq_n_s16(v789, 17121); + int16x8_t v791 = vaddq_s16(v779, v790); + int16x8_t v792 = vqrdmulhq_n_s16(v791, 16563); + int16x8_t v793 = vaddq_s16(v769, v792); + int16x8_t v794 = vqrdmulhq_n_s16(v793, 16429); + int16x8_t v795 = vaddq_s16(v747, v794); + int16x8_t v796 = vsubq_s16(v319, v322); + int16x8_t v797 = vsubq_s16(v327, v331); + int16x8_t v798_tmp = vqrdmulhq_n_s16(v797, 10045); + int16x8_t v798 = vaddq_s16(v798_tmp, v797); + int16x8_t v799 = vaddq_s16(v796, v798); + int16x8_t v800 = vsubq_s16(v338, v345); + int16x8_t v801 = vsubq_s16(v348, v353); + int16x8_t v802_tmp = vqrdmulhq_n_s16(v801, 10045); + int16x8_t v802 = vaddq_s16(v802_tmp, v801); + int16x8_t v803 = vaddq_s16(v800, v802); + int16x8_t v804 = vqrdmulhq_n_s16(v803, 19705); + int16x8_t v805 = vaddq_s16(v799, v804); + int16x8_t v806 = vsubq_s16(v362, v369); + int16x8_t v807 = vsubq_s16(v378, v386); + int16x8_t v808_tmp = vqrdmulhq_n_s16(v807, 10045); + int16x8_t v808 = vaddq_s16(v808_tmp, v807); + int16x8_t v809 = vaddq_s16(v806, v808); + int16x8_t v810 = vsubq_s16(v391, v394); + int16x8_t v811 = vsubq_s16(v397, v403); + int16x8_t v812_tmp = vqrdmulhq_n_s16(v811, 10045); + int16x8_t v812 = vaddq_s16(v812_tmp, v811); + int16x8_t v813 = vaddq_s16(v810, v812); + int16x8_t v814 = vqrdmulhq_n_s16(v813, 19705); + int16x8_t v815 = vaddq_s16(v809, v814); + int16x8_t v816 = vqrdmulhq_n_s16(v815, 17121); + int16x8_t v817 = vaddq_s16(v805, v816); + int16x8_t v818 = vsubq_s16(v414, v421); + int16x8_t v819 = vsubq_s16(v430, v438); + int16x8_t v820_tmp = vqrdmulhq_n_s16(v819, 10045); + int16x8_t v820 = vaddq_s16(v820_tmp, v819); + int16x8_t v821 = vaddq_s16(v818, v820); + int16x8_t v822 = vsubq_s16(v449, v464); + int16x8_t v823 = vsubq_s16(v467, v476); + int16x8_t v824_tmp = vqrdmulhq_n_s16(v823, 10045); + int16x8_t v824 = 
vaddq_s16(v824_tmp, v823); + int16x8_t v825 = vaddq_s16(v822, v824); + int16x8_t v826 = vqrdmulhq_n_s16(v825, 19705); + int16x8_t v827 = vaddq_s16(v821, v826); + int16x8_t v828 = vsubq_s16(v483, v486); + int16x8_t v829 = vsubq_s16(v491, v495); + int16x8_t v830_tmp = vqrdmulhq_n_s16(v829, 10045); + int16x8_t v830 = vaddq_s16(v830_tmp, v829); + int16x8_t v831 = vaddq_s16(v828, v830); + int16x8_t v832 = vsubq_s16(v500, v503); + int16x8_t v833 = vsubq_s16(v506, v513); + int16x8_t v834_tmp = vqrdmulhq_n_s16(v833, 10045); + int16x8_t v834 = vaddq_s16(v834_tmp, v833); + int16x8_t v835 = vaddq_s16(v832, v834); + int16x8_t v836 = vqrdmulhq_n_s16(v835, 19705); + int16x8_t v837 = vaddq_s16(v831, v836); + int16x8_t v838 = vqrdmulhq_n_s16(v837, 17121); + int16x8_t v839 = vaddq_s16(v827, v838); + int16x8_t v840 = vqrdmulhq_n_s16(v839, 16563); + int16x8_t v841 = vaddq_s16(v817, v840); + int16x8_t v842 = vsubq_s16(v524, v527); + int16x8_t v843 = vsubq_s16(v532, v536); + int16x8_t v844_tmp = vqrdmulhq_n_s16(v843, 10045); + int16x8_t v844 = vaddq_s16(v844_tmp, v843); + int16x8_t v845 = vaddq_s16(v842, v844); + int16x8_t v846 = vsubq_s16(v543, v550); + int16x8_t v847 = vsubq_s16(v553, v558); + int16x8_t v848_tmp = vqrdmulhq_n_s16(v847, 10045); + int16x8_t v848 = vaddq_s16(v848_tmp, v847); + int16x8_t v849 = vaddq_s16(v846, v848); + int16x8_t v850 = vqrdmulhq_n_s16(v849, 19705); + int16x8_t v851 = vaddq_s16(v845, v850); + int16x8_t v852 = vsubq_s16(v567, v574); + int16x8_t v853 = vsubq_s16(v583, v591); + int16x8_t v854_tmp = vqrdmulhq_n_s16(v853, 10045); + int16x8_t v854 = vaddq_s16(v854_tmp, v853); + int16x8_t v855 = vaddq_s16(v852, v854); + int16x8_t v856 = vsubq_s16(v596, v599); + int16x8_t v857 = vsubq_s16(v602, v608); + int16x8_t v858_tmp = vqrdmulhq_n_s16(v857, 10045); + int16x8_t v858 = vaddq_s16(v858_tmp, v857); + int16x8_t v859 = vaddq_s16(v856, v858); + int16x8_t v860 = vqrdmulhq_n_s16(v859, 19705); + int16x8_t v861 = vaddq_s16(v855, v860); + int16x8_t v862 = 
vqrdmulhq_n_s16(v861, 17121); + int16x8_t v863 = vaddq_s16(v851, v862); + int16x8_t v864 = vsubq_s16(v617, v620); + int16x8_t v865 = vsubq_s16(v625, v629); + int16x8_t v866_tmp = vqrdmulhq_n_s16(v865, 10045); + int16x8_t v866 = vaddq_s16(v866_tmp, v865); + int16x8_t v867 = vaddq_s16(v864, v866); + int16x8_t v868 = vsubq_s16(v636, v643); + int16x8_t v869 = vsubq_s16(v646, v651); + int16x8_t v870_tmp = vqrdmulhq_n_s16(v869, 10045); + int16x8_t v870 = vaddq_s16(v870_tmp, v869); + int16x8_t v871 = vaddq_s16(v868, v870); + int16x8_t v872 = vqrdmulhq_n_s16(v871, 19705); + int16x8_t v873 = vaddq_s16(v867, v872); + int16x8_t v874 = vsubq_s16(v658, v661); + int16x8_t v875 = vsubq_s16(v666, v670); + int16x8_t v876_tmp = vqrdmulhq_n_s16(v875, 10045); + int16x8_t v876 = vaddq_s16(v876_tmp, v875); + int16x8_t v877 = vaddq_s16(v874, v876); + int16x8_t v878 = vsubq_s16(v675, v678); + int16x8_t v879 = vsubq_s16(v681, v688); + int16x8_t v880_tmp = vqrdmulhq_n_s16(v879, 10045); + int16x8_t v880 = vaddq_s16(v880_tmp, v879); + int16x8_t v881 = vaddq_s16(v878, v880); + int16x8_t v882 = vqrdmulhq_n_s16(v881, 19705); + int16x8_t v883 = vaddq_s16(v877, v882); + int16x8_t v884 = vqrdmulhq_n_s16(v883, 17121); + int16x8_t v885 = vaddq_s16(v873, v884); + int16x8_t v886 = vqrdmulhq_n_s16(v885, 16563); + int16x8_t v887 = vaddq_s16(v863, v886); + int16x8_t v888 = vqrdmulhq_n_s16(v887, 16429); + int16x8_t v889 = vaddq_s16(v841, v888); + int16x8_t v890 = vqrdmulhq_n_s16(v889, 16395); + int16x8_t v891 = vaddq_s16(v795, v890); + int16x8_t v892 = vsubq_s16(v702, v704); + int16x8_t v893 = vsubq_s16(v706, v708); + int16x8_t v894 = vqrdmulhq_n_s16(v893, 29490); + int16x8_t v895 = vaddq_s16(v892, v894); + int16x8_t v896 = vsubq_s16(v712, v714); + int16x8_t v897 = vsubq_s16(v716, v718); + int16x8_t v898 = vqrdmulhq_n_s16(v897, 29490); + int16x8_t v899 = vaddq_s16(v896, v898); + int16x8_t v900 = vqrdmulhq_n_s16(v899, 18578); + int16x8_t v901 = vaddq_s16(v895, v900); + int16x8_t v902 = vsubq_s16(v724, 
v726); + int16x8_t v903 = vsubq_s16(v728, v730); + int16x8_t v904 = vqrdmulhq_n_s16(v903, 29490); + int16x8_t v905 = vaddq_s16(v902, v904); + int16x8_t v906 = vsubq_s16(v734, v736); + int16x8_t v907 = vsubq_s16(v738, v740); + int16x8_t v908 = vqrdmulhq_n_s16(v907, 29490); + int16x8_t v909 = vaddq_s16(v906, v908); + int16x8_t v910 = vqrdmulhq_n_s16(v909, 18578); + int16x8_t v911 = vaddq_s16(v905, v910); + int16x8_t v912 = vqrdmulhq_n_s16(v911, 16890); + int16x8_t v913 = vaddq_s16(v901, v912); + int16x8_t v914 = vsubq_s16(v748, v750); + int16x8_t v915_tmp = vqrdmulhq_n_s16(v754, 10045); + int16x8_t v915 = vaddq_s16(v915_tmp, v754); + int16x8_t v916 = vsubq_s16(v752, v915); + int16x8_t v917 = vqrdmulhq_n_s16(v916, 29490); + int16x8_t v918 = vaddq_s16(v914, v917); + int16x8_t v919 = vsubq_s16(v758, v760); + int16x8_t v920 = vsubq_s16(v762, v764); + int16x8_t v921 = vqrdmulhq_n_s16(v920, 29490); + int16x8_t v922 = vaddq_s16(v919, v921); + int16x8_t v923 = vqrdmulhq_n_s16(v922, 18578); + int16x8_t v924 = vaddq_s16(v918, v923); + int16x8_t v925 = vsubq_s16(v770, v772); + int16x8_t v926 = vsubq_s16(v774, v776); + int16x8_t v927 = vqrdmulhq_n_s16(v926, 29490); + int16x8_t v928 = vaddq_s16(v925, v927); + int16x8_t v929 = vsubq_s16(v780, v782); + int16x8_t v930 = vsubq_s16(v784, v786); + int16x8_t v931 = vqrdmulhq_n_s16(v930, 29490); + int16x8_t v932 = vaddq_s16(v929, v931); + int16x8_t v933 = vqrdmulhq_n_s16(v932, 18578); + int16x8_t v934 = vaddq_s16(v928, v933); + int16x8_t v935 = vqrdmulhq_n_s16(v934, 16890); + int16x8_t v936 = vaddq_s16(v924, v935); + int16x8_t v937 = vqrdmulhq_n_s16(v936, 16508); + int16x8_t v938 = vaddq_s16(v913, v937); + int16x8_t v939 = vsubq_s16(v796, v798); + int16x8_t v940 = vsubq_s16(v800, v802); + int16x8_t v941 = vqrdmulhq_n_s16(v940, 29490); + int16x8_t v942 = vaddq_s16(v939, v941); + int16x8_t v943 = vsubq_s16(v806, v808); + int16x8_t v944 = vsubq_s16(v810, v812); + int16x8_t v945 = vqrdmulhq_n_s16(v944, 29490); + int16x8_t v946 = 
vaddq_s16(v943, v945); + int16x8_t v947 = vqrdmulhq_n_s16(v946, 18578); + int16x8_t v948 = vaddq_s16(v942, v947); + int16x8_t v949 = vsubq_s16(v818, v820); + int16x8_t v950 = vsubq_s16(v822, v824); + int16x8_t v951 = vqrdmulhq_n_s16(v950, 29490); + int16x8_t v952 = vaddq_s16(v949, v951); + int16x8_t v953 = vsubq_s16(v828, v830); + int16x8_t v954 = vsubq_s16(v832, v834); + int16x8_t v955 = vqrdmulhq_n_s16(v954, 29490); + int16x8_t v956 = vaddq_s16(v953, v955); + int16x8_t v957 = vqrdmulhq_n_s16(v956, 18578); + int16x8_t v958 = vaddq_s16(v952, v957); + int16x8_t v959 = vqrdmulhq_n_s16(v958, 16890); + int16x8_t v960 = vaddq_s16(v948, v959); + int16x8_t v961 = vsubq_s16(v842, v844); + int16x8_t v962 = vsubq_s16(v846, v848); + int16x8_t v963 = vqrdmulhq_n_s16(v962, 29490); + int16x8_t v964 = vaddq_s16(v961, v963); + int16x8_t v965 = vsubq_s16(v852, v854); + int16x8_t v966 = vsubq_s16(v856, v858); + int16x8_t v967 = vqrdmulhq_n_s16(v966, 29490); + int16x8_t v968 = vaddq_s16(v965, v967); + int16x8_t v969 = vqrdmulhq_n_s16(v968, 18578); + int16x8_t v970 = vaddq_s16(v964, v969); + int16x8_t v971 = vsubq_s16(v864, v866); + int16x8_t v972 = vsubq_s16(v868, v870); + int16x8_t v973 = vqrdmulhq_n_s16(v972, 29490); + int16x8_t v974 = vaddq_s16(v971, v973); + int16x8_t v975 = vsubq_s16(v874, v876); + int16x8_t v976 = vsubq_s16(v878, v880); + int16x8_t v977 = vqrdmulhq_n_s16(v976, 29490); + int16x8_t v978 = vaddq_s16(v975, v977); + int16x8_t v979 = vqrdmulhq_n_s16(v978, 18578); + int16x8_t v980 = vaddq_s16(v974, v979); + int16x8_t v981 = vqrdmulhq_n_s16(v980, 16890); + int16x8_t v982 = vaddq_s16(v970, v981); + int16x8_t v983 = vqrdmulhq_n_s16(v982, 16508); + int16x8_t v984 = vaddq_s16(v960, v983); + int16x8_t v985 = vqrdmulhq_n_s16(v984, 16415); + int16x8_t v986 = vaddq_s16(v938, v985); + int16x8_t v987 = vsubq_s16(v2, v8); + int16x8_t v988 = vsubq_s16(v15, v22); + int16x8_t v989_tmp = vqrdmulhq_n_s16(v988, 18446); + int16x8_t v989 = vmlaq_n_s16(v989_tmp, v988, 2); + int16x8_t v990 
= vaddq_s16(v987, v989); + int16x8_t v991 = vsubq_s16(v31, v41); + int16x8_t v992 = vsubq_s16(v48, v56); + int16x8_t v993_tmp = vqrdmulhq_n_s16(v992, 18446); + int16x8_t v993 = vmlaq_n_s16(v993_tmp, v992, 2); + int16x8_t v994 = vaddq_s16(v991, v993); + int16x8_t v995 = vqrdmulhq_n_s16(v994, 21195); + int16x8_t v996 = vaddq_s16(v990, v995); + int16x8_t v997 = vsubq_s16(v67, v77); + int16x8_t v998 = vsubq_s16(v90, v99); + int16x8_t v999_tmp = vqrdmulhq_n_s16(v998, 18446); + int16x8_t v999 = vmlaq_n_s16(v999_tmp, v998, 2); + int16x8_t v1000 = vaddq_s16(v997, v999); + int16x8_t v1001 = vsubq_s16(v108, v118); + int16x8_t v1002 = vsubq_s16(v125, v134); + int16x8_t v1003_tmp = vqrdmulhq_n_s16(v1002, 18446); + int16x8_t v1003 = vmlaq_n_s16(v1003_tmp, v1002, 2); + int16x8_t v1004 = vaddq_s16(v1001, v1003); + int16x8_t v1005 = vqrdmulhq_n_s16(v1004, 21195); + int16x8_t v1006 = vaddq_s16(v1000, v1005); + int16x8_t v1007 = vqrdmulhq_n_s16(v1006, 17401); + int16x8_t v1008 = vaddq_s16(v996, v1007); + int16x8_t v1009 = vsubq_s16(v147, v157); + int16x8_t v1010 = vsubq_s16(v170, v179); + int16x8_t v1011_tmp = vqrdmulhq_n_s16(v1010, 18446); + int16x8_t v1011 = vmlaq_n_s16(v1011_tmp, v1010, 2); + int16x8_t v1012 = vaddq_s16(v1009, v1011); + int16x8_t v1013 = vsubq_s16(v194, v212); + int16x8_t v1014 = vsubq_s16(v219, v229); + int16x8_t v1015_tmp = vqrdmulhq_n_s16(v1014, 18446); + int16x8_t v1015 = vmlaq_n_s16(v1015_tmp, v1014, 2); + int16x8_t v1016 = vaddq_s16(v1013, v1015); + int16x8_t v1017 = vqrdmulhq_n_s16(v1016, 21195); + int16x8_t v1018 = vaddq_s16(v1012, v1017); + int16x8_t v1019 = vsubq_s16(v240, v250); + int16x8_t v1020 = vsubq_s16(v263, v272); + int16x8_t v1021_tmp = vqrdmulhq_n_s16(v1020, 18446); + int16x8_t v1021 = vmlaq_n_s16(v1021_tmp, v1020, 2); + int16x8_t v1022 = vaddq_s16(v1019, v1021); + int16x8_t v1023 = vsubq_s16(v281, v291); + int16x8_t v1024 = vsubq_s16(v298, v308); + int16x8_t v1025_tmp = vqrdmulhq_n_s16(v1024, 18446); + int16x8_t v1025 = vmlaq_n_s16(v1025_tmp, 
v1024, 2); + int16x8_t v1026 = vaddq_s16(v1023, v1025); + int16x8_t v1027 = vqrdmulhq_n_s16(v1026, 21195); + int16x8_t v1028 = vaddq_s16(v1022, v1027); + int16x8_t v1029 = vqrdmulhq_n_s16(v1028, 17401); + int16x8_t v1030 = vaddq_s16(v1018, v1029); + int16x8_t v1031 = vqrdmulhq_n_s16(v1030, 16629); + int16x8_t v1032 = vaddq_s16(v1008, v1031); + int16x8_t v1033 = vsubq_s16(v323, v333); + int16x8_t v1034 = vsubq_s16(v346, v355); + int16x8_t v1035_tmp = vqrdmulhq_n_s16(v1034, 18446); + int16x8_t v1035 = vmlaq_n_s16(v1035_tmp, v1034, 2); + int16x8_t v1036 = vaddq_s16(v1033, v1035); + int16x8_t v1037 = vsubq_s16(v370, v388); + int16x8_t v1038 = vsubq_s16(v395, v405); + int16x8_t v1039_tmp = vqrdmulhq_n_s16(v1038, 18446); + int16x8_t v1039 = vmlaq_n_s16(v1039_tmp, v1038, 2); + int16x8_t v1040 = vaddq_s16(v1037, v1039); + int16x8_t v1041 = vqrdmulhq_n_s16(v1040, 21195); + int16x8_t v1042 = vaddq_s16(v1036, v1041); + int16x8_t v1043 = vsubq_s16(v422, v440); + int16x8_t v1044 = vsubq_s16(v465, v478); + int16x8_t v1045_tmp = vqrdmulhq_n_s16(v1044, 18446); + int16x8_t v1045 = vmlaq_n_s16(v1045_tmp, v1044, 2); + int16x8_t v1046 = vaddq_s16(v1043, v1045); + int16x8_t v1047 = vsubq_s16(v487, v497); + int16x8_t v1048 = vsubq_s16(v504, v515); + int16x8_t v1049_tmp = vqrdmulhq_n_s16(v1048, 18446); + int16x8_t v1049 = vmlaq_n_s16(v1049_tmp, v1048, 2); + int16x8_t v1050 = vaddq_s16(v1047, v1049); + int16x8_t v1051 = vqrdmulhq_n_s16(v1050, 21195); + int16x8_t v1052 = vaddq_s16(v1046, v1051); + int16x8_t v1053 = vqrdmulhq_n_s16(v1052, 17401); + int16x8_t v1054 = vaddq_s16(v1042, v1053); + int16x8_t v1055 = vsubq_s16(v528, v538); + int16x8_t v1056 = vsubq_s16(v551, v560); + int16x8_t v1057_tmp = vqrdmulhq_n_s16(v1056, 18446); + int16x8_t v1057 = vmlaq_n_s16(v1057_tmp, v1056, 2); + int16x8_t v1058 = vaddq_s16(v1055, v1057); + int16x8_t v1059 = vsubq_s16(v575, v593); + int16x8_t v1060 = vsubq_s16(v600, v610); + int16x8_t v1061_tmp = vqrdmulhq_n_s16(v1060, 18446); + int16x8_t v1061 = 
vmlaq_n_s16(v1061_tmp, v1060, 2); + int16x8_t v1062 = vaddq_s16(v1059, v1061); + int16x8_t v1063 = vqrdmulhq_n_s16(v1062, 21195); + int16x8_t v1064 = vaddq_s16(v1058, v1063); + int16x8_t v1065 = vsubq_s16(v621, v631); + int16x8_t v1066 = vsubq_s16(v644, v653); + int16x8_t v1067_tmp = vqrdmulhq_n_s16(v1066, 18446); + int16x8_t v1067 = vmlaq_n_s16(v1067_tmp, v1066, 2); + int16x8_t v1068 = vaddq_s16(v1065, v1067); + int16x8_t v1069 = vsubq_s16(v662, v672); + int16x8_t v1070 = vsubq_s16(v679, v690); + int16x8_t v1071_tmp = vqrdmulhq_n_s16(v1070, 18446); + int16x8_t v1071 = vmlaq_n_s16(v1071_tmp, v1070, 2); + int16x8_t v1072 = vaddq_s16(v1069, v1071); + int16x8_t v1073 = vqrdmulhq_n_s16(v1072, 21195); + int16x8_t v1074 = vaddq_s16(v1068, v1073); + int16x8_t v1075 = vqrdmulhq_n_s16(v1074, 17401); + int16x8_t v1076 = vaddq_s16(v1064, v1075); + int16x8_t v1077 = vqrdmulhq_n_s16(v1076, 16629); + int16x8_t v1078 = vaddq_s16(v1054, v1077); + int16x8_t v1079 = vqrdmulhq_n_s16(v1078, 16445); + int16x8_t v1080 = vaddq_s16(v1032, v1079); + int16x8_t v1081 = vsubq_s16(v987, v989); + int16x8_t v1082 = vsubq_s16(v991, v993); + int16x8_t v1083 = vqrdmulhq_n_s16(v1082, 25826); + int16x8_t v1084 = vaddq_s16(v1081, v1083); + int16x8_t v1085 = vsubq_s16(v997, v999); + int16x8_t v1086 = vsubq_s16(v1001, v1003); + int16x8_t v1087 = vqrdmulhq_n_s16(v1086, 25826); + int16x8_t v1088 = vaddq_s16(v1085, v1087); + int16x8_t v1089 = vqrdmulhq_n_s16(v1088, 18124); + int16x8_t v1090 = vaddq_s16(v1084, v1089); + int16x8_t v1091 = vsubq_s16(v1009, v1011); + int16x8_t v1092 = vsubq_s16(v1013, v1015); + int16x8_t v1093 = vqrdmulhq_n_s16(v1092, 25826); + int16x8_t v1094 = vaddq_s16(v1091, v1093); + int16x8_t v1095 = vsubq_s16(v1019, v1021); + int16x8_t v1096 = vsubq_s16(v1023, v1025); + int16x8_t v1097 = vqrdmulhq_n_s16(v1096, 25826); + int16x8_t v1098 = vaddq_s16(v1095, v1097); + int16x8_t v1099 = vqrdmulhq_n_s16(v1098, 18124); + int16x8_t v1100 = vaddq_s16(v1094, v1099); + int16x8_t v1101 = 
vqrdmulhq_n_s16(v1100, 16792); + int16x8_t v1102 = vaddq_s16(v1090, v1101); + int16x8_t v1103 = vsubq_s16(v1033, v1035); + int16x8_t v1104 = vsubq_s16(v1037, v1039); + int16x8_t v1105 = vqrdmulhq_n_s16(v1104, 25826); + int16x8_t v1106 = vaddq_s16(v1103, v1105); + int16x8_t v1107 = vsubq_s16(v1043, v1045); + int16x8_t v1108 = vsubq_s16(v1047, v1049); + int16x8_t v1109 = vqrdmulhq_n_s16(v1108, 25826); + int16x8_t v1110 = vaddq_s16(v1107, v1109); + int16x8_t v1111 = vqrdmulhq_n_s16(v1110, 18124); + int16x8_t v1112 = vaddq_s16(v1106, v1111); + int16x8_t v1113 = vsubq_s16(v1055, v1057); + int16x8_t v1114 = vsubq_s16(v1059, v1061); + int16x8_t v1115 = vqrdmulhq_n_s16(v1114, 25826); + int16x8_t v1116 = vaddq_s16(v1113, v1115); + int16x8_t v1117 = vsubq_s16(v1065, v1067); + int16x8_t v1118 = vsubq_s16(v1069, v1071); + int16x8_t v1119 = vqrdmulhq_n_s16(v1118, 25826); + int16x8_t v1120 = vaddq_s16(v1117, v1119); + int16x8_t v1121 = vqrdmulhq_n_s16(v1120, 18124); + int16x8_t v1122 = vaddq_s16(v1116, v1121); + int16x8_t v1123 = vqrdmulhq_n_s16(v1122, 16792); + int16x8_t v1124 = vaddq_s16(v1112, v1123); + int16x8_t v1125 = vqrdmulhq_n_s16(v1124, 16484); + int16x8_t v1126 = vaddq_s16(v1102, v1125); + int16x8_t v1127 = vsubq_s16(v892, v894); + int16x8_t v1128 = vsubq_s16(v896, v898); + int16x8_t v1129_tmp = vqrdmulhq_n_s16(v1128, 1988); + int16x8_t v1129 = vaddq_s16(v1129_tmp, v1128); + int16x8_t v1130 = vaddq_s16(v1127, v1129); + int16x8_t v1131 = vsubq_s16(v902, v904); + int16x8_t v1132 = vsubq_s16(v906, v908); + int16x8_t v1133_tmp = vqrdmulhq_n_s16(v1132, 1988); + int16x8_t v1133 = vaddq_s16(v1133_tmp, v1132); + int16x8_t v1134 = vaddq_s16(v1131, v1133); + int16x8_t v1135 = vqrdmulhq_n_s16(v1134, 19102); + int16x8_t v1136 = vaddq_s16(v1130, v1135); + int16x8_t v1137 = vsubq_s16(v914, v917); + int16x8_t v1138 = vsubq_s16(v919, v921); + int16x8_t v1139_tmp = vqrdmulhq_n_s16(v1138, 1988); + int16x8_t v1139 = vaddq_s16(v1139_tmp, v1138); + int16x8_t v1140 = vaddq_s16(v1137, 
v1139); + int16x8_t v1141 = vsubq_s16(v925, v927); + int16x8_t v1142 = vsubq_s16(v929, v931); + int16x8_t v1143_tmp = vqrdmulhq_n_s16(v1142, 1988); + int16x8_t v1143 = vaddq_s16(v1143_tmp, v1142); + int16x8_t v1144 = vaddq_s16(v1141, v1143); + int16x8_t v1145 = vqrdmulhq_n_s16(v1144, 19102); + int16x8_t v1146 = vaddq_s16(v1140, v1145); + int16x8_t v1147 = vqrdmulhq_n_s16(v1146, 17000); + int16x8_t v1148 = vaddq_s16(v1136, v1147); + int16x8_t v1149 = vsubq_s16(v939, v941); + int16x8_t v1150 = vsubq_s16(v943, v945); + int16x8_t v1151_tmp = vqrdmulhq_n_s16(v1150, 1988); + int16x8_t v1151 = vaddq_s16(v1151_tmp, v1150); + int16x8_t v1152 = vaddq_s16(v1149, v1151); + int16x8_t v1153 = vsubq_s16(v949, v951); + int16x8_t v1154 = vsubq_s16(v953, v955); + int16x8_t v1155_tmp = vqrdmulhq_n_s16(v1154, 1988); + int16x8_t v1155 = vaddq_s16(v1155_tmp, v1154); + int16x8_t v1156 = vaddq_s16(v1153, v1155); + int16x8_t v1157 = vqrdmulhq_n_s16(v1156, 19102); + int16x8_t v1158 = vaddq_s16(v1152, v1157); + int16x8_t v1159 = vsubq_s16(v961, v963); + int16x8_t v1160 = vsubq_s16(v965, v967); + int16x8_t v1161_tmp = vqrdmulhq_n_s16(v1160, 1988); + int16x8_t v1161 = vaddq_s16(v1161_tmp, v1160); + int16x8_t v1162 = vaddq_s16(v1159, v1161); + int16x8_t v1163 = vsubq_s16(v971, v973); + int16x8_t v1164 = vsubq_s16(v975, v977); + int16x8_t v1165_tmp = vqrdmulhq_n_s16(v1164, 1988); + int16x8_t v1165 = vaddq_s16(v1165_tmp, v1164); + int16x8_t v1166 = vaddq_s16(v1163, v1165); + int16x8_t v1167 = vqrdmulhq_n_s16(v1166, 19102); + int16x8_t v1168 = vaddq_s16(v1162, v1167); + int16x8_t v1169 = vqrdmulhq_n_s16(v1168, 17000); + int16x8_t v1170 = vaddq_s16(v1158, v1169); + int16x8_t v1171 = vqrdmulhq_n_s16(v1170, 16534); + int16x8_t v1172 = vaddq_s16(v1148, v1171); + int16x8_t v1173 = vsubq_s16(v705, v710); + int16x8_t v1174 = vsubq_s16(v715, v720); + int16x8_t v1175_tmp = vqrdmulhq_n_s16(v1174, 23673); + int16x8_t v1175 = vaddq_s16(v1175_tmp, v1174); + int16x8_t v1176 = vaddq_s16(v1173, v1175); + 
int16x8_t v1177 = vsubq_s16(v727, v732); + int16x8_t v1178 = vsubq_s16(v737, v742); + int16x8_t v1179_tmp = vqrdmulhq_n_s16(v1178, 23673); + int16x8_t v1179 = vaddq_s16(v1179_tmp, v1178); + int16x8_t v1180 = vaddq_s16(v1177, v1179); + int16x8_t v1181 = vqrdmulhq_n_s16(v1180, 20398); + int16x8_t v1182 = vaddq_s16(v1176, v1181); + int16x8_t v1183 = vsubq_s16(v751, v756); + int16x8_t v1184 = vsubq_s16(v761, v766); + int16x8_t v1185_tmp = vqrdmulhq_n_s16(v1184, 23673); + int16x8_t v1185 = vaddq_s16(v1185_tmp, v1184); + int16x8_t v1186 = vaddq_s16(v1183, v1185); + int16x8_t v1187 = vsubq_s16(v773, v778); + int16x8_t v1188 = vsubq_s16(v783, v788); + int16x8_t v1189_tmp = vqrdmulhq_n_s16(v1188, 23673); + int16x8_t v1189 = vaddq_s16(v1189_tmp, v1188); + int16x8_t v1190 = vaddq_s16(v1187, v1189); + int16x8_t v1191 = vqrdmulhq_n_s16(v1190, 20398); + int16x8_t v1192 = vaddq_s16(v1186, v1191); + int16x8_t v1193 = vqrdmulhq_n_s16(v1192, 17255); + int16x8_t v1194 = vaddq_s16(v1182, v1193); + int16x8_t v1195 = vsubq_s16(v799, v804); + int16x8_t v1196 = vsubq_s16(v809, v814); + int16x8_t v1197_tmp = vqrdmulhq_n_s16(v1196, 23673); + int16x8_t v1197 = vaddq_s16(v1197_tmp, v1196); + int16x8_t v1198 = vaddq_s16(v1195, v1197); + int16x8_t v1199 = vsubq_s16(v821, v826); + int16x8_t v1200 = vsubq_s16(v831, v836); + int16x8_t v1201_tmp = vqrdmulhq_n_s16(v1200, 23673); + int16x8_t v1201 = vaddq_s16(v1201_tmp, v1200); + int16x8_t v1202 = vaddq_s16(v1199, v1201); + int16x8_t v1203 = vqrdmulhq_n_s16(v1202, 20398); + int16x8_t v1204 = vaddq_s16(v1198, v1203); + int16x8_t v1205 = vsubq_s16(v845, v850); + int16x8_t v1206 = vsubq_s16(v855, v860); + int16x8_t v1207_tmp = vqrdmulhq_n_s16(v1206, 23673); + int16x8_t v1207 = vaddq_s16(v1207_tmp, v1206); + int16x8_t v1208 = vaddq_s16(v1205, v1207); + int16x8_t v1209 = vsubq_s16(v867, v872); + int16x8_t v1210 = vsubq_s16(v877, v882); + int16x8_t v1211_tmp = vqrdmulhq_n_s16(v1210, 23673); + int16x8_t v1211 = vaddq_s16(v1211_tmp, v1210); + int16x8_t v1212 
= vaddq_s16(v1209, v1211); + int16x8_t v1213 = vqrdmulhq_n_s16(v1212, 20398); + int16x8_t v1214 = vaddq_s16(v1208, v1213); + int16x8_t v1215 = vqrdmulhq_n_s16(v1214, 17255); + int16x8_t v1216 = vaddq_s16(v1204, v1215); + int16x8_t v1217 = vqrdmulhq_n_s16(v1216, 16595); + int16x8_t v1218 = vaddq_s16(v1194, v1217); + int16x8_t v1219 = vsubq_s16(v9, v24); + int16x8_t v1220 = vsubq_s16(v42, v58); + int16x8_t v1221_tmp = vqrdmulhq_n_s16(v1220, 3314); + int16x8_t v1221 = vmlaq_n_s16(v1221_tmp, v1220, 5); + int16x8_t v1222 = vaddq_s16(v1219, v1221); + int16x8_t v1223 = vsubq_s16(v78, v101); + int16x8_t v1224 = vsubq_s16(v119, v136); + int16x8_t v1225_tmp = vqrdmulhq_n_s16(v1224, 3314); + int16x8_t v1225 = vmlaq_n_s16(v1225_tmp, v1224, 5); + int16x8_t v1226 = vaddq_s16(v1223, v1225); + int16x8_t v1227 = vqrdmulhq_n_s16(v1226, 22112); + int16x8_t v1228 = vaddq_s16(v1222, v1227); + int16x8_t v1229 = vsubq_s16(v158, v181); + int16x8_t v1230 = vsubq_s16(v213, v231); + int16x8_t v1231_tmp = vqrdmulhq_n_s16(v1230, 3314); + int16x8_t v1231 = vmlaq_n_s16(v1231_tmp, v1230, 5); + int16x8_t v1232 = vaddq_s16(v1229, v1231); + int16x8_t v1233 = vsubq_s16(v251, v274); + int16x8_t v1234 = vsubq_s16(v292, v310); + int16x8_t v1235_tmp = vqrdmulhq_n_s16(v1234, 3314); + int16x8_t v1235 = vmlaq_n_s16(v1235_tmp, v1234, 5); + int16x8_t v1236 = vaddq_s16(v1233, v1235); + int16x8_t v1237 = vqrdmulhq_n_s16(v1236, 22112); + int16x8_t v1238 = vaddq_s16(v1232, v1237); + int16x8_t v1239 = vqrdmulhq_n_s16(v1238, 17561); + int16x8_t v1240 = vaddq_s16(v1228, v1239); + int16x8_t v1241 = vsubq_s16(v334, v357); + int16x8_t v1242 = vsubq_s16(v389, v407); + int16x8_t v1243_tmp = vqrdmulhq_n_s16(v1242, 3314); + int16x8_t v1243 = vmlaq_n_s16(v1243_tmp, v1242, 5); + int16x8_t v1244 = vaddq_s16(v1241, v1243); + int16x8_t v1245 = vsubq_s16(v441, v480); + int16x8_t v1246 = vsubq_s16(v498, v517); + int16x8_t v1247_tmp = vqrdmulhq_n_s16(v1246, 3314); + int16x8_t v1247 = vmlaq_n_s16(v1247_tmp, v1246, 5); + int16x8_t 
v1248 = vaddq_s16(v1245, v1247); + int16x8_t v1249 = vqrdmulhq_n_s16(v1248, 22112); + int16x8_t v1250 = vaddq_s16(v1244, v1249); + int16x8_t v1251 = vsubq_s16(v539, v562); + int16x8_t v1252 = vsubq_s16(v594, v612); + int16x8_t v1253_tmp = vqrdmulhq_n_s16(v1252, 3314); + int16x8_t v1253 = vmlaq_n_s16(v1253_tmp, v1252, 5); + int16x8_t v1254 = vaddq_s16(v1251, v1253); + int16x8_t v1255 = vsubq_s16(v632, v655); + int16x8_t v1256 = vsubq_s16(v673, v692); + int16x8_t v1257_tmp = vqrdmulhq_n_s16(v1256, 3314); + int16x8_t v1257 = vmlaq_n_s16(v1257_tmp, v1256, 5); + int16x8_t v1258 = vaddq_s16(v1255, v1257); + int16x8_t v1259 = vqrdmulhq_n_s16(v1258, 22112); + int16x8_t v1260 = vaddq_s16(v1254, v1259); + int16x8_t v1261 = vqrdmulhq_n_s16(v1260, 17561); + int16x8_t v1262 = vaddq_s16(v1250, v1261); + int16x8_t v1263 = vqrdmulhq_n_s16(v1262, 16666); + int16x8_t v1264 = vaddq_s16(v1240, v1263); + int16x8_t v1265 = vsubq_s16(v1219, v1221); + int16x8_t v1266 = vsubq_s16(v1223, v1225); + int16x8_t v1267 = vqrdmulhq_n_s16(v1266, 24397); + int16x8_t v1268 = vaddq_s16(v1265, v1267); + int16x8_t v1269 = vsubq_s16(v1229, v1231); + int16x8_t v1270 = vsubq_s16(v1233, v1235); + int16x8_t v1271 = vqrdmulhq_n_s16(v1270, 24397); + int16x8_t v1272 = vaddq_s16(v1269, v1271); + int16x8_t v1273 = vqrdmulhq_n_s16(v1272, 17921); + int16x8_t v1274 = vaddq_s16(v1268, v1273); + int16x8_t v1275 = vsubq_s16(v1241, v1243); + int16x8_t v1276 = vsubq_s16(v1245, v1247); + int16x8_t v1277 = vqrdmulhq_n_s16(v1276, 24397); + int16x8_t v1278 = vaddq_s16(v1275, v1277); + int16x8_t v1279 = vsubq_s16(v1251, v1253); + int16x8_t v1280 = vsubq_s16(v1255, v1257); + int16x8_t v1281 = vqrdmulhq_n_s16(v1280, 24397); + int16x8_t v1282 = vaddq_s16(v1279, v1281); + int16x8_t v1283 = vqrdmulhq_n_s16(v1282, 17921); + int16x8_t v1284 = vaddq_s16(v1278, v1283); + int16x8_t v1285 = vqrdmulhq_n_s16(v1284, 16747); + int16x8_t v1286 = vaddq_s16(v1274, v1285); + int16x8_t v1287 = vsubq_s16(v1173, v1175); + int16x8_t v1288 = 
vsubq_s16(v1177, v1179); + int16x8_t v1289 = vqrdmulhq_n_s16(v1288, 27504); + int16x8_t v1290 = vaddq_s16(v1287, v1289); + int16x8_t v1291 = vsubq_s16(v1183, v1185); + int16x8_t v1292 = vsubq_s16(v1187, v1189); + int16x8_t v1293 = vqrdmulhq_n_s16(v1292, 27504); + int16x8_t v1294 = vaddq_s16(v1291, v1293); + int16x8_t v1295 = vqrdmulhq_n_s16(v1294, 18343); + int16x8_t v1296 = vaddq_s16(v1290, v1295); + int16x8_t v1297 = vsubq_s16(v1195, v1197); + int16x8_t v1298 = vsubq_s16(v1199, v1201); + int16x8_t v1299 = vqrdmulhq_n_s16(v1298, 27504); + int16x8_t v1300 = vaddq_s16(v1297, v1299); + int16x8_t v1301 = vsubq_s16(v1205, v1207); + int16x8_t v1302 = vsubq_s16(v1209, v1211); + int16x8_t v1303 = vqrdmulhq_n_s16(v1302, 27504); + int16x8_t v1304 = vaddq_s16(v1301, v1303); + int16x8_t v1305 = vqrdmulhq_n_s16(v1304, 18343); + int16x8_t v1306 = vaddq_s16(v1300, v1305); + int16x8_t v1307 = vqrdmulhq_n_s16(v1306, 16840); + int16x8_t v1308 = vaddq_s16(v1296, v1307); + int16x8_t v1309 = vsubq_s16(v1127, v1129); + int16x8_t v1310 = vsubq_s16(v1131, v1133); + int16x8_t v1311 = vqrdmulhq_n_s16(v1310, 31869); + int16x8_t v1312 = vaddq_s16(v1309, v1311); + int16x8_t v1313 = vsubq_s16(v1137, v1139); + int16x8_t v1314 = vsubq_s16(v1141, v1143); + int16x8_t v1315 = vqrdmulhq_n_s16(v1314, 31869); + int16x8_t v1316 = vaddq_s16(v1313, v1315); + int16x8_t v1317 = vqrdmulhq_n_s16(v1316, 18830); + int16x8_t v1318 = vaddq_s16(v1312, v1317); + int16x8_t v1319 = vsubq_s16(v1149, v1151); + int16x8_t v1320 = vsubq_s16(v1153, v1155); + int16x8_t v1321 = vqrdmulhq_n_s16(v1320, 31869); + int16x8_t v1322 = vaddq_s16(v1319, v1321); + int16x8_t v1323 = vsubq_s16(v1159, v1161); + int16x8_t v1324 = vsubq_s16(v1163, v1165); + int16x8_t v1325 = vqrdmulhq_n_s16(v1324, 31869); + int16x8_t v1326 = vaddq_s16(v1323, v1325); + int16x8_t v1327 = vqrdmulhq_n_s16(v1326, 18830); + int16x8_t v1328 = vaddq_s16(v1322, v1327); + int16x8_t v1329 = vqrdmulhq_n_s16(v1328, 16944); + int16x8_t v1330 = vaddq_s16(v1318, v1329); 
+ int16x8_t v1331 = vsubq_s16(v1081, v1083); + int16x8_t v1332 = vsubq_s16(v1085, v1087); + int16x8_t v1333_tmp = vqrdmulhq_n_s16(v1332, 5552); + int16x8_t v1333 = vaddq_s16(v1333_tmp, v1332); + int16x8_t v1334 = vaddq_s16(v1331, v1333); + int16x8_t v1335 = vsubq_s16(v1091, v1093); + int16x8_t v1336 = vsubq_s16(v1095, v1097); + int16x8_t v1337_tmp = vqrdmulhq_n_s16(v1336, 5552); + int16x8_t v1337 = vaddq_s16(v1337_tmp, v1336); + int16x8_t v1338 = vaddq_s16(v1335, v1337); + int16x8_t v1339 = vqrdmulhq_n_s16(v1338, 19393); + int16x8_t v1340 = vaddq_s16(v1334, v1339); + int16x8_t v1341 = vsubq_s16(v1103, v1105); + int16x8_t v1342 = vsubq_s16(v1107, v1109); + int16x8_t v1343_tmp = vqrdmulhq_n_s16(v1342, 5552); + int16x8_t v1343 = vaddq_s16(v1343_tmp, v1342); + int16x8_t v1344 = vaddq_s16(v1341, v1343); + int16x8_t v1345 = vsubq_s16(v1113, v1115); + int16x8_t v1346 = vsubq_s16(v1117, v1119); + int16x8_t v1347_tmp = vqrdmulhq_n_s16(v1346, 5552); + int16x8_t v1347 = vaddq_s16(v1347_tmp, v1346); + int16x8_t v1348 = vaddq_s16(v1345, v1347); + int16x8_t v1349 = vqrdmulhq_n_s16(v1348, 19393); + int16x8_t v1350 = vaddq_s16(v1344, v1349); + int16x8_t v1351 = vqrdmulhq_n_s16(v1350, 17059); + int16x8_t v1352 = vaddq_s16(v1340, v1351); + int16x8_t v1353 = vsubq_s16(v990, v995); + int16x8_t v1354 = vsubq_s16(v1000, v1005); + int16x8_t v1355_tmp = vqrdmulhq_n_s16(v1354, 15865); + int16x8_t v1355 = vaddq_s16(v1355_tmp, v1354); + int16x8_t v1356 = vaddq_s16(v1353, v1355); + int16x8_t v1357 = vsubq_s16(v1012, v1017); + int16x8_t v1358 = vsubq_s16(v1022, v1027); + int16x8_t v1359_tmp = vqrdmulhq_n_s16(v1358, 15865); + int16x8_t v1359 = vaddq_s16(v1359_tmp, v1358); + int16x8_t v1360 = vaddq_s16(v1357, v1359); + int16x8_t v1361 = vqrdmulhq_n_s16(v1360, 20040); + int16x8_t v1362 = vaddq_s16(v1356, v1361); + int16x8_t v1363 = vsubq_s16(v1036, v1041); + int16x8_t v1364 = vsubq_s16(v1046, v1051); + int16x8_t v1365_tmp = vqrdmulhq_n_s16(v1364, 15865); + int16x8_t v1365 = vaddq_s16(v1365_tmp, 
v1364); + int16x8_t v1366 = vaddq_s16(v1363, v1365); + int16x8_t v1367 = vsubq_s16(v1058, v1063); + int16x8_t v1368 = vsubq_s16(v1068, v1073); + int16x8_t v1369_tmp = vqrdmulhq_n_s16(v1368, 15865); + int16x8_t v1369 = vaddq_s16(v1369_tmp, v1368); + int16x8_t v1370 = vaddq_s16(v1367, v1369); + int16x8_t v1371 = vqrdmulhq_n_s16(v1370, 20040); + int16x8_t v1372 = vaddq_s16(v1366, v1371); + int16x8_t v1373 = vqrdmulhq_n_s16(v1372, 17187); + int16x8_t v1374 = vaddq_s16(v1362, v1373); + int16x8_t v1375 = vsubq_s16(v895, v900); + int16x8_t v1376 = vsubq_s16(v905, v910); + int16x8_t v1377_tmp = vqrdmulhq_n_s16(v1376, 1893); + int16x8_t v1377 = vmlaq_n_s16(v1377_tmp, v1376, 2); + int16x8_t v1378 = vaddq_s16(v1375, v1377); + int16x8_t v1379 = vsubq_s16(v918, v923); + int16x8_t v1380 = vsubq_s16(v928, v933); + int16x8_t v1381_tmp = vqrdmulhq_n_s16(v1380, 1893); + int16x8_t v1381 = vmlaq_n_s16(v1381_tmp, v1380, 2); + int16x8_t v1382 = vaddq_s16(v1379, v1381); + int16x8_t v1383 = vqrdmulhq_n_s16(v1382, 20783); + int16x8_t v1384 = vaddq_s16(v1378, v1383); + int16x8_t v1385 = vsubq_s16(v942, v947); + int16x8_t v1386 = vsubq_s16(v952, v957); + int16x8_t v1387_tmp = vqrdmulhq_n_s16(v1386, 1893); + int16x8_t v1387 = vmlaq_n_s16(v1387_tmp, v1386, 2); + int16x8_t v1388 = vaddq_s16(v1385, v1387); + int16x8_t v1389 = vsubq_s16(v964, v969); + int16x8_t v1390 = vsubq_s16(v974, v979); + int16x8_t v1391_tmp = vqrdmulhq_n_s16(v1390, 1893); + int16x8_t v1391 = vmlaq_n_s16(v1391_tmp, v1390, 2); + int16x8_t v1392 = vaddq_s16(v1389, v1391); + int16x8_t v1393 = vqrdmulhq_n_s16(v1392, 20783); + int16x8_t v1394 = vaddq_s16(v1388, v1393); + int16x8_t v1395 = vqrdmulhq_n_s16(v1394, 17326); + int16x8_t v1396 = vaddq_s16(v1384, v1395); + int16x8_t v1397 = vsubq_s16(v711, v722); + int16x8_t v1398 = vsubq_s16(v733, v744); + int16x8_t v1399_tmp = vqrdmulhq_n_s16(v1398, 13357); + int16x8_t v1399 = vmlaq_n_s16(v1399_tmp, v1398, 3); + int16x8_t v1400 = vaddq_s16(v1397, v1399); + int16x8_t v1401 = 
vsubq_s16(v757, v768); + int16x8_t v1402 = vsubq_s16(v779, v790); + int16x8_t v1403_tmp = vqrdmulhq_n_s16(v1402, 13357); + int16x8_t v1403 = vmlaq_n_s16(v1403_tmp, v1402, 3); + int16x8_t v1404 = vaddq_s16(v1401, v1403); + int16x8_t v1405 = vqrdmulhq_n_s16(v1404, 21637); + int16x8_t v1406 = vaddq_s16(v1400, v1405); + int16x8_t v1407 = vsubq_s16(v805, v816); + int16x8_t v1408 = vsubq_s16(v827, v838); + int16x8_t v1409_tmp = vqrdmulhq_n_s16(v1408, 13357); + int16x8_t v1409 = vmlaq_n_s16(v1409_tmp, v1408, 3); + int16x8_t v1410 = vaddq_s16(v1407, v1409); + int16x8_t v1411 = vsubq_s16(v851, v862); + int16x8_t v1412 = vsubq_s16(v873, v884); + int16x8_t v1413_tmp = vqrdmulhq_n_s16(v1412, 13357); + int16x8_t v1413 = vmlaq_n_s16(v1413_tmp, v1412, 3); + int16x8_t v1414 = vaddq_s16(v1411, v1413); + int16x8_t v1415 = vqrdmulhq_n_s16(v1414, 21637); + int16x8_t v1416 = vaddq_s16(v1410, v1415); + int16x8_t v1417 = vqrdmulhq_n_s16(v1416, 17479); + int16x8_t v1418 = vaddq_s16(v1406, v1417); + int16x8_t v1419 = vsubq_s16(v25, v60); + int16x8_t v1420 = vsubq_s16(v102, v138); + int16x8_t v1421_tmp = vqrdmulhq_n_s16(v1420, 6226); + int16x8_t v1421 = vmlaq_n_s16(v1421_tmp, v1420, 10); + int16x8_t v1422 = vaddq_s16(v1419, v1421); + int16x8_t v1423 = vsubq_s16(v182, v233); + int16x8_t v1424 = vsubq_s16(v275, v312); + int16x8_t v1425_tmp = vqrdmulhq_n_s16(v1424, 6226); + int16x8_t v1425 = vmlaq_n_s16(v1425_tmp, v1424, 10); + int16x8_t v1426 = vaddq_s16(v1423, v1425); + int16x8_t v1427 = vqrdmulhq_n_s16(v1426, 22622); + int16x8_t v1428 = vaddq_s16(v1422, v1427); + int16x8_t v1429 = vsubq_s16(v358, v409); + int16x8_t v1430 = vsubq_s16(v481, v519); + int16x8_t v1431_tmp = vqrdmulhq_n_s16(v1430, 6226); + int16x8_t v1431 = vmlaq_n_s16(v1431_tmp, v1430, 10); + int16x8_t v1432 = vaddq_s16(v1429, v1431); + int16x8_t v1433 = vsubq_s16(v563, v614); + int16x8_t v1434 = vsubq_s16(v656, v694); + int16x8_t v1435_tmp = vqrdmulhq_n_s16(v1434, 6226); + int16x8_t v1435 = vmlaq_n_s16(v1435_tmp, v1434, 10); + 
int16x8_t v1436 = vaddq_s16(v1433, v1435); + int16x8_t v1437 = vqrdmulhq_n_s16(v1436, 22622); + int16x8_t v1438 = vaddq_s16(v1432, v1437); + int16x8_t v1439 = vqrdmulhq_n_s16(v1438, 17646); + int16x8_t v1440 = vaddq_s16(v1428, v1439); + int16x8_t v1441 = vsubq_s16(v1419, v1421); + int16x8_t v1442 = vsubq_s16(v1423, v1425); + int16x8_t v1443 = vqrdmulhq_n_s16(v1442, 23761); + int16x8_t v1444 = vaddq_s16(v1441, v1443); + int16x8_t v1445 = vsubq_s16(v1429, v1431); + int16x8_t v1446 = vsubq_s16(v1433, v1435); + int16x8_t v1447 = vqrdmulhq_n_s16(v1446, 23761); + int16x8_t v1448 = vaddq_s16(v1445, v1447); + int16x8_t v1449 = vqrdmulhq_n_s16(v1448, 17826); + int16x8_t v1450 = vaddq_s16(v1444, v1449); + int16x8_t v1451 = vsubq_s16(v1397, v1399); + int16x8_t v1452 = vsubq_s16(v1401, v1403); + int16x8_t v1453 = vqrdmulhq_n_s16(v1452, 25084); + int16x8_t v1454 = vaddq_s16(v1451, v1453); + int16x8_t v1455 = vsubq_s16(v1407, v1409); + int16x8_t v1456 = vsubq_s16(v1411, v1413); + int16x8_t v1457 = vqrdmulhq_n_s16(v1456, 25084); + int16x8_t v1458 = vaddq_s16(v1455, v1457); + int16x8_t v1459 = vqrdmulhq_n_s16(v1458, 18021); + int16x8_t v1460 = vaddq_s16(v1454, v1459); + int16x8_t v1461 = vsubq_s16(v1375, v1377); + int16x8_t v1462 = vsubq_s16(v1379, v1381); + int16x8_t v1463 = vqrdmulhq_n_s16(v1462, 26631); + int16x8_t v1464 = vaddq_s16(v1461, v1463); + int16x8_t v1465 = vsubq_s16(v1385, v1387); + int16x8_t v1466 = vsubq_s16(v1389, v1391); + int16x8_t v1467 = vqrdmulhq_n_s16(v1466, 26631); + int16x8_t v1468 = vaddq_s16(v1465, v1467); + int16x8_t v1469 = vqrdmulhq_n_s16(v1468, 18231); + int16x8_t v1470 = vaddq_s16(v1464, v1469); + int16x8_t v1471 = vsubq_s16(v1353, v1355); + int16x8_t v1472 = vsubq_s16(v1357, v1359); + int16x8_t v1473 = vqrdmulhq_n_s16(v1472, 28454); + int16x8_t v1474 = vaddq_s16(v1471, v1473); + int16x8_t v1475 = vsubq_s16(v1363, v1365); + int16x8_t v1476 = vsubq_s16(v1367, v1369); + int16x8_t v1477 = vqrdmulhq_n_s16(v1476, 28454); + int16x8_t v1478 = 
vaddq_s16(v1475, v1477); + int16x8_t v1479 = vqrdmulhq_n_s16(v1478, 18458); + int16x8_t v1480 = vaddq_s16(v1474, v1479); + int16x8_t v1481 = vsubq_s16(v1331, v1333); + int16x8_t v1482 = vsubq_s16(v1335, v1337); + int16x8_t v1483 = vqrdmulhq_n_s16(v1482, 30624); + int16x8_t v1484 = vaddq_s16(v1481, v1483); + int16x8_t v1485 = vsubq_s16(v1341, v1343); + int16x8_t v1486 = vsubq_s16(v1345, v1347); + int16x8_t v1487 = vqrdmulhq_n_s16(v1486, 30624); + int16x8_t v1488 = vaddq_s16(v1485, v1487); + int16x8_t v1489 = vqrdmulhq_n_s16(v1488, 18702); + int16x8_t v1490 = vaddq_s16(v1484, v1489); + int16x8_t v1491 = vsubq_s16(v1309, v1311); + int16x8_t v1492 = vsubq_s16(v1313, v1315); + int16x8_t v1493_tmp = vqrdmulhq_n_s16(v1492, 472); + int16x8_t v1493 = vaddq_s16(v1493_tmp, v1492); + int16x8_t v1494 = vaddq_s16(v1491, v1493); + int16x8_t v1495 = vsubq_s16(v1319, v1321); + int16x8_t v1496 = vsubq_s16(v1323, v1325); + int16x8_t v1497_tmp = vqrdmulhq_n_s16(v1496, 472); + int16x8_t v1497 = vaddq_s16(v1497_tmp, v1496); + int16x8_t v1498 = vaddq_s16(v1495, v1497); + int16x8_t v1499 = vqrdmulhq_n_s16(v1498, 18964); + int16x8_t v1500 = vaddq_s16(v1494, v1499); + int16x8_t v1501 = vsubq_s16(v1287, v1289); + int16x8_t v1502 = vsubq_s16(v1291, v1293); + int16x8_t v1503_tmp = vqrdmulhq_n_s16(v1502, 3672); + int16x8_t v1503 = vaddq_s16(v1503_tmp, v1502); + int16x8_t v1504 = vaddq_s16(v1501, v1503); + int16x8_t v1505 = vsubq_s16(v1297, v1299); + int16x8_t v1506 = vsubq_s16(v1301, v1303); + int16x8_t v1507_tmp = vqrdmulhq_n_s16(v1506, 3672); + int16x8_t v1507 = vaddq_s16(v1507_tmp, v1506); + int16x8_t v1508 = vaddq_s16(v1505, v1507); + int16x8_t v1509 = vqrdmulhq_n_s16(v1508, 19245); + int16x8_t v1510 = vaddq_s16(v1504, v1509); + int16x8_t v1511 = vsubq_s16(v1265, v1267); + int16x8_t v1512 = vsubq_s16(v1269, v1271); + int16x8_t v1513_tmp = vqrdmulhq_n_s16(v1512, 7662); + int16x8_t v1513 = vaddq_s16(v1513_tmp, v1512); + int16x8_t v1514 = vaddq_s16(v1511, v1513); + int16x8_t v1515 = 
vsubq_s16(v1275, v1277); + int16x8_t v1516 = vsubq_s16(v1279, v1281); + int16x8_t v1517_tmp = vqrdmulhq_n_s16(v1516, 7662); + int16x8_t v1517 = vaddq_s16(v1517_tmp, v1516); + int16x8_t v1518 = vaddq_s16(v1515, v1517); + int16x8_t v1519 = vqrdmulhq_n_s16(v1518, 19546); + int16x8_t v1520 = vaddq_s16(v1514, v1519); + int16x8_t v1521 = vsubq_s16(v1222, v1227); + int16x8_t v1522 = vsubq_s16(v1232, v1237); + int16x8_t v1523_tmp = vqrdmulhq_n_s16(v1522, 12756); + int16x8_t v1523 = vaddq_s16(v1523_tmp, v1522); + int16x8_t v1524 = vaddq_s16(v1521, v1523); + int16x8_t v1525 = vsubq_s16(v1244, v1249); + int16x8_t v1526 = vsubq_s16(v1254, v1259); + int16x8_t v1527_tmp = vqrdmulhq_n_s16(v1526, 12756); + int16x8_t v1527 = vaddq_s16(v1527_tmp, v1526); + int16x8_t v1528 = vaddq_s16(v1525, v1527); + int16x8_t v1529 = vqrdmulhq_n_s16(v1528, 19869); + int16x8_t v1530 = vaddq_s16(v1524, v1529); + int16x8_t v1531 = vsubq_s16(v1176, v1181); + int16x8_t v1532 = vsubq_s16(v1186, v1191); + int16x8_t v1533_tmp = vqrdmulhq_n_s16(v1532, 19463); + int16x8_t v1533 = vaddq_s16(v1533_tmp, v1532); + int16x8_t v1534 = vaddq_s16(v1531, v1533); + int16x8_t v1535 = vsubq_s16(v1198, v1203); + int16x8_t v1536 = vsubq_s16(v1208, v1213); + int16x8_t v1537_tmp = vqrdmulhq_n_s16(v1536, 19463); + int16x8_t v1537 = vaddq_s16(v1537_tmp, v1536); + int16x8_t v1538 = vaddq_s16(v1535, v1537); + int16x8_t v1539 = vqrdmulhq_n_s16(v1538, 20216); + int16x8_t v1540 = vaddq_s16(v1534, v1539); + int16x8_t v1541 = vsubq_s16(v1130, v1135); + int16x8_t v1542 = vsubq_s16(v1140, v1145); + int16x8_t v1543_tmp = vqrdmulhq_n_s16(v1542, 28661); + int16x8_t v1543 = vaddq_s16(v1543_tmp, v1542); + int16x8_t v1544 = vaddq_s16(v1541, v1543); + int16x8_t v1545 = vsubq_s16(v1152, v1157); + int16x8_t v1546 = vsubq_s16(v1162, v1167); + int16x8_t v1547_tmp = vqrdmulhq_n_s16(v1546, 28661); + int16x8_t v1547 = vaddq_s16(v1547_tmp, v1546); + int16x8_t v1548 = vaddq_s16(v1545, v1547); + int16x8_t v1549 = vqrdmulhq_n_s16(v1548, 20587); + 
int16x8_t v1550 = vaddq_s16(v1544, v1549); + int16x8_t v1551 = vsubq_s16(v1084, v1089); + int16x8_t v1552 = vsubq_s16(v1094, v1099); + int16x8_t v1553_tmp = vqrdmulhq_n_s16(v1552, 9242); + int16x8_t v1553 = vmlaq_n_s16(v1553_tmp, v1552, 2); + int16x8_t v1554 = vaddq_s16(v1551, v1553); + int16x8_t v1555 = vsubq_s16(v1106, v1111); + int16x8_t v1556 = vsubq_s16(v1116, v1121); + int16x8_t v1557_tmp = vqrdmulhq_n_s16(v1556, 9242); + int16x8_t v1557 = vmlaq_n_s16(v1557_tmp, v1556, 2); + int16x8_t v1558 = vaddq_s16(v1555, v1557); + int16x8_t v1559 = vqrdmulhq_n_s16(v1558, 20985); + int16x8_t v1560 = vaddq_s16(v1554, v1559); + int16x8_t v1561 = vsubq_s16(v996, v1007); + int16x8_t v1562 = vsubq_s16(v1018, v1029); + int16x8_t v1563_tmp = vqrdmulhq_n_s16(v1562, 30298); + int16x8_t v1563 = vmlaq_n_s16(v1563_tmp, v1562, 2); + int16x8_t v1564 = vaddq_s16(v1561, v1563); + int16x8_t v1565 = vsubq_s16(v1042, v1053); + int16x8_t v1566 = vsubq_s16(v1064, v1075); + int16x8_t v1567_tmp = vqrdmulhq_n_s16(v1566, 30298); + int16x8_t v1567 = vmlaq_n_s16(v1567_tmp, v1566, 2); + int16x8_t v1568 = vaddq_s16(v1565, v1567); + int16x8_t v1569 = vqrdmulhq_n_s16(v1568, 21412); + int16x8_t v1570 = vaddq_s16(v1564, v1569); + int16x8_t v1571 = vsubq_s16(v901, v912); + int16x8_t v1572 = vsubq_s16(v924, v935); + int16x8_t v1573_tmp = vqrdmulhq_n_s16(v1572, 2773); + int16x8_t v1573 = vmlaq_n_s16(v1573_tmp, v1572, 4); + int16x8_t v1574 = vaddq_s16(v1571, v1573); + int16x8_t v1575 = vsubq_s16(v948, v959); + int16x8_t v1576 = vsubq_s16(v970, v981); + int16x8_t v1577_tmp = vqrdmulhq_n_s16(v1576, 2773); + int16x8_t v1577 = vmlaq_n_s16(v1577_tmp, v1576, 4); + int16x8_t v1578 = vaddq_s16(v1575, v1577); + int16x8_t v1579 = vqrdmulhq_n_s16(v1578, 21871); + int16x8_t v1580 = vaddq_s16(v1574, v1579); + int16x8_t v1581 = vsubq_s16(v723, v746); + int16x8_t v1582 = vsubq_s16(v769, v792); + int16x8_t v1583_tmp = vqrdmulhq_n_s16(v1582, 26108); + int16x8_t v1583 = vmlaq_n_s16(v1583_tmp, v1582, 6); + int16x8_t v1584 = 
vaddq_s16(v1581, v1583); + int16x8_t v1585 = vsubq_s16(v817, v840); + int16x8_t v1586 = vsubq_s16(v863, v886); + int16x8_t v1587_tmp = vqrdmulhq_n_s16(v1586, 26108); + int16x8_t v1587 = vmlaq_n_s16(v1587_tmp, v1586, 6); + int16x8_t v1588 = vaddq_s16(v1585, v1587); + int16x8_t v1589 = vqrdmulhq_n_s16(v1588, 22363); + int16x8_t v1590 = vaddq_s16(v1584, v1589); + int16x8_t v1591 = vsubq_s16(v61, v140); + int16x8_t v1592 = vsubq_s16(v234, v314); + int16x8_t v1593_tmp = vqrdmulhq_n_s16(v1592, 12251); + int16x8_t v1593 = vmlaq_n_s16(v1593_tmp, v1592, 20); + int16x8_t v1594 = vaddq_s16(v1591, v1593); + int16x8_t v1595 = vsubq_s16(v410, v521); + int16x8_t v1596 = vsubq_s16(v615, v696); + int16x8_t v1597_tmp = vqrdmulhq_n_s16(v1596, 12251); + int16x8_t v1597 = vmlaq_n_s16(v1597_tmp, v1596, 20); + int16x8_t v1598 = vaddq_s16(v1595, v1597); + int16x8_t v1599 = vqrdmulhq_n_s16(v1598, 22891); + int16x8_t v1600 = vaddq_s16(v1594, v1599); + int16x8_t v1601 = vsubq_s16(v1591, v1593); + int16x8_t v1602 = vsubq_s16(v1595, v1597); + int16x8_t v1603 = vqrdmulhq_n_s16(v1602, 23460); + int16x8_t v1604 = vaddq_s16(v1601, v1603); + int16x8_t v1605 = vsubq_s16(v1581, v1583); + int16x8_t v1606 = vsubq_s16(v1585, v1587); + int16x8_t v1607 = vqrdmulhq_n_s16(v1606, 24073); + int16x8_t v1608 = vaddq_s16(v1605, v1607); + int16x8_t v1609 = vsubq_s16(v1571, v1573); + int16x8_t v1610 = vsubq_s16(v1575, v1577); + int16x8_t v1611 = vqrdmulhq_n_s16(v1610, 24734); + int16x8_t v1612 = vaddq_s16(v1609, v1611); + int16x8_t v1613 = vsubq_s16(v1561, v1563); + int16x8_t v1614 = vsubq_s16(v1565, v1567); + int16x8_t v1615 = vqrdmulhq_n_s16(v1614, 25448); + int16x8_t v1616 = vaddq_s16(v1613, v1615); + int16x8_t v1617 = vsubq_s16(v1551, v1553); + int16x8_t v1618 = vsubq_s16(v1555, v1557); + int16x8_t v1619 = vqrdmulhq_n_s16(v1618, 26220); + int16x8_t v1620 = vaddq_s16(v1617, v1619); + int16x8_t v1621 = vsubq_s16(v1541, v1543); + int16x8_t v1622 = vsubq_s16(v1545, v1547); + int16x8_t v1623 = 
vqrdmulhq_n_s16(v1622, 27058); + int16x8_t v1624 = vaddq_s16(v1621, v1623); + int16x8_t v1625 = vsubq_s16(v1531, v1533); + int16x8_t v1626 = vsubq_s16(v1535, v1537); + int16x8_t v1627 = vqrdmulhq_n_s16(v1626, 27969); + int16x8_t v1628 = vaddq_s16(v1625, v1627); + int16x8_t v1629 = vsubq_s16(v1521, v1523); + int16x8_t v1630 = vsubq_s16(v1525, v1527); + int16x8_t v1631 = vqrdmulhq_n_s16(v1630, 28961); + int16x8_t v1632 = vaddq_s16(v1629, v1631); + int16x8_t v1633 = vsubq_s16(v1511, v1513); + int16x8_t v1634 = vsubq_s16(v1515, v1517); + int16x8_t v1635 = vqrdmulhq_n_s16(v1634, 30044); + int16x8_t v1636 = vaddq_s16(v1633, v1635); + int16x8_t v1637 = vsubq_s16(v1501, v1503); + int16x8_t v1638 = vsubq_s16(v1505, v1507); + int16x8_t v1639 = vqrdmulhq_n_s16(v1638, 31232); + int16x8_t v1640 = vaddq_s16(v1637, v1639); + int16x8_t v1641 = vsubq_s16(v1491, v1493); + int16x8_t v1642 = vsubq_s16(v1495, v1497); + int16x8_t v1643 = vqrdmulhq_n_s16(v1642, 32538); + int16x8_t v1644 = vaddq_s16(v1641, v1643); + int16x8_t v1645 = vsubq_s16(v1481, v1483); + int16x8_t v1646 = vsubq_s16(v1485, v1487); + int16x8_t v1647_tmp = vqrdmulhq_n_s16(v1646, 1211); + int16x8_t v1647 = vaddq_s16(v1647_tmp, v1646); + int16x8_t v1648 = vaddq_s16(v1645, v1647); + int16x8_t v1649 = vsubq_s16(v1471, v1473); + int16x8_t v1650 = vsubq_s16(v1475, v1477); + int16x8_t v1651_tmp = vqrdmulhq_n_s16(v1650, 2808); + int16x8_t v1651 = vaddq_s16(v1651_tmp, v1650); + int16x8_t v1652 = vaddq_s16(v1649, v1651); + int16x8_t v1653 = vsubq_s16(v1461, v1463); + int16x8_t v1654 = vsubq_s16(v1465, v1467); + int16x8_t v1655_tmp = vqrdmulhq_n_s16(v1654, 4586); + int16x8_t v1655 = vaddq_s16(v1655_tmp, v1654); + int16x8_t v1656 = vaddq_s16(v1653, v1655); + int16x8_t v1657 = vsubq_s16(v1451, v1453); + int16x8_t v1658 = vsubq_s16(v1455, v1457); + int16x8_t v1659_tmp = vqrdmulhq_n_s16(v1658, 6576); + int16x8_t v1659 = vaddq_s16(v1659_tmp, v1658); + int16x8_t v1660 = vaddq_s16(v1657, v1659); + int16x8_t v1661 = vsubq_s16(v1441, 
v1443); + int16x8_t v1662 = vsubq_s16(v1445, v1447); + int16x8_t v1663_tmp = vqrdmulhq_n_s16(v1662, 8817); + int16x8_t v1663 = vaddq_s16(v1663_tmp, v1662); + int16x8_t v1664 = vaddq_s16(v1661, v1663); + int16x8_t v1665 = vsubq_s16(v1422, v1427); + int16x8_t v1666 = vsubq_s16(v1432, v1437); + int16x8_t v1667_tmp = vqrdmulhq_n_s16(v1666, 11356); + int16x8_t v1667 = vaddq_s16(v1667_tmp, v1666); + int16x8_t v1668 = vaddq_s16(v1665, v1667); + int16x8_t v1669 = vsubq_s16(v1400, v1405); + int16x8_t v1670 = vsubq_s16(v1410, v1415); + int16x8_t v1671_tmp = vqrdmulhq_n_s16(v1670, 14256); + int16x8_t v1671 = vaddq_s16(v1671_tmp, v1670); + int16x8_t v1672 = vaddq_s16(v1669, v1671); + int16x8_t v1673 = vsubq_s16(v1378, v1383); + int16x8_t v1674 = vsubq_s16(v1388, v1393); + int16x8_t v1675_tmp = vqrdmulhq_n_s16(v1674, 17596); + int16x8_t v1675 = vaddq_s16(v1675_tmp, v1674); + int16x8_t v1676 = vaddq_s16(v1673, v1675); + int16x8_t v1677 = vsubq_s16(v1356, v1361); + int16x8_t v1678 = vsubq_s16(v1366, v1371); + int16x8_t v1679_tmp = vqrdmulhq_n_s16(v1678, 21483); + int16x8_t v1679 = vaddq_s16(v1679_tmp, v1678); + int16x8_t v1680 = vaddq_s16(v1677, v1679); + int16x8_t v1681 = vsubq_s16(v1334, v1339); + int16x8_t v1682 = vsubq_s16(v1344, v1349); + int16x8_t v1683_tmp = vqrdmulhq_n_s16(v1682, 26057); + int16x8_t v1683 = vaddq_s16(v1683_tmp, v1682); + int16x8_t v1684 = vaddq_s16(v1681, v1683); + int16x8_t v1685 = vsubq_s16(v1312, v1317); + int16x8_t v1686 = vsubq_s16(v1322, v1327); + int16x8_t v1687_tmp = vqrdmulhq_n_s16(v1686, 31517); + int16x8_t v1687 = vaddq_s16(v1687_tmp, v1686); + int16x8_t v1688 = vaddq_s16(v1685, v1687); + int16x8_t v1689 = vsubq_s16(v1290, v1295); + int16x8_t v1690 = vsubq_s16(v1300, v1305); + int16x8_t v1691_tmp = vqrdmulhq_n_s16(v1690, 5373); + int16x8_t v1691 = vmlaq_n_s16(v1691_tmp, v1690, 2); + int16x8_t v1692 = vaddq_s16(v1689, v1691); + int16x8_t v1693 = vsubq_s16(v1268, v1273); + int16x8_t v1694 = vsubq_s16(v1278, v1283); + int16x8_t v1695_tmp = 
vqrdmulhq_n_s16(v1694, 13571); + int16x8_t v1695 = vmlaq_n_s16(v1695_tmp, v1694, 2); + int16x8_t v1696 = vaddq_s16(v1693, v1695); + int16x8_t v1697 = vsubq_s16(v1228, v1239); + int16x8_t v1698 = vsubq_s16(v1250, v1261); + int16x8_t v1699_tmp = vqrdmulhq_n_s16(v1698, 23975); + int16x8_t v1699 = vmlaq_n_s16(v1699_tmp, v1698, 2); + int16x8_t v1700 = vaddq_s16(v1697, v1699); + int16x8_t v1701 = vsubq_s16(v1182, v1193); + int16x8_t v1702 = vsubq_s16(v1204, v1215); + int16x8_t v1703_tmp = vqrdmulhq_n_s16(v1702, 4832); + int16x8_t v1703 = vmlaq_n_s16(v1703_tmp, v1702, 3); + int16x8_t v1704 = vaddq_s16(v1701, v1703); + int16x8_t v1705 = vsubq_s16(v1136, v1147); + int16x8_t v1706 = vsubq_s16(v1158, v1169); + int16x8_t v1707_tmp = vqrdmulhq_n_s16(v1706, 23437); + int16x8_t v1707 = vmlaq_n_s16(v1707_tmp, v1706, 3); + int16x8_t v1708 = vaddq_s16(v1705, v1707); + int16x8_t v1709 = vsubq_s16(v1090, v1101); + int16x8_t v1710 = vsubq_s16(v1112, v1123); + int16x8_t v1711_tmp = vqrdmulhq_n_s16(v1710, 17573); + int16x8_t v1711 = vmlaq_n_s16(v1711_tmp, v1710, 4); + int16x8_t v1712 = vaddq_s16(v1709, v1711); + int16x8_t v1713 = vsubq_s16(v1008, v1031); + int16x8_t v1714 = vsubq_s16(v1054, v1077); + int16x8_t v1715_tmp = vqrdmulhq_n_s16(v1714, 27122); + int16x8_t v1715 = vmlaq_n_s16(v1715_tmp, v1714, 5); + int16x8_t v1716 = vaddq_s16(v1713, v1715); + int16x8_t v1717 = vsubq_s16(v913, v937); + int16x8_t v1718 = vsubq_s16(v960, v983); + int16x8_t v1719_tmp = vqrdmulhq_n_s16(v1718, 5041); + int16x8_t v1719 = vmlaq_n_s16(v1719_tmp, v1718, 8); + int16x8_t v1720 = vaddq_s16(v1717, v1719); + int16x8_t v1721 = vsubq_s16(v747, v794); + int16x8_t v1722 = vsubq_s16(v841, v888); + int16x8_t v1723_tmp = vqrdmulhq_n_s16(v1722, 19146); + int16x8_t v1723 = vmlaq_n_s16(v1723_tmp, v1722, 13); + int16x8_t v1724 = vaddq_s16(v1721, v1723); + int16x8_t v1725 = vsubq_s16(v141, v316); + int16x8_t v1726 = vsubq_s16(v522, v698); + int16x8_t v1727_tmp = vqrdmulhq_n_s16(v1726, 24402); + int16x8_t v1727 = 
vmlaq_n_s16(v1727_tmp, v1726, 40); + int16x8_t v1728 = vaddq_s16(v1725, v1727); + int16x8_t v1729 = vsubq_s16(v1725, v1727); + int16x8_t v1730 = vsubq_s16(v1721, v1723); + int16x8_t v1731 = vsubq_s16(v1717, v1719); + int16x8_t v1732 = vsubq_s16(v1713, v1715); + int16x8_t v1733 = vsubq_s16(v1709, v1711); + int16x8_t v1734 = vsubq_s16(v1705, v1707); + int16x8_t v1735 = vsubq_s16(v1701, v1703); + int16x8_t v1736 = vsubq_s16(v1697, v1699); + int16x8_t v1737 = vsubq_s16(v1693, v1695); + int16x8_t v1738 = vsubq_s16(v1689, v1691); + int16x8_t v1739 = vsubq_s16(v1685, v1687); + int16x8_t v1740 = vsubq_s16(v1681, v1683); + int16x8_t v1741 = vsubq_s16(v1677, v1679); + int16x8_t v1742 = vsubq_s16(v1673, v1675); + int16x8_t v1743 = vsubq_s16(v1669, v1671); + int16x8_t v1744 = vsubq_s16(v1665, v1667); + int16x8_t v1745 = vsubq_s16(v1661, v1663); + int16x8_t v1746 = vsubq_s16(v1657, v1659); + int16x8_t v1747 = vsubq_s16(v1653, v1655); + int16x8_t v1748 = vsubq_s16(v1649, v1651); + int16x8_t v1749 = vsubq_s16(v1645, v1647); + int16x8_t v1750 = vsubq_s16(v1641, v1643); + int16x8_t v1751 = vsubq_s16(v1637, v1639); + int16x8_t v1752 = vsubq_s16(v1633, v1635); + int16x8_t v1753 = vsubq_s16(v1629, v1631); + int16x8_t v1754 = vsubq_s16(v1625, v1627); + int16x8_t v1755 = vsubq_s16(v1621, v1623); + int16x8_t v1756 = vsubq_s16(v1617, v1619); + int16x8_t v1757 = vsubq_s16(v1613, v1615); + int16x8_t v1758 = vsubq_s16(v1609, v1611); + int16x8_t v1759 = vsubq_s16(v1605, v1607); + int16x8_t v1760 = vsubq_s16(v1601, v1603); + int16x8_t v1761 = vsubq_s16(v1594, v1599); + int16x8_t v1762 = vsubq_s16(v1584, v1589); + int16x8_t v1763 = vsubq_s16(v1574, v1579); + int16x8_t v1764 = vsubq_s16(v1564, v1569); + int16x8_t v1765 = vsubq_s16(v1554, v1559); + int16x8_t v1766 = vsubq_s16(v1544, v1549); + int16x8_t v1767 = vsubq_s16(v1534, v1539); + int16x8_t v1768 = vsubq_s16(v1524, v1529); + int16x8_t v1769 = vsubq_s16(v1514, v1519); + int16x8_t v1770 = vsubq_s16(v1504, v1509); + int16x8_t v1771 = 
vsubq_s16(v1494, v1499); + int16x8_t v1772 = vsubq_s16(v1484, v1489); + int16x8_t v1773 = vsubq_s16(v1474, v1479); + int16x8_t v1774 = vsubq_s16(v1464, v1469); + int16x8_t v1775 = vsubq_s16(v1454, v1459); + int16x8_t v1776 = vsubq_s16(v1444, v1449); + int16x8_t v1777 = vsubq_s16(v1428, v1439); + int16x8_t v1778 = vsubq_s16(v1406, v1417); + int16x8_t v1779 = vsubq_s16(v1384, v1395); + int16x8_t v1780 = vsubq_s16(v1362, v1373); + int16x8_t v1781 = vsubq_s16(v1340, v1351); + int16x8_t v1782 = vsubq_s16(v1318, v1329); + int16x8_t v1783 = vsubq_s16(v1296, v1307); + int16x8_t v1784 = vsubq_s16(v1274, v1285); + int16x8_t v1785 = vsubq_s16(v1240, v1263); + int16x8_t v1786 = vsubq_s16(v1194, v1217); + int16x8_t v1787 = vsubq_s16(v1148, v1171); + int16x8_t v1788 = vsubq_s16(v1102, v1125); + int16x8_t v1789 = vsubq_s16(v1032, v1079); + int16x8_t v1790 = vsubq_s16(v938, v985); + int16x8_t v1791 = vsubq_s16(v795, v890); + int16x8_t v1792 = vsubq_s16(v317, v700); + vst1q_s16(out + out_stride * 0 + i, v701); + vst1q_s16(out + out_stride * 1 + i, v891); + vst1q_s16(out + out_stride * 2 + i, v986); + vst1q_s16(out + out_stride * 3 + i, v1080); + vst1q_s16(out + out_stride * 4 + i, v1126); + vst1q_s16(out + out_stride * 5 + i, v1172); + vst1q_s16(out + out_stride * 6 + i, v1218); + vst1q_s16(out + out_stride * 7 + i, v1264); + vst1q_s16(out + out_stride * 8 + i, v1286); + vst1q_s16(out + out_stride * 9 + i, v1308); + vst1q_s16(out + out_stride * 10 + i, v1330); + vst1q_s16(out + out_stride * 11 + i, v1352); + vst1q_s16(out + out_stride * 12 + i, v1374); + vst1q_s16(out + out_stride * 13 + i, v1396); + vst1q_s16(out + out_stride * 14 + i, v1418); + vst1q_s16(out + out_stride * 15 + i, v1440); + vst1q_s16(out + out_stride * 16 + i, v1450); + vst1q_s16(out + out_stride * 17 + i, v1460); + vst1q_s16(out + out_stride * 18 + i, v1470); + vst1q_s16(out + out_stride * 19 + i, v1480); + vst1q_s16(out + out_stride * 20 + i, v1490); + vst1q_s16(out + out_stride * 21 + i, v1500); + 
vst1q_s16(out + out_stride * 22 + i, v1510); + vst1q_s16(out + out_stride * 23 + i, v1520); + vst1q_s16(out + out_stride * 24 + i, v1530); + vst1q_s16(out + out_stride * 25 + i, v1540); + vst1q_s16(out + out_stride * 26 + i, v1550); + vst1q_s16(out + out_stride * 27 + i, v1560); + vst1q_s16(out + out_stride * 28 + i, v1570); + vst1q_s16(out + out_stride * 29 + i, v1580); + vst1q_s16(out + out_stride * 30 + i, v1590); + vst1q_s16(out + out_stride * 31 + i, v1600); + vst1q_s16(out + out_stride * 32 + i, v1604); + vst1q_s16(out + out_stride * 33 + i, v1608); + vst1q_s16(out + out_stride * 34 + i, v1612); + vst1q_s16(out + out_stride * 35 + i, v1616); + vst1q_s16(out + out_stride * 36 + i, v1620); + vst1q_s16(out + out_stride * 37 + i, v1624); + vst1q_s16(out + out_stride * 38 + i, v1628); + vst1q_s16(out + out_stride * 39 + i, v1632); + vst1q_s16(out + out_stride * 40 + i, v1636); + vst1q_s16(out + out_stride * 41 + i, v1640); + vst1q_s16(out + out_stride * 42 + i, v1644); + vst1q_s16(out + out_stride * 43 + i, v1648); + vst1q_s16(out + out_stride * 44 + i, v1652); + vst1q_s16(out + out_stride * 45 + i, v1656); + vst1q_s16(out + out_stride * 46 + i, v1660); + vst1q_s16(out + out_stride * 47 + i, v1664); + vst1q_s16(out + out_stride * 48 + i, v1668); + vst1q_s16(out + out_stride * 49 + i, v1672); + vst1q_s16(out + out_stride * 50 + i, v1676); + vst1q_s16(out + out_stride * 51 + i, v1680); + vst1q_s16(out + out_stride * 52 + i, v1684); + vst1q_s16(out + out_stride * 53 + i, v1688); + vst1q_s16(out + out_stride * 54 + i, v1692); + vst1q_s16(out + out_stride * 55 + i, v1696); + vst1q_s16(out + out_stride * 56 + i, v1700); + vst1q_s16(out + out_stride * 57 + i, v1704); + vst1q_s16(out + out_stride * 58 + i, v1708); + vst1q_s16(out + out_stride * 59 + i, v1712); + vst1q_s16(out + out_stride * 60 + i, v1716); + vst1q_s16(out + out_stride * 61 + i, v1720); + vst1q_s16(out + out_stride * 62 + i, v1724); + vst1q_s16(out + out_stride * 63 + i, v1728); + vst1q_s16(out + 
out_stride * 64 + i, v1729); + vst1q_s16(out + out_stride * 65 + i, v1730); + vst1q_s16(out + out_stride * 66 + i, v1731); + vst1q_s16(out + out_stride * 67 + i, v1732); + vst1q_s16(out + out_stride * 68 + i, v1733); + vst1q_s16(out + out_stride * 69 + i, v1734); + vst1q_s16(out + out_stride * 70 + i, v1735); + vst1q_s16(out + out_stride * 71 + i, v1736); + vst1q_s16(out + out_stride * 72 + i, v1737); + vst1q_s16(out + out_stride * 73 + i, v1738); + vst1q_s16(out + out_stride * 74 + i, v1739); + vst1q_s16(out + out_stride * 75 + i, v1740); + vst1q_s16(out + out_stride * 76 + i, v1741); + vst1q_s16(out + out_stride * 77 + i, v1742); + vst1q_s16(out + out_stride * 78 + i, v1743); + vst1q_s16(out + out_stride * 79 + i, v1744); + vst1q_s16(out + out_stride * 80 + i, v1745); + vst1q_s16(out + out_stride * 81 + i, v1746); + vst1q_s16(out + out_stride * 82 + i, v1747); + vst1q_s16(out + out_stride * 83 + i, v1748); + vst1q_s16(out + out_stride * 84 + i, v1749); + vst1q_s16(out + out_stride * 85 + i, v1750); + vst1q_s16(out + out_stride * 86 + i, v1751); + vst1q_s16(out + out_stride * 87 + i, v1752); + vst1q_s16(out + out_stride * 88 + i, v1753); + vst1q_s16(out + out_stride * 89 + i, v1754); + vst1q_s16(out + out_stride * 90 + i, v1755); + vst1q_s16(out + out_stride * 91 + i, v1756); + vst1q_s16(out + out_stride * 92 + i, v1757); + vst1q_s16(out + out_stride * 93 + i, v1758); + vst1q_s16(out + out_stride * 94 + i, v1759); + vst1q_s16(out + out_stride * 95 + i, v1760); + vst1q_s16(out + out_stride * 96 + i, v1761); + vst1q_s16(out + out_stride * 97 + i, v1762); + vst1q_s16(out + out_stride * 98 + i, v1763); + vst1q_s16(out + out_stride * 99 + i, v1764); + vst1q_s16(out + out_stride * 100 + i, v1765); + vst1q_s16(out + out_stride * 101 + i, v1766); + vst1q_s16(out + out_stride * 102 + i, v1767); + vst1q_s16(out + out_stride * 103 + i, v1768); + vst1q_s16(out + out_stride * 104 + i, v1769); + vst1q_s16(out + out_stride * 105 + i, v1770); + vst1q_s16(out + out_stride * 106 + 
i, v1771); + vst1q_s16(out + out_stride * 107 + i, v1772); + vst1q_s16(out + out_stride * 108 + i, v1773); + vst1q_s16(out + out_stride * 109 + i, v1774); + vst1q_s16(out + out_stride * 110 + i, v1775); + vst1q_s16(out + out_stride * 111 + i, v1776); + vst1q_s16(out + out_stride * 112 + i, v1777); + vst1q_s16(out + out_stride * 113 + i, v1778); + vst1q_s16(out + out_stride * 114 + i, v1779); + vst1q_s16(out + out_stride * 115 + i, v1780); + vst1q_s16(out + out_stride * 116 + i, v1781); + vst1q_s16(out + out_stride * 117 + i, v1782); + vst1q_s16(out + out_stride * 118 + i, v1783); + vst1q_s16(out + out_stride * 119 + i, v1784); + vst1q_s16(out + out_stride * 120 + i, v1785); + vst1q_s16(out + out_stride * 121 + i, v1786); + vst1q_s16(out + out_stride * 122 + i, v1787); + vst1q_s16(out + out_stride * 123 + i, v1788); + vst1q_s16(out + out_stride * 124 + i, v1789); + vst1q_s16(out + out_stride * 125 + i, v1790); + vst1q_s16(out + out_stride * 126 + i, v1791); + vst1q_s16(out + out_stride * 127 + i, v1792); + } +} diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct16-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct16-inl.h new file mode 100644 index 0000000000..472ec20d42 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct16-inl.h @@ -0,0 +1,180 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* This file is automatically generated. Do not modify it directly. 
*/ +#if HWY_TARGET != HWY_NEON +#error "only include this file from fast_dct-inl.h" +#endif + +constexpr size_t FastIDCTIntegerBits(FastDCTTag<16>) { return 1; } + +void FastIDCT(FastDCTTag<16>, const int16_t* in, size_t in_stride, int16_t* out, + size_t out_stride, size_t count) { + JXL_ASSERT(count % 8 == 0); + for (size_t i = 0; i < count; i += 8) { + int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i); + int16x8_t v1 = vld1q_s16(in + in_stride * 8 + i); + int16x8_t v2 = vaddq_s16(v0, v1); + int16x8_t v3 = vld1q_s16(in + in_stride * 4 + i); + int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573); + int16x8_t v4 = vaddq_s16(v4_tmp, v3); + int16x8_t v5 = vld1q_s16(in + in_stride * 12 + i); + int16x8_t v6 = vaddq_s16(v5, v3); + int16x8_t v7 = vaddq_s16(v4, v6); + int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734); + int16x8_t v9 = vaddq_s16(v2, v8); + int16x8_t v10 = vld1q_s16(in + in_stride * 2 + i); + int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573); + int16x8_t v11 = vaddq_s16(v11_tmp, v10); + int16x8_t v12 = vld1q_s16(in + in_stride * 10 + i); + int16x8_t v13 = vld1q_s16(in + in_stride * 6 + i); + int16x8_t v14 = vaddq_s16(v12, v13); + int16x8_t v15 = vaddq_s16(v11, v14); + int16x8_t v16 = vaddq_s16(v13, v10); + int16x8_t v17 = vqrdmulhq_n_s16(v16, 25080); + int16x8_t v18 = vld1q_s16(in + in_stride * 14 + i); + int16x8_t v19 = vaddq_s16(v18, v12); + int16x8_t v20 = vaddq_s16(v16, v19); + int16x8_t v21 = vqrdmulhq_n_s16(v20, 17734); + int16x8_t v22 = vaddq_s16(v17, v21); + int16x8_t v23 = vaddq_s16(v15, v22); + int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705); + int16x8_t v25 = vaddq_s16(v9, v24); + int16x8_t v26 = vld1q_s16(in + in_stride * 15 + i); + int16x8_t v27 = vld1q_s16(in + in_stride * 13 + i); + int16x8_t v28 = vaddq_s16(v26, v27); + int16x8_t v29 = vld1q_s16(in + in_stride * 11 + i); + int16x8_t v30 = vld1q_s16(in + in_stride * 9 + i); + int16x8_t v31 = vaddq_s16(v29, v30); + int16x8_t v32 = vaddq_s16(v28, v31); + int16x8_t v33 = vqrdmulhq_n_s16(v32, 17734); + int16x8_t v34 = 
vld1q_s16(in + in_stride * 3 + i); + int16x8_t v35 = vld1q_s16(in + in_stride * 1 + i); + int16x8_t v36 = vaddq_s16(v34, v35); + int16x8_t v37 = vld1q_s16(in + in_stride * 7 + i); + int16x8_t v38 = vld1q_s16(in + in_stride * 5 + i); + int16x8_t v39 = vaddq_s16(v37, v38); + int16x8_t v40 = vaddq_s16(v36, v39); + int16x8_t v41_tmp = vqrdmulhq_n_s16(v40, 10045); + int16x8_t v41 = vaddq_s16(v41_tmp, v40); + int16x8_t v42 = vaddq_s16(v33, v41); + int16x8_t v43 = vqrdmulhq_n_s16(v42, 16705); + int16x8_t v44_tmp = vqrdmulhq_n_s16(v36, 13573); + int16x8_t v44 = vaddq_s16(v44_tmp, v36); + int16x8_t v45 = vaddq_s16(v39, v31); + int16x8_t v46 = vaddq_s16(v44, v45); + int16x8_t v47 = vqrdmulhq_n_s16(v46, 16705); + int16x8_t v48 = vaddq_s16(v43, v47); + int16x8_t v49_tmp = vqrdmulhq_n_s16(v35, 13573); + int16x8_t v49 = vaddq_s16(v49_tmp, v35); + int16x8_t v50 = vaddq_s16(v30, v37); + int16x8_t v51 = vaddq_s16(v49, v50); + int16x8_t v52 = vaddq_s16(v38, v34); + int16x8_t v53 = vaddq_s16(v27, v29); + int16x8_t v54 = vaddq_s16(v52, v53); + int16x8_t v55 = vqrdmulhq_n_s16(v54, 17734); + int16x8_t v56 = vqrdmulhq_n_s16(v52, 25080); + int16x8_t v57 = vaddq_s16(v55, v56); + int16x8_t v58 = vaddq_s16(v51, v57); + int16x8_t v59 = vaddq_s16(v48, v58); + int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463); + int16x8_t v61 = vaddq_s16(v25, v60); + int16x8_t v62 = vsubq_s16(v0, v1); + int16x8_t v63 = vsubq_s16(v4, v6); + int16x8_t v64_tmp = vqrdmulhq_n_s16(v63, 10045); + int16x8_t v64 = vaddq_s16(v64_tmp, v63); + int16x8_t v65 = vaddq_s16(v62, v64); + int16x8_t v66 = vsubq_s16(v11, v14); + int16x8_t v67 = vqrdmulhq_n_s16(v16, 17734); + int16x8_t v68_tmp = vqrdmulhq_n_s16(v19, 10045); + int16x8_t v68 = vaddq_s16(v68_tmp, v19); + int16x8_t v69 = vsubq_s16(v67, v68); + int16x8_t v70 = vaddq_s16(v66, v69); + int16x8_t v71 = vqrdmulhq_n_s16(v70, 19705); + int16x8_t v72 = vaddq_s16(v65, v71); + int16x8_t v73 = vsubq_s16(v49, v50); + int16x8_t v74 = vqrdmulhq_n_s16(v52, 17734); + int16x8_t v75_tmp = 
vqrdmulhq_n_s16(v53, 10045); + int16x8_t v75 = vaddq_s16(v75_tmp, v53); + int16x8_t v76 = vsubq_s16(v74, v75); + int16x8_t v77 = vaddq_s16(v73, v76); + int16x8_t v78 = vsubq_s16(v44, v45); + int16x8_t v79 = vqrdmulhq_n_s16(v78, 19705); + int16x8_t v80 = vqrdmulhq_n_s16(v40, 13573); + int16x8_t v81 = vsubq_s16(v80, v32); + int16x8_t v82 = vqrdmulhq_n_s16(v81, 25746); + int16x8_t v83 = vaddq_s16(v79, v82); + int16x8_t v84 = vaddq_s16(v77, v83); + int16x8_t v85 = vqrdmulhq_n_s16(v84, 17121); + int16x8_t v86 = vaddq_s16(v72, v85); + int16x8_t v87 = vsubq_s16(v62, v64); + int16x8_t v88 = vsubq_s16(v66, v69); + int16x8_t v89 = vqrdmulhq_n_s16(v88, 29490); + int16x8_t v90 = vaddq_s16(v87, v89); + int16x8_t v91 = vsubq_s16(v73, v76); + int16x8_t v92 = vqrdmulhq_n_s16(v78, 29490); + int16x8_t v93_tmp = vqrdmulhq_n_s16(v81, 5763); + int16x8_t v93 = vaddq_s16(v93_tmp, v81); + int16x8_t v94 = vsubq_s16(v92, v93); + int16x8_t v95 = vaddq_s16(v91, v94); + int16x8_t v96 = vqrdmulhq_n_s16(v95, 18578); + int16x8_t v97 = vaddq_s16(v90, v96); + int16x8_t v98 = vsubq_s16(v46, v42); + int16x8_t v99_tmp = vqrdmulhq_n_s16(v98, 18446); + int16x8_t v99 = vmlaq_n_s16(v99_tmp, v98, 2); + int16x8_t v100 = vsubq_s16(v51, v57); + int16x8_t v101 = vaddq_s16(v99, v100); + int16x8_t v102 = vqrdmulhq_n_s16(v101, 21195); + int16x8_t v103 = vsubq_s16(v2, v8); + int16x8_t v104 = vsubq_s16(v15, v22); + int16x8_t v105_tmp = vqrdmulhq_n_s16(v104, 18446); + int16x8_t v105 = vmlaq_n_s16(v105_tmp, v104, 2); + int16x8_t v106 = vaddq_s16(v103, v105); + int16x8_t v107 = vaddq_s16(v102, v106); + int16x8_t v108 = vsubq_s16(v103, v105); + int16x8_t v109 = vsubq_s16(v100, v99); + int16x8_t v110 = vqrdmulhq_n_s16(v109, 25826); + int16x8_t v111 = vaddq_s16(v108, v110); + int16x8_t v112 = vsubq_s16(v87, v89); + int16x8_t v113 = vsubq_s16(v91, v94); + int16x8_t v114_tmp = vqrdmulhq_n_s16(v113, 1988); + int16x8_t v114 = vaddq_s16(v114_tmp, v113); + int16x8_t v115 = vaddq_s16(v112, v114); + int16x8_t v116 = 
vsubq_s16(v65, v71); + int16x8_t v117 = vsubq_s16(v77, v83); + int16x8_t v118_tmp = vqrdmulhq_n_s16(v117, 23673); + int16x8_t v118 = vaddq_s16(v118_tmp, v117); + int16x8_t v119 = vaddq_s16(v116, v118); + int16x8_t v120 = vsubq_s16(v58, v48); + int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 3314); + int16x8_t v121 = vmlaq_n_s16(v121_tmp, v120, 5); + int16x8_t v122 = vsubq_s16(v9, v24); + int16x8_t v123 = vaddq_s16(v121, v122); + int16x8_t v124 = vsubq_s16(v122, v121); + int16x8_t v125 = vsubq_s16(v116, v118); + int16x8_t v126 = vsubq_s16(v112, v114); + int16x8_t v127 = vsubq_s16(v108, v110); + int16x8_t v128 = vsubq_s16(v106, v102); + int16x8_t v129 = vsubq_s16(v90, v96); + int16x8_t v130 = vsubq_s16(v72, v85); + int16x8_t v131 = vsubq_s16(v25, v60); + vst1q_s16(out + out_stride * 0 + i, v61); + vst1q_s16(out + out_stride * 1 + i, v86); + vst1q_s16(out + out_stride * 2 + i, v97); + vst1q_s16(out + out_stride * 3 + i, v107); + vst1q_s16(out + out_stride * 4 + i, v111); + vst1q_s16(out + out_stride * 5 + i, v115); + vst1q_s16(out + out_stride * 6 + i, v119); + vst1q_s16(out + out_stride * 7 + i, v123); + vst1q_s16(out + out_stride * 8 + i, v124); + vst1q_s16(out + out_stride * 9 + i, v125); + vst1q_s16(out + out_stride * 10 + i, v126); + vst1q_s16(out + out_stride * 11 + i, v127); + vst1q_s16(out + out_stride * 12 + i, v128); + vst1q_s16(out + out_stride * 13 + i, v129); + vst1q_s16(out + out_stride * 14 + i, v130); + vst1q_s16(out + out_stride * 15 + i, v131); + } +} diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct256-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct256-inl.h new file mode 100644 index 0000000000..a823440af2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct256-inl.h @@ -0,0 +1,4811 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* This file is automatically generated. Do not modify it directly. 
*/ +#if HWY_TARGET != HWY_NEON +#error "only include this file from fast_dct-inl.h" +#endif + +constexpr size_t FastIDCTIntegerBits(FastDCTTag<256>) { return 3; } + +void FastIDCT(FastDCTTag<256>, const int16_t* in, size_t in_stride, + int16_t* out, size_t out_stride, size_t count) { + JXL_ASSERT(count % 8 == 0); + for (size_t i = 0; i < count; i += 8) { + int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i); + int16x8_t v1 = vld1q_s16(in + in_stride * 128 + i); + int16x8_t v2 = vaddq_s16(v0, v1); + int16x8_t v3 = vld1q_s16(in + in_stride * 64 + i); + int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573); + int16x8_t v4 = vaddq_s16(v4_tmp, v3); + int16x8_t v5 = vld1q_s16(in + in_stride * 192 + i); + int16x8_t v6 = vaddq_s16(v5, v3); + int16x8_t v7 = vaddq_s16(v4, v6); + int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734); + int16x8_t v9 = vaddq_s16(v2, v8); + int16x8_t v10 = vld1q_s16(in + in_stride * 32 + i); + int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573); + int16x8_t v11 = vaddq_s16(v11_tmp, v10); + int16x8_t v12 = vld1q_s16(in + in_stride * 160 + i); + int16x8_t v13 = vld1q_s16(in + in_stride * 96 + i); + int16x8_t v14 = vaddq_s16(v12, v13); + int16x8_t v15 = vaddq_s16(v11, v14); + int16x8_t v16 = vaddq_s16(v13, v10); + int16x8_t v17_tmp = vqrdmulhq_n_s16(v16, 13573); + int16x8_t v17 = vaddq_s16(v17_tmp, v16); + int16x8_t v18 = vld1q_s16(in + in_stride * 224 + i); + int16x8_t v19 = vaddq_s16(v18, v12); + int16x8_t v20 = vaddq_s16(v19, v16); + int16x8_t v21 = vaddq_s16(v17, v20); + int16x8_t v22 = vqrdmulhq_n_s16(v21, 17734); + int16x8_t v23 = vaddq_s16(v15, v22); + int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705); + int16x8_t v25 = vaddq_s16(v9, v24); + int16x8_t v26 = vld1q_s16(in + in_stride * 16 + i); + int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573); + int16x8_t v27 = vaddq_s16(v27_tmp, v26); + int16x8_t v28 = vld1q_s16(in + in_stride * 144 + i); + int16x8_t v29 = vld1q_s16(in + in_stride * 112 + i); + int16x8_t v30 = vaddq_s16(v28, v29); + int16x8_t v31 = vaddq_s16(v27, v30); + 
int16x8_t v32 = vld1q_s16(in + in_stride * 80 + i); + int16x8_t v33 = vld1q_s16(in + in_stride * 48 + i); + int16x8_t v34 = vaddq_s16(v32, v33); + int16x8_t v35_tmp = vqrdmulhq_n_s16(v34, 13573); + int16x8_t v35 = vaddq_s16(v35_tmp, v34); + int16x8_t v36 = vld1q_s16(in + in_stride * 208 + i); + int16x8_t v37 = vld1q_s16(in + in_stride * 176 + i); + int16x8_t v38 = vaddq_s16(v36, v37); + int16x8_t v39 = vaddq_s16(v38, v34); + int16x8_t v40 = vaddq_s16(v35, v39); + int16x8_t v41 = vqrdmulhq_n_s16(v40, 17734); + int16x8_t v42 = vaddq_s16(v31, v41); + int16x8_t v43 = vaddq_s16(v33, v26); + int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573); + int16x8_t v44 = vaddq_s16(v44_tmp, v43); + int16x8_t v45 = vaddq_s16(v37, v28); + int16x8_t v46 = vaddq_s16(v29, v32); + int16x8_t v47 = vaddq_s16(v45, v46); + int16x8_t v48 = vaddq_s16(v44, v47); + int16x8_t v49 = vaddq_s16(v46, v43); + int16x8_t v50_tmp = vqrdmulhq_n_s16(v49, 13573); + int16x8_t v50 = vaddq_s16(v50_tmp, v49); + int16x8_t v51 = vld1q_s16(in + in_stride * 240 + i); + int16x8_t v52 = vaddq_s16(v51, v36); + int16x8_t v53 = vaddq_s16(v52, v45); + int16x8_t v54 = vaddq_s16(v53, v49); + int16x8_t v55 = vaddq_s16(v50, v54); + int16x8_t v56 = vqrdmulhq_n_s16(v55, 17734); + int16x8_t v57 = vaddq_s16(v48, v56); + int16x8_t v58 = vqrdmulhq_n_s16(v57, 16705); + int16x8_t v59 = vaddq_s16(v42, v58); + int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463); + int16x8_t v61 = vaddq_s16(v25, v60); + int16x8_t v62 = vld1q_s16(in + in_stride * 8 + i); + int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573); + int16x8_t v63 = vaddq_s16(v63_tmp, v62); + int16x8_t v64 = vld1q_s16(in + in_stride * 136 + i); + int16x8_t v65 = vld1q_s16(in + in_stride * 120 + i); + int16x8_t v66 = vaddq_s16(v64, v65); + int16x8_t v67 = vaddq_s16(v63, v66); + int16x8_t v68 = vld1q_s16(in + in_stride * 72 + i); + int16x8_t v69 = vld1q_s16(in + in_stride * 56 + i); + int16x8_t v70 = vaddq_s16(v68, v69); + int16x8_t v71_tmp = vqrdmulhq_n_s16(v70, 13573); + int16x8_t v71 = 
vaddq_s16(v71_tmp, v70); + int16x8_t v72 = vld1q_s16(in + in_stride * 200 + i); + int16x8_t v73 = vld1q_s16(in + in_stride * 184 + i); + int16x8_t v74 = vaddq_s16(v72, v73); + int16x8_t v75 = vaddq_s16(v74, v70); + int16x8_t v76 = vaddq_s16(v71, v75); + int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734); + int16x8_t v78 = vaddq_s16(v67, v77); + int16x8_t v79 = vld1q_s16(in + in_stride * 40 + i); + int16x8_t v80 = vld1q_s16(in + in_stride * 24 + i); + int16x8_t v81 = vaddq_s16(v79, v80); + int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573); + int16x8_t v82 = vaddq_s16(v82_tmp, v81); + int16x8_t v83 = vld1q_s16(in + in_stride * 168 + i); + int16x8_t v84 = vld1q_s16(in + in_stride * 152 + i); + int16x8_t v85 = vaddq_s16(v83, v84); + int16x8_t v86 = vld1q_s16(in + in_stride * 104 + i); + int16x8_t v87 = vld1q_s16(in + in_stride * 88 + i); + int16x8_t v88 = vaddq_s16(v86, v87); + int16x8_t v89 = vaddq_s16(v85, v88); + int16x8_t v90 = vaddq_s16(v82, v89); + int16x8_t v91 = vaddq_s16(v88, v81); + int16x8_t v92_tmp = vqrdmulhq_n_s16(v91, 13573); + int16x8_t v92 = vaddq_s16(v92_tmp, v91); + int16x8_t v93 = vld1q_s16(in + in_stride * 232 + i); + int16x8_t v94 = vld1q_s16(in + in_stride * 216 + i); + int16x8_t v95 = vaddq_s16(v93, v94); + int16x8_t v96 = vaddq_s16(v95, v85); + int16x8_t v97 = vaddq_s16(v96, v91); + int16x8_t v98 = vaddq_s16(v92, v97); + int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734); + int16x8_t v100 = vaddq_s16(v90, v99); + int16x8_t v101 = vqrdmulhq_n_s16(v100, 16705); + int16x8_t v102 = vaddq_s16(v78, v101); + int16x8_t v103 = vaddq_s16(v80, v62); + int16x8_t v104_tmp = vqrdmulhq_n_s16(v103, 13573); + int16x8_t v104 = vaddq_s16(v104_tmp, v103); + int16x8_t v105 = vaddq_s16(v84, v64); + int16x8_t v106 = vaddq_s16(v65, v86); + int16x8_t v107 = vaddq_s16(v105, v106); + int16x8_t v108 = vaddq_s16(v104, v107); + int16x8_t v109 = vaddq_s16(v87, v68); + int16x8_t v110 = vaddq_s16(v69, v79); + int16x8_t v111 = vaddq_s16(v109, v110); + int16x8_t v112_tmp = vqrdmulhq_n_s16(v111, 
13573); + int16x8_t v112 = vaddq_s16(v112_tmp, v111); + int16x8_t v113 = vaddq_s16(v94, v72); + int16x8_t v114 = vaddq_s16(v73, v83); + int16x8_t v115 = vaddq_s16(v113, v114); + int16x8_t v116 = vaddq_s16(v115, v111); + int16x8_t v117 = vaddq_s16(v112, v116); + int16x8_t v118 = vqrdmulhq_n_s16(v117, 17734); + int16x8_t v119 = vaddq_s16(v108, v118); + int16x8_t v120 = vaddq_s16(v110, v103); + int16x8_t v121_tmp = vqrdmulhq_n_s16(v120, 13573); + int16x8_t v121 = vaddq_s16(v121_tmp, v120); + int16x8_t v122 = vaddq_s16(v114, v105); + int16x8_t v123 = vaddq_s16(v106, v109); + int16x8_t v124 = vaddq_s16(v122, v123); + int16x8_t v125 = vaddq_s16(v121, v124); + int16x8_t v126 = vaddq_s16(v123, v120); + int16x8_t v127_tmp = vqrdmulhq_n_s16(v126, 13573); + int16x8_t v127 = vaddq_s16(v127_tmp, v126); + int16x8_t v128 = vld1q_s16(in + in_stride * 248 + i); + int16x8_t v129 = vaddq_s16(v128, v93); + int16x8_t v130 = vaddq_s16(v129, v113); + int16x8_t v131 = vaddq_s16(v130, v122); + int16x8_t v132 = vaddq_s16(v131, v126); + int16x8_t v133 = vaddq_s16(v127, v132); + int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734); + int16x8_t v135 = vaddq_s16(v125, v134); + int16x8_t v136 = vqrdmulhq_n_s16(v135, 16705); + int16x8_t v137 = vaddq_s16(v119, v136); + int16x8_t v138 = vqrdmulhq_n_s16(v137, 16463); + int16x8_t v139 = vaddq_s16(v102, v138); + int16x8_t v140 = vqrdmulhq_n_s16(v139, 16404); + int16x8_t v141 = vaddq_s16(v61, v140); + int16x8_t v142 = vld1q_s16(in + in_stride * 4 + i); + int16x8_t v143_tmp = vqrdmulhq_n_s16(v142, 13573); + int16x8_t v143 = vaddq_s16(v143_tmp, v142); + int16x8_t v144 = vld1q_s16(in + in_stride * 132 + i); + int16x8_t v145 = vld1q_s16(in + in_stride * 124 + i); + int16x8_t v146 = vaddq_s16(v144, v145); + int16x8_t v147 = vaddq_s16(v143, v146); + int16x8_t v148 = vld1q_s16(in + in_stride * 68 + i); + int16x8_t v149 = vld1q_s16(in + in_stride * 60 + i); + int16x8_t v150 = vaddq_s16(v148, v149); + int16x8_t v151_tmp = vqrdmulhq_n_s16(v150, 13573); + int16x8_t v151 
= vaddq_s16(v151_tmp, v150); + int16x8_t v152 = vld1q_s16(in + in_stride * 196 + i); + int16x8_t v153 = vld1q_s16(in + in_stride * 188 + i); + int16x8_t v154 = vaddq_s16(v152, v153); + int16x8_t v155 = vaddq_s16(v154, v150); + int16x8_t v156 = vaddq_s16(v151, v155); + int16x8_t v157 = vqrdmulhq_n_s16(v156, 17734); + int16x8_t v158 = vaddq_s16(v147, v157); + int16x8_t v159 = vld1q_s16(in + in_stride * 36 + i); + int16x8_t v160 = vld1q_s16(in + in_stride * 28 + i); + int16x8_t v161 = vaddq_s16(v159, v160); + int16x8_t v162_tmp = vqrdmulhq_n_s16(v161, 13573); + int16x8_t v162 = vaddq_s16(v162_tmp, v161); + int16x8_t v163 = vld1q_s16(in + in_stride * 164 + i); + int16x8_t v164 = vld1q_s16(in + in_stride * 156 + i); + int16x8_t v165 = vaddq_s16(v163, v164); + int16x8_t v166 = vld1q_s16(in + in_stride * 100 + i); + int16x8_t v167 = vld1q_s16(in + in_stride * 92 + i); + int16x8_t v168 = vaddq_s16(v166, v167); + int16x8_t v169 = vaddq_s16(v165, v168); + int16x8_t v170 = vaddq_s16(v162, v169); + int16x8_t v171 = vaddq_s16(v168, v161); + int16x8_t v172_tmp = vqrdmulhq_n_s16(v171, 13573); + int16x8_t v172 = vaddq_s16(v172_tmp, v171); + int16x8_t v173 = vld1q_s16(in + in_stride * 228 + i); + int16x8_t v174 = vld1q_s16(in + in_stride * 220 + i); + int16x8_t v175 = vaddq_s16(v173, v174); + int16x8_t v176 = vaddq_s16(v175, v165); + int16x8_t v177 = vaddq_s16(v176, v171); + int16x8_t v178 = vaddq_s16(v172, v177); + int16x8_t v179 = vqrdmulhq_n_s16(v178, 17734); + int16x8_t v180 = vaddq_s16(v170, v179); + int16x8_t v181 = vqrdmulhq_n_s16(v180, 16705); + int16x8_t v182 = vaddq_s16(v158, v181); + int16x8_t v183 = vld1q_s16(in + in_stride * 20 + i); + int16x8_t v184 = vld1q_s16(in + in_stride * 12 + i); + int16x8_t v185 = vaddq_s16(v183, v184); + int16x8_t v186_tmp = vqrdmulhq_n_s16(v185, 13573); + int16x8_t v186 = vaddq_s16(v186_tmp, v185); + int16x8_t v187 = vld1q_s16(in + in_stride * 148 + i); + int16x8_t v188 = vld1q_s16(in + in_stride * 140 + i); + int16x8_t v189 = 
vaddq_s16(v187, v188); + int16x8_t v190 = vld1q_s16(in + in_stride * 116 + i); + int16x8_t v191 = vld1q_s16(in + in_stride * 108 + i); + int16x8_t v192 = vaddq_s16(v190, v191); + int16x8_t v193 = vaddq_s16(v189, v192); + int16x8_t v194 = vaddq_s16(v186, v193); + int16x8_t v195 = vld1q_s16(in + in_stride * 84 + i); + int16x8_t v196 = vld1q_s16(in + in_stride * 76 + i); + int16x8_t v197 = vaddq_s16(v195, v196); + int16x8_t v198 = vld1q_s16(in + in_stride * 52 + i); + int16x8_t v199 = vld1q_s16(in + in_stride * 44 + i); + int16x8_t v200 = vaddq_s16(v198, v199); + int16x8_t v201 = vaddq_s16(v197, v200); + int16x8_t v202_tmp = vqrdmulhq_n_s16(v201, 13573); + int16x8_t v202 = vaddq_s16(v202_tmp, v201); + int16x8_t v203 = vld1q_s16(in + in_stride * 212 + i); + int16x8_t v204 = vld1q_s16(in + in_stride * 204 + i); + int16x8_t v205 = vaddq_s16(v203, v204); + int16x8_t v206 = vld1q_s16(in + in_stride * 180 + i); + int16x8_t v207 = vld1q_s16(in + in_stride * 172 + i); + int16x8_t v208 = vaddq_s16(v206, v207); + int16x8_t v209 = vaddq_s16(v205, v208); + int16x8_t v210 = vaddq_s16(v209, v201); + int16x8_t v211 = vaddq_s16(v202, v210); + int16x8_t v212 = vqrdmulhq_n_s16(v211, 17734); + int16x8_t v213 = vaddq_s16(v194, v212); + int16x8_t v214 = vaddq_s16(v200, v185); + int16x8_t v215_tmp = vqrdmulhq_n_s16(v214, 13573); + int16x8_t v215 = vaddq_s16(v215_tmp, v214); + int16x8_t v216 = vaddq_s16(v208, v189); + int16x8_t v217 = vaddq_s16(v192, v197); + int16x8_t v218 = vaddq_s16(v216, v217); + int16x8_t v219 = vaddq_s16(v215, v218); + int16x8_t v220 = vaddq_s16(v217, v214); + int16x8_t v221_tmp = vqrdmulhq_n_s16(v220, 13573); + int16x8_t v221 = vaddq_s16(v221_tmp, v220); + int16x8_t v222 = vld1q_s16(in + in_stride * 244 + i); + int16x8_t v223 = vld1q_s16(in + in_stride * 236 + i); + int16x8_t v224 = vaddq_s16(v222, v223); + int16x8_t v225 = vaddq_s16(v224, v205); + int16x8_t v226 = vaddq_s16(v225, v216); + int16x8_t v227 = vaddq_s16(v226, v220); + int16x8_t v228 = vaddq_s16(v221, 
v227); + int16x8_t v229 = vqrdmulhq_n_s16(v228, 17734); + int16x8_t v230 = vaddq_s16(v219, v229); + int16x8_t v231 = vqrdmulhq_n_s16(v230, 16705); + int16x8_t v232 = vaddq_s16(v213, v231); + int16x8_t v233 = vqrdmulhq_n_s16(v232, 16463); + int16x8_t v234 = vaddq_s16(v182, v233); + int16x8_t v235 = vaddq_s16(v184, v142); + int16x8_t v236_tmp = vqrdmulhq_n_s16(v235, 13573); + int16x8_t v236 = vaddq_s16(v236_tmp, v235); + int16x8_t v237 = vaddq_s16(v188, v144); + int16x8_t v238 = vaddq_s16(v145, v190); + int16x8_t v239 = vaddq_s16(v237, v238); + int16x8_t v240 = vaddq_s16(v236, v239); + int16x8_t v241 = vaddq_s16(v196, v148); + int16x8_t v242 = vaddq_s16(v149, v198); + int16x8_t v243 = vaddq_s16(v241, v242); + int16x8_t v244_tmp = vqrdmulhq_n_s16(v243, 13573); + int16x8_t v244 = vaddq_s16(v244_tmp, v243); + int16x8_t v245 = vaddq_s16(v204, v152); + int16x8_t v246 = vaddq_s16(v153, v206); + int16x8_t v247 = vaddq_s16(v245, v246); + int16x8_t v248 = vaddq_s16(v247, v243); + int16x8_t v249 = vaddq_s16(v244, v248); + int16x8_t v250 = vqrdmulhq_n_s16(v249, 17734); + int16x8_t v251 = vaddq_s16(v240, v250); + int16x8_t v252 = vaddq_s16(v199, v159); + int16x8_t v253 = vaddq_s16(v160, v183); + int16x8_t v254 = vaddq_s16(v252, v253); + int16x8_t v255_tmp = vqrdmulhq_n_s16(v254, 13573); + int16x8_t v255 = vaddq_s16(v255_tmp, v254); + int16x8_t v256 = vaddq_s16(v207, v163); + int16x8_t v257 = vaddq_s16(v164, v187); + int16x8_t v258 = vaddq_s16(v256, v257); + int16x8_t v259 = vaddq_s16(v191, v166); + int16x8_t v260 = vaddq_s16(v167, v195); + int16x8_t v261 = vaddq_s16(v259, v260); + int16x8_t v262 = vaddq_s16(v258, v261); + int16x8_t v263 = vaddq_s16(v255, v262); + int16x8_t v264 = vaddq_s16(v261, v254); + int16x8_t v265_tmp = vqrdmulhq_n_s16(v264, 13573); + int16x8_t v265 = vaddq_s16(v265_tmp, v264); + int16x8_t v266 = vaddq_s16(v223, v173); + int16x8_t v267 = vaddq_s16(v174, v203); + int16x8_t v268 = vaddq_s16(v266, v267); + int16x8_t v269 = vaddq_s16(v268, v258); + int16x8_t 
v270 = vaddq_s16(v269, v264); + int16x8_t v271 = vaddq_s16(v265, v270); + int16x8_t v272 = vqrdmulhq_n_s16(v271, 17734); + int16x8_t v273 = vaddq_s16(v263, v272); + int16x8_t v274 = vqrdmulhq_n_s16(v273, 16705); + int16x8_t v275 = vaddq_s16(v251, v274); + int16x8_t v276 = vaddq_s16(v253, v235); + int16x8_t v277_tmp = vqrdmulhq_n_s16(v276, 13573); + int16x8_t v277 = vaddq_s16(v277_tmp, v276); + int16x8_t v278 = vaddq_s16(v257, v237); + int16x8_t v279 = vaddq_s16(v238, v259); + int16x8_t v280 = vaddq_s16(v278, v279); + int16x8_t v281 = vaddq_s16(v277, v280); + int16x8_t v282 = vaddq_s16(v260, v241); + int16x8_t v283 = vaddq_s16(v242, v252); + int16x8_t v284 = vaddq_s16(v282, v283); + int16x8_t v285_tmp = vqrdmulhq_n_s16(v284, 13573); + int16x8_t v285 = vaddq_s16(v285_tmp, v284); + int16x8_t v286 = vaddq_s16(v267, v245); + int16x8_t v287 = vaddq_s16(v246, v256); + int16x8_t v288 = vaddq_s16(v286, v287); + int16x8_t v289 = vaddq_s16(v288, v284); + int16x8_t v290 = vaddq_s16(v285, v289); + int16x8_t v291 = vqrdmulhq_n_s16(v290, 17734); + int16x8_t v292 = vaddq_s16(v281, v291); + int16x8_t v293 = vaddq_s16(v283, v276); + int16x8_t v294_tmp = vqrdmulhq_n_s16(v293, 13573); + int16x8_t v294 = vaddq_s16(v294_tmp, v293); + int16x8_t v295 = vaddq_s16(v287, v278); + int16x8_t v296 = vaddq_s16(v279, v282); + int16x8_t v297 = vaddq_s16(v295, v296); + int16x8_t v298 = vaddq_s16(v294, v297); + int16x8_t v299 = vaddq_s16(v296, v293); + int16x8_t v300_tmp = vqrdmulhq_n_s16(v299, 13573); + int16x8_t v300 = vaddq_s16(v300_tmp, v299); + int16x8_t v301 = vld1q_s16(in + in_stride * 252 + i); + int16x8_t v302 = vaddq_s16(v301, v222); + int16x8_t v303 = vaddq_s16(v302, v266); + int16x8_t v304 = vaddq_s16(v303, v286); + int16x8_t v305 = vaddq_s16(v304, v295); + int16x8_t v306 = vaddq_s16(v305, v299); + int16x8_t v307 = vaddq_s16(v300, v306); + int16x8_t v308 = vqrdmulhq_n_s16(v307, 17734); + int16x8_t v309 = vaddq_s16(v298, v308); + int16x8_t v310 = vqrdmulhq_n_s16(v309, 16705); + int16x8_t 
v311 = vaddq_s16(v292, v310); + int16x8_t v312 = vqrdmulhq_n_s16(v311, 16463); + int16x8_t v313 = vaddq_s16(v275, v312); + int16x8_t v314 = vqrdmulhq_n_s16(v313, 16404); + int16x8_t v315 = vaddq_s16(v234, v314); + int16x8_t v316 = vqrdmulhq_n_s16(v315, 16389); + int16x8_t v317 = vaddq_s16(v141, v316); + int16x8_t v318 = vld1q_s16(in + in_stride * 2 + i); + int16x8_t v319_tmp = vqrdmulhq_n_s16(v318, 13573); + int16x8_t v319 = vaddq_s16(v319_tmp, v318); + int16x8_t v320 = vld1q_s16(in + in_stride * 130 + i); + int16x8_t v321 = vld1q_s16(in + in_stride * 126 + i); + int16x8_t v322 = vaddq_s16(v320, v321); + int16x8_t v323 = vaddq_s16(v319, v322); + int16x8_t v324 = vld1q_s16(in + in_stride * 66 + i); + int16x8_t v325 = vld1q_s16(in + in_stride * 62 + i); + int16x8_t v326 = vaddq_s16(v324, v325); + int16x8_t v327_tmp = vqrdmulhq_n_s16(v326, 13573); + int16x8_t v327 = vaddq_s16(v327_tmp, v326); + int16x8_t v328 = vld1q_s16(in + in_stride * 194 + i); + int16x8_t v329 = vld1q_s16(in + in_stride * 190 + i); + int16x8_t v330 = vaddq_s16(v328, v329); + int16x8_t v331 = vaddq_s16(v330, v326); + int16x8_t v332 = vaddq_s16(v327, v331); + int16x8_t v333 = vqrdmulhq_n_s16(v332, 17734); + int16x8_t v334 = vaddq_s16(v323, v333); + int16x8_t v335 = vld1q_s16(in + in_stride * 34 + i); + int16x8_t v336 = vld1q_s16(in + in_stride * 30 + i); + int16x8_t v337 = vaddq_s16(v335, v336); + int16x8_t v338_tmp = vqrdmulhq_n_s16(v337, 13573); + int16x8_t v338 = vaddq_s16(v338_tmp, v337); + int16x8_t v339 = vld1q_s16(in + in_stride * 162 + i); + int16x8_t v340 = vld1q_s16(in + in_stride * 158 + i); + int16x8_t v341 = vaddq_s16(v339, v340); + int16x8_t v342 = vld1q_s16(in + in_stride * 98 + i); + int16x8_t v343 = vld1q_s16(in + in_stride * 94 + i); + int16x8_t v344 = vaddq_s16(v342, v343); + int16x8_t v345 = vaddq_s16(v341, v344); + int16x8_t v346 = vaddq_s16(v338, v345); + int16x8_t v347 = vaddq_s16(v344, v337); + int16x8_t v348_tmp = vqrdmulhq_n_s16(v347, 13573); + int16x8_t v348 = 
vaddq_s16(v348_tmp, v347); + int16x8_t v349 = vld1q_s16(in + in_stride * 226 + i); + int16x8_t v350 = vld1q_s16(in + in_stride * 222 + i); + int16x8_t v351 = vaddq_s16(v349, v350); + int16x8_t v352 = vaddq_s16(v351, v341); + int16x8_t v353 = vaddq_s16(v352, v347); + int16x8_t v354 = vaddq_s16(v348, v353); + int16x8_t v355 = vqrdmulhq_n_s16(v354, 17734); + int16x8_t v356 = vaddq_s16(v346, v355); + int16x8_t v357 = vqrdmulhq_n_s16(v356, 16705); + int16x8_t v358 = vaddq_s16(v334, v357); + int16x8_t v359 = vld1q_s16(in + in_stride * 18 + i); + int16x8_t v360 = vld1q_s16(in + in_stride * 14 + i); + int16x8_t v361 = vaddq_s16(v359, v360); + int16x8_t v362_tmp = vqrdmulhq_n_s16(v361, 13573); + int16x8_t v362 = vaddq_s16(v362_tmp, v361); + int16x8_t v363 = vld1q_s16(in + in_stride * 146 + i); + int16x8_t v364 = vld1q_s16(in + in_stride * 142 + i); + int16x8_t v365 = vaddq_s16(v363, v364); + int16x8_t v366 = vld1q_s16(in + in_stride * 114 + i); + int16x8_t v367 = vld1q_s16(in + in_stride * 110 + i); + int16x8_t v368 = vaddq_s16(v366, v367); + int16x8_t v369 = vaddq_s16(v365, v368); + int16x8_t v370 = vaddq_s16(v362, v369); + int16x8_t v371 = vld1q_s16(in + in_stride * 82 + i); + int16x8_t v372 = vld1q_s16(in + in_stride * 78 + i); + int16x8_t v373 = vaddq_s16(v371, v372); + int16x8_t v374 = vld1q_s16(in + in_stride * 50 + i); + int16x8_t v375 = vld1q_s16(in + in_stride * 46 + i); + int16x8_t v376 = vaddq_s16(v374, v375); + int16x8_t v377 = vaddq_s16(v373, v376); + int16x8_t v378_tmp = vqrdmulhq_n_s16(v377, 13573); + int16x8_t v378 = vaddq_s16(v378_tmp, v377); + int16x8_t v379 = vld1q_s16(in + in_stride * 210 + i); + int16x8_t v380 = vld1q_s16(in + in_stride * 206 + i); + int16x8_t v381 = vaddq_s16(v379, v380); + int16x8_t v382 = vld1q_s16(in + in_stride * 178 + i); + int16x8_t v383 = vld1q_s16(in + in_stride * 174 + i); + int16x8_t v384 = vaddq_s16(v382, v383); + int16x8_t v385 = vaddq_s16(v381, v384); + int16x8_t v386 = vaddq_s16(v385, v377); + int16x8_t v387 = 
vaddq_s16(v378, v386); + int16x8_t v388 = vqrdmulhq_n_s16(v387, 17734); + int16x8_t v389 = vaddq_s16(v370, v388); + int16x8_t v390 = vaddq_s16(v376, v361); + int16x8_t v391_tmp = vqrdmulhq_n_s16(v390, 13573); + int16x8_t v391 = vaddq_s16(v391_tmp, v390); + int16x8_t v392 = vaddq_s16(v384, v365); + int16x8_t v393 = vaddq_s16(v368, v373); + int16x8_t v394 = vaddq_s16(v392, v393); + int16x8_t v395 = vaddq_s16(v391, v394); + int16x8_t v396 = vaddq_s16(v393, v390); + int16x8_t v397_tmp = vqrdmulhq_n_s16(v396, 13573); + int16x8_t v397 = vaddq_s16(v397_tmp, v396); + int16x8_t v398 = vld1q_s16(in + in_stride * 242 + i); + int16x8_t v399 = vld1q_s16(in + in_stride * 238 + i); + int16x8_t v400 = vaddq_s16(v398, v399); + int16x8_t v401 = vaddq_s16(v400, v381); + int16x8_t v402 = vaddq_s16(v401, v392); + int16x8_t v403 = vaddq_s16(v402, v396); + int16x8_t v404 = vaddq_s16(v397, v403); + int16x8_t v405 = vqrdmulhq_n_s16(v404, 17734); + int16x8_t v406 = vaddq_s16(v395, v405); + int16x8_t v407 = vqrdmulhq_n_s16(v406, 16705); + int16x8_t v408 = vaddq_s16(v389, v407); + int16x8_t v409 = vqrdmulhq_n_s16(v408, 16463); + int16x8_t v410 = vaddq_s16(v358, v409); + int16x8_t v411 = vld1q_s16(in + in_stride * 10 + i); + int16x8_t v412 = vld1q_s16(in + in_stride * 6 + i); + int16x8_t v413 = vaddq_s16(v411, v412); + int16x8_t v414_tmp = vqrdmulhq_n_s16(v413, 13573); + int16x8_t v414 = vaddq_s16(v414_tmp, v413); + int16x8_t v415 = vld1q_s16(in + in_stride * 138 + i); + int16x8_t v416 = vld1q_s16(in + in_stride * 134 + i); + int16x8_t v417 = vaddq_s16(v415, v416); + int16x8_t v418 = vld1q_s16(in + in_stride * 122 + i); + int16x8_t v419 = vld1q_s16(in + in_stride * 118 + i); + int16x8_t v420 = vaddq_s16(v418, v419); + int16x8_t v421 = vaddq_s16(v417, v420); + int16x8_t v422 = vaddq_s16(v414, v421); + int16x8_t v423 = vld1q_s16(in + in_stride * 74 + i); + int16x8_t v424 = vld1q_s16(in + in_stride * 70 + i); + int16x8_t v425 = vaddq_s16(v423, v424); + int16x8_t v426 = vld1q_s16(in + in_stride * 
58 + i); + int16x8_t v427 = vld1q_s16(in + in_stride * 54 + i); + int16x8_t v428 = vaddq_s16(v426, v427); + int16x8_t v429 = vaddq_s16(v425, v428); + int16x8_t v430_tmp = vqrdmulhq_n_s16(v429, 13573); + int16x8_t v430 = vaddq_s16(v430_tmp, v429); + int16x8_t v431 = vld1q_s16(in + in_stride * 202 + i); + int16x8_t v432 = vld1q_s16(in + in_stride * 198 + i); + int16x8_t v433 = vaddq_s16(v431, v432); + int16x8_t v434 = vld1q_s16(in + in_stride * 186 + i); + int16x8_t v435 = vld1q_s16(in + in_stride * 182 + i); + int16x8_t v436 = vaddq_s16(v434, v435); + int16x8_t v437 = vaddq_s16(v433, v436); + int16x8_t v438 = vaddq_s16(v437, v429); + int16x8_t v439 = vaddq_s16(v430, v438); + int16x8_t v440 = vqrdmulhq_n_s16(v439, 17734); + int16x8_t v441 = vaddq_s16(v422, v440); + int16x8_t v442 = vld1q_s16(in + in_stride * 42 + i); + int16x8_t v443 = vld1q_s16(in + in_stride * 38 + i); + int16x8_t v444 = vaddq_s16(v442, v443); + int16x8_t v445 = vld1q_s16(in + in_stride * 26 + i); + int16x8_t v446 = vld1q_s16(in + in_stride * 22 + i); + int16x8_t v447 = vaddq_s16(v445, v446); + int16x8_t v448 = vaddq_s16(v444, v447); + int16x8_t v449_tmp = vqrdmulhq_n_s16(v448, 13573); + int16x8_t v449 = vaddq_s16(v449_tmp, v448); + int16x8_t v450 = vld1q_s16(in + in_stride * 170 + i); + int16x8_t v451 = vld1q_s16(in + in_stride * 166 + i); + int16x8_t v452 = vaddq_s16(v450, v451); + int16x8_t v453 = vld1q_s16(in + in_stride * 154 + i); + int16x8_t v454 = vld1q_s16(in + in_stride * 150 + i); + int16x8_t v455 = vaddq_s16(v453, v454); + int16x8_t v456 = vaddq_s16(v452, v455); + int16x8_t v457 = vld1q_s16(in + in_stride * 106 + i); + int16x8_t v458 = vld1q_s16(in + in_stride * 102 + i); + int16x8_t v459 = vaddq_s16(v457, v458); + int16x8_t v460 = vld1q_s16(in + in_stride * 90 + i); + int16x8_t v461 = vld1q_s16(in + in_stride * 86 + i); + int16x8_t v462 = vaddq_s16(v460, v461); + int16x8_t v463 = vaddq_s16(v459, v462); + int16x8_t v464 = vaddq_s16(v456, v463); + int16x8_t v465 = vaddq_s16(v449, v464); 
+ int16x8_t v466 = vaddq_s16(v463, v448); + int16x8_t v467_tmp = vqrdmulhq_n_s16(v466, 13573); + int16x8_t v467 = vaddq_s16(v467_tmp, v466); + int16x8_t v468 = vld1q_s16(in + in_stride * 234 + i); + int16x8_t v469 = vld1q_s16(in + in_stride * 230 + i); + int16x8_t v470 = vaddq_s16(v468, v469); + int16x8_t v471 = vld1q_s16(in + in_stride * 218 + i); + int16x8_t v472 = vld1q_s16(in + in_stride * 214 + i); + int16x8_t v473 = vaddq_s16(v471, v472); + int16x8_t v474 = vaddq_s16(v470, v473); + int16x8_t v475 = vaddq_s16(v474, v456); + int16x8_t v476 = vaddq_s16(v475, v466); + int16x8_t v477 = vaddq_s16(v467, v476); + int16x8_t v478 = vqrdmulhq_n_s16(v477, 17734); + int16x8_t v479 = vaddq_s16(v465, v478); + int16x8_t v480 = vqrdmulhq_n_s16(v479, 16705); + int16x8_t v481 = vaddq_s16(v441, v480); + int16x8_t v482 = vaddq_s16(v447, v413); + int16x8_t v483_tmp = vqrdmulhq_n_s16(v482, 13573); + int16x8_t v483 = vaddq_s16(v483_tmp, v482); + int16x8_t v484 = vaddq_s16(v455, v417); + int16x8_t v485 = vaddq_s16(v420, v459); + int16x8_t v486 = vaddq_s16(v484, v485); + int16x8_t v487 = vaddq_s16(v483, v486); + int16x8_t v488 = vaddq_s16(v462, v425); + int16x8_t v489 = vaddq_s16(v428, v444); + int16x8_t v490 = vaddq_s16(v488, v489); + int16x8_t v491_tmp = vqrdmulhq_n_s16(v490, 13573); + int16x8_t v491 = vaddq_s16(v491_tmp, v490); + int16x8_t v492 = vaddq_s16(v473, v433); + int16x8_t v493 = vaddq_s16(v436, v452); + int16x8_t v494 = vaddq_s16(v492, v493); + int16x8_t v495 = vaddq_s16(v494, v490); + int16x8_t v496 = vaddq_s16(v491, v495); + int16x8_t v497 = vqrdmulhq_n_s16(v496, 17734); + int16x8_t v498 = vaddq_s16(v487, v497); + int16x8_t v499 = vaddq_s16(v489, v482); + int16x8_t v500_tmp = vqrdmulhq_n_s16(v499, 13573); + int16x8_t v500 = vaddq_s16(v500_tmp, v499); + int16x8_t v501 = vaddq_s16(v493, v484); + int16x8_t v502 = vaddq_s16(v485, v488); + int16x8_t v503 = vaddq_s16(v501, v502); + int16x8_t v504 = vaddq_s16(v500, v503); + int16x8_t v505 = vaddq_s16(v502, v499); + int16x8_t 
v506_tmp = vqrdmulhq_n_s16(v505, 13573); + int16x8_t v506 = vaddq_s16(v506_tmp, v505); + int16x8_t v507 = vld1q_s16(in + in_stride * 250 + i); + int16x8_t v508 = vld1q_s16(in + in_stride * 246 + i); + int16x8_t v509 = vaddq_s16(v507, v508); + int16x8_t v510 = vaddq_s16(v509, v470); + int16x8_t v511 = vaddq_s16(v510, v492); + int16x8_t v512 = vaddq_s16(v511, v501); + int16x8_t v513 = vaddq_s16(v512, v505); + int16x8_t v514 = vaddq_s16(v506, v513); + int16x8_t v515 = vqrdmulhq_n_s16(v514, 17734); + int16x8_t v516 = vaddq_s16(v504, v515); + int16x8_t v517 = vqrdmulhq_n_s16(v516, 16705); + int16x8_t v518 = vaddq_s16(v498, v517); + int16x8_t v519 = vqrdmulhq_n_s16(v518, 16463); + int16x8_t v520 = vaddq_s16(v481, v519); + int16x8_t v521 = vqrdmulhq_n_s16(v520, 16404); + int16x8_t v522 = vaddq_s16(v410, v521); + int16x8_t v523 = vaddq_s16(v412, v318); + int16x8_t v524_tmp = vqrdmulhq_n_s16(v523, 13573); + int16x8_t v524 = vaddq_s16(v524_tmp, v523); + int16x8_t v525 = vaddq_s16(v416, v320); + int16x8_t v526 = vaddq_s16(v321, v418); + int16x8_t v527 = vaddq_s16(v525, v526); + int16x8_t v528 = vaddq_s16(v524, v527); + int16x8_t v529 = vaddq_s16(v424, v324); + int16x8_t v530 = vaddq_s16(v325, v426); + int16x8_t v531 = vaddq_s16(v529, v530); + int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 13573); + int16x8_t v532 = vaddq_s16(v532_tmp, v531); + int16x8_t v533 = vaddq_s16(v432, v328); + int16x8_t v534 = vaddq_s16(v329, v434); + int16x8_t v535 = vaddq_s16(v533, v534); + int16x8_t v536 = vaddq_s16(v535, v531); + int16x8_t v537 = vaddq_s16(v532, v536); + int16x8_t v538 = vqrdmulhq_n_s16(v537, 17734); + int16x8_t v539 = vaddq_s16(v528, v538); + int16x8_t v540 = vaddq_s16(v443, v335); + int16x8_t v541 = vaddq_s16(v336, v445); + int16x8_t v542 = vaddq_s16(v540, v541); + int16x8_t v543_tmp = vqrdmulhq_n_s16(v542, 13573); + int16x8_t v543 = vaddq_s16(v543_tmp, v542); + int16x8_t v544 = vaddq_s16(v451, v339); + int16x8_t v545 = vaddq_s16(v340, v453); + int16x8_t v546 = vaddq_s16(v544, 
v545); + int16x8_t v547 = vaddq_s16(v458, v342); + int16x8_t v548 = vaddq_s16(v343, v460); + int16x8_t v549 = vaddq_s16(v547, v548); + int16x8_t v550 = vaddq_s16(v546, v549); + int16x8_t v551 = vaddq_s16(v543, v550); + int16x8_t v552 = vaddq_s16(v549, v542); + int16x8_t v553_tmp = vqrdmulhq_n_s16(v552, 13573); + int16x8_t v553 = vaddq_s16(v553_tmp, v552); + int16x8_t v554 = vaddq_s16(v469, v349); + int16x8_t v555 = vaddq_s16(v350, v471); + int16x8_t v556 = vaddq_s16(v554, v555); + int16x8_t v557 = vaddq_s16(v556, v546); + int16x8_t v558 = vaddq_s16(v557, v552); + int16x8_t v559 = vaddq_s16(v553, v558); + int16x8_t v560 = vqrdmulhq_n_s16(v559, 17734); + int16x8_t v561 = vaddq_s16(v551, v560); + int16x8_t v562 = vqrdmulhq_n_s16(v561, 16705); + int16x8_t v563 = vaddq_s16(v539, v562); + int16x8_t v564 = vaddq_s16(v446, v359); + int16x8_t v565 = vaddq_s16(v360, v411); + int16x8_t v566 = vaddq_s16(v564, v565); + int16x8_t v567_tmp = vqrdmulhq_n_s16(v566, 13573); + int16x8_t v567 = vaddq_s16(v567_tmp, v566); + int16x8_t v568 = vaddq_s16(v454, v363); + int16x8_t v569 = vaddq_s16(v364, v415); + int16x8_t v570 = vaddq_s16(v568, v569); + int16x8_t v571 = vaddq_s16(v419, v366); + int16x8_t v572 = vaddq_s16(v367, v457); + int16x8_t v573 = vaddq_s16(v571, v572); + int16x8_t v574 = vaddq_s16(v570, v573); + int16x8_t v575 = vaddq_s16(v567, v574); + int16x8_t v576 = vaddq_s16(v461, v371); + int16x8_t v577 = vaddq_s16(v372, v423); + int16x8_t v578 = vaddq_s16(v576, v577); + int16x8_t v579 = vaddq_s16(v427, v374); + int16x8_t v580 = vaddq_s16(v375, v442); + int16x8_t v581 = vaddq_s16(v579, v580); + int16x8_t v582 = vaddq_s16(v578, v581); + int16x8_t v583_tmp = vqrdmulhq_n_s16(v582, 13573); + int16x8_t v583 = vaddq_s16(v583_tmp, v582); + int16x8_t v584 = vaddq_s16(v472, v379); + int16x8_t v585 = vaddq_s16(v380, v431); + int16x8_t v586 = vaddq_s16(v584, v585); + int16x8_t v587 = vaddq_s16(v435, v382); + int16x8_t v588 = vaddq_s16(v383, v450); + int16x8_t v589 = vaddq_s16(v587, v588); + 
int16x8_t v590 = vaddq_s16(v586, v589); + int16x8_t v591 = vaddq_s16(v590, v582); + int16x8_t v592 = vaddq_s16(v583, v591); + int16x8_t v593 = vqrdmulhq_n_s16(v592, 17734); + int16x8_t v594 = vaddq_s16(v575, v593); + int16x8_t v595 = vaddq_s16(v581, v566); + int16x8_t v596_tmp = vqrdmulhq_n_s16(v595, 13573); + int16x8_t v596 = vaddq_s16(v596_tmp, v595); + int16x8_t v597 = vaddq_s16(v589, v570); + int16x8_t v598 = vaddq_s16(v573, v578); + int16x8_t v599 = vaddq_s16(v597, v598); + int16x8_t v600 = vaddq_s16(v596, v599); + int16x8_t v601 = vaddq_s16(v598, v595); + int16x8_t v602_tmp = vqrdmulhq_n_s16(v601, 13573); + int16x8_t v602 = vaddq_s16(v602_tmp, v601); + int16x8_t v603 = vaddq_s16(v508, v398); + int16x8_t v604 = vaddq_s16(v399, v468); + int16x8_t v605 = vaddq_s16(v603, v604); + int16x8_t v606 = vaddq_s16(v605, v586); + int16x8_t v607 = vaddq_s16(v606, v597); + int16x8_t v608 = vaddq_s16(v607, v601); + int16x8_t v609 = vaddq_s16(v602, v608); + int16x8_t v610 = vqrdmulhq_n_s16(v609, 17734); + int16x8_t v611 = vaddq_s16(v600, v610); + int16x8_t v612 = vqrdmulhq_n_s16(v611, 16705); + int16x8_t v613 = vaddq_s16(v594, v612); + int16x8_t v614 = vqrdmulhq_n_s16(v613, 16463); + int16x8_t v615 = vaddq_s16(v563, v614); + int16x8_t v616 = vaddq_s16(v565, v523); + int16x8_t v617_tmp = vqrdmulhq_n_s16(v616, 13573); + int16x8_t v617 = vaddq_s16(v617_tmp, v616); + int16x8_t v618 = vaddq_s16(v569, v525); + int16x8_t v619 = vaddq_s16(v526, v571); + int16x8_t v620 = vaddq_s16(v618, v619); + int16x8_t v621 = vaddq_s16(v617, v620); + int16x8_t v622 = vaddq_s16(v577, v529); + int16x8_t v623 = vaddq_s16(v530, v579); + int16x8_t v624 = vaddq_s16(v622, v623); + int16x8_t v625_tmp = vqrdmulhq_n_s16(v624, 13573); + int16x8_t v625 = vaddq_s16(v625_tmp, v624); + int16x8_t v626 = vaddq_s16(v585, v533); + int16x8_t v627 = vaddq_s16(v534, v587); + int16x8_t v628 = vaddq_s16(v626, v627); + int16x8_t v629 = vaddq_s16(v628, v624); + int16x8_t v630 = vaddq_s16(v625, v629); + int16x8_t v631 = 
vqrdmulhq_n_s16(v630, 17734); + int16x8_t v632 = vaddq_s16(v621, v631); + int16x8_t v633 = vaddq_s16(v580, v540); + int16x8_t v634 = vaddq_s16(v541, v564); + int16x8_t v635 = vaddq_s16(v633, v634); + int16x8_t v636_tmp = vqrdmulhq_n_s16(v635, 13573); + int16x8_t v636 = vaddq_s16(v636_tmp, v635); + int16x8_t v637 = vaddq_s16(v588, v544); + int16x8_t v638 = vaddq_s16(v545, v568); + int16x8_t v639 = vaddq_s16(v637, v638); + int16x8_t v640 = vaddq_s16(v572, v547); + int16x8_t v641 = vaddq_s16(v548, v576); + int16x8_t v642 = vaddq_s16(v640, v641); + int16x8_t v643 = vaddq_s16(v639, v642); + int16x8_t v644 = vaddq_s16(v636, v643); + int16x8_t v645 = vaddq_s16(v642, v635); + int16x8_t v646_tmp = vqrdmulhq_n_s16(v645, 13573); + int16x8_t v646 = vaddq_s16(v646_tmp, v645); + int16x8_t v647 = vaddq_s16(v604, v554); + int16x8_t v648 = vaddq_s16(v555, v584); + int16x8_t v649 = vaddq_s16(v647, v648); + int16x8_t v650 = vaddq_s16(v649, v639); + int16x8_t v651 = vaddq_s16(v650, v645); + int16x8_t v652 = vaddq_s16(v646, v651); + int16x8_t v653 = vqrdmulhq_n_s16(v652, 17734); + int16x8_t v654 = vaddq_s16(v644, v653); + int16x8_t v655 = vqrdmulhq_n_s16(v654, 16705); + int16x8_t v656 = vaddq_s16(v632, v655); + int16x8_t v657 = vaddq_s16(v634, v616); + int16x8_t v658_tmp = vqrdmulhq_n_s16(v657, 13573); + int16x8_t v658 = vaddq_s16(v658_tmp, v657); + int16x8_t v659 = vaddq_s16(v638, v618); + int16x8_t v660 = vaddq_s16(v619, v640); + int16x8_t v661 = vaddq_s16(v659, v660); + int16x8_t v662 = vaddq_s16(v658, v661); + int16x8_t v663 = vaddq_s16(v641, v622); + int16x8_t v664 = vaddq_s16(v623, v633); + int16x8_t v665 = vaddq_s16(v663, v664); + int16x8_t v666_tmp = vqrdmulhq_n_s16(v665, 13573); + int16x8_t v666 = vaddq_s16(v666_tmp, v665); + int16x8_t v667 = vaddq_s16(v648, v626); + int16x8_t v668 = vaddq_s16(v627, v637); + int16x8_t v669 = vaddq_s16(v667, v668); + int16x8_t v670 = vaddq_s16(v669, v665); + int16x8_t v671 = vaddq_s16(v666, v670); + int16x8_t v672 = vqrdmulhq_n_s16(v671, 
17734); + int16x8_t v673 = vaddq_s16(v662, v672); + int16x8_t v674 = vaddq_s16(v664, v657); + int16x8_t v675_tmp = vqrdmulhq_n_s16(v674, 13573); + int16x8_t v675 = vaddq_s16(v675_tmp, v674); + int16x8_t v676 = vaddq_s16(v668, v659); + int16x8_t v677 = vaddq_s16(v660, v663); + int16x8_t v678 = vaddq_s16(v676, v677); + int16x8_t v679 = vaddq_s16(v675, v678); + int16x8_t v680 = vaddq_s16(v677, v674); + int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 13573); + int16x8_t v681 = vaddq_s16(v681_tmp, v680); + int16x8_t v682 = vld1q_s16(in + in_stride * 254 + i); + int16x8_t v683 = vaddq_s16(v682, v507); + int16x8_t v684 = vaddq_s16(v683, v603); + int16x8_t v685 = vaddq_s16(v684, v647); + int16x8_t v686 = vaddq_s16(v685, v667); + int16x8_t v687 = vaddq_s16(v686, v676); + int16x8_t v688 = vaddq_s16(v687, v680); + int16x8_t v689 = vaddq_s16(v681, v688); + int16x8_t v690 = vqrdmulhq_n_s16(v689, 17734); + int16x8_t v691 = vaddq_s16(v679, v690); + int16x8_t v692 = vqrdmulhq_n_s16(v691, 16705); + int16x8_t v693 = vaddq_s16(v673, v692); + int16x8_t v694 = vqrdmulhq_n_s16(v693, 16463); + int16x8_t v695 = vaddq_s16(v656, v694); + int16x8_t v696 = vqrdmulhq_n_s16(v695, 16404); + int16x8_t v697 = vaddq_s16(v615, v696); + int16x8_t v698 = vqrdmulhq_n_s16(v697, 16389); + int16x8_t v699 = vaddq_s16(v522, v698); + int16x8_t v700 = vqrdmulhq_n_s16(v699, 16385); + int16x8_t v701 = vaddq_s16(v317, v700); + int16x8_t v702 = vld1q_s16(in + in_stride * 1 + i); + int16x8_t v703_tmp = vqrdmulhq_n_s16(v702, 13573); + int16x8_t v703 = vaddq_s16(v703_tmp, v702); + int16x8_t v704 = vld1q_s16(in + in_stride * 129 + i); + int16x8_t v705 = vld1q_s16(in + in_stride * 127 + i); + int16x8_t v706 = vaddq_s16(v704, v705); + int16x8_t v707 = vaddq_s16(v703, v706); + int16x8_t v708 = vld1q_s16(in + in_stride * 65 + i); + int16x8_t v709 = vld1q_s16(in + in_stride * 63 + i); + int16x8_t v710 = vaddq_s16(v708, v709); + int16x8_t v711_tmp = vqrdmulhq_n_s16(v710, 13573); + int16x8_t v711 = vaddq_s16(v711_tmp, v710); + 
int16x8_t v712 = vld1q_s16(in + in_stride * 193 + i); + int16x8_t v713 = vld1q_s16(in + in_stride * 191 + i); + int16x8_t v714 = vaddq_s16(v712, v713); + int16x8_t v715 = vaddq_s16(v714, v710); + int16x8_t v716 = vaddq_s16(v711, v715); + int16x8_t v717 = vqrdmulhq_n_s16(v716, 17734); + int16x8_t v718 = vaddq_s16(v707, v717); + int16x8_t v719 = vld1q_s16(in + in_stride * 33 + i); + int16x8_t v720 = vld1q_s16(in + in_stride * 31 + i); + int16x8_t v721 = vaddq_s16(v719, v720); + int16x8_t v722_tmp = vqrdmulhq_n_s16(v721, 13573); + int16x8_t v722 = vaddq_s16(v722_tmp, v721); + int16x8_t v723 = vld1q_s16(in + in_stride * 161 + i); + int16x8_t v724 = vld1q_s16(in + in_stride * 159 + i); + int16x8_t v725 = vaddq_s16(v723, v724); + int16x8_t v726 = vld1q_s16(in + in_stride * 97 + i); + int16x8_t v727 = vld1q_s16(in + in_stride * 95 + i); + int16x8_t v728 = vaddq_s16(v726, v727); + int16x8_t v729 = vaddq_s16(v725, v728); + int16x8_t v730 = vaddq_s16(v722, v729); + int16x8_t v731 = vaddq_s16(v728, v721); + int16x8_t v732_tmp = vqrdmulhq_n_s16(v731, 13573); + int16x8_t v732 = vaddq_s16(v732_tmp, v731); + int16x8_t v733 = vld1q_s16(in + in_stride * 225 + i); + int16x8_t v734 = vld1q_s16(in + in_stride * 223 + i); + int16x8_t v735 = vaddq_s16(v733, v734); + int16x8_t v736 = vaddq_s16(v735, v725); + int16x8_t v737 = vaddq_s16(v736, v731); + int16x8_t v738 = vaddq_s16(v732, v737); + int16x8_t v739 = vqrdmulhq_n_s16(v738, 17734); + int16x8_t v740 = vaddq_s16(v730, v739); + int16x8_t v741 = vqrdmulhq_n_s16(v740, 16705); + int16x8_t v742 = vaddq_s16(v718, v741); + int16x8_t v743 = vld1q_s16(in + in_stride * 17 + i); + int16x8_t v744 = vld1q_s16(in + in_stride * 15 + i); + int16x8_t v745 = vaddq_s16(v743, v744); + int16x8_t v746_tmp = vqrdmulhq_n_s16(v745, 13573); + int16x8_t v746 = vaddq_s16(v746_tmp, v745); + int16x8_t v747 = vld1q_s16(in + in_stride * 145 + i); + int16x8_t v748 = vld1q_s16(in + in_stride * 143 + i); + int16x8_t v749 = vaddq_s16(v747, v748); + int16x8_t v750 = 
vld1q_s16(in + in_stride * 113 + i); + int16x8_t v751 = vld1q_s16(in + in_stride * 111 + i); + int16x8_t v752 = vaddq_s16(v750, v751); + int16x8_t v753 = vaddq_s16(v749, v752); + int16x8_t v754 = vaddq_s16(v746, v753); + int16x8_t v755 = vld1q_s16(in + in_stride * 81 + i); + int16x8_t v756 = vld1q_s16(in + in_stride * 79 + i); + int16x8_t v757 = vaddq_s16(v755, v756); + int16x8_t v758 = vld1q_s16(in + in_stride * 49 + i); + int16x8_t v759 = vld1q_s16(in + in_stride * 47 + i); + int16x8_t v760 = vaddq_s16(v758, v759); + int16x8_t v761 = vaddq_s16(v757, v760); + int16x8_t v762_tmp = vqrdmulhq_n_s16(v761, 13573); + int16x8_t v762 = vaddq_s16(v762_tmp, v761); + int16x8_t v763 = vld1q_s16(in + in_stride * 209 + i); + int16x8_t v764 = vld1q_s16(in + in_stride * 207 + i); + int16x8_t v765 = vaddq_s16(v763, v764); + int16x8_t v766 = vld1q_s16(in + in_stride * 177 + i); + int16x8_t v767 = vld1q_s16(in + in_stride * 175 + i); + int16x8_t v768 = vaddq_s16(v766, v767); + int16x8_t v769 = vaddq_s16(v765, v768); + int16x8_t v770 = vaddq_s16(v769, v761); + int16x8_t v771 = vaddq_s16(v762, v770); + int16x8_t v772 = vqrdmulhq_n_s16(v771, 17734); + int16x8_t v773 = vaddq_s16(v754, v772); + int16x8_t v774 = vaddq_s16(v760, v745); + int16x8_t v775_tmp = vqrdmulhq_n_s16(v774, 13573); + int16x8_t v775 = vaddq_s16(v775_tmp, v774); + int16x8_t v776 = vaddq_s16(v768, v749); + int16x8_t v777 = vaddq_s16(v752, v757); + int16x8_t v778 = vaddq_s16(v776, v777); + int16x8_t v779 = vaddq_s16(v775, v778); + int16x8_t v780 = vaddq_s16(v777, v774); + int16x8_t v781_tmp = vqrdmulhq_n_s16(v780, 13573); + int16x8_t v781 = vaddq_s16(v781_tmp, v780); + int16x8_t v782 = vld1q_s16(in + in_stride * 241 + i); + int16x8_t v783 = vld1q_s16(in + in_stride * 239 + i); + int16x8_t v784 = vaddq_s16(v782, v783); + int16x8_t v785 = vaddq_s16(v784, v765); + int16x8_t v786 = vaddq_s16(v785, v776); + int16x8_t v787 = vaddq_s16(v786, v780); + int16x8_t v788 = vaddq_s16(v781, v787); + int16x8_t v789 = 
vqrdmulhq_n_s16(v788, 17734); + int16x8_t v790 = vaddq_s16(v779, v789); + int16x8_t v791 = vqrdmulhq_n_s16(v790, 16705); + int16x8_t v792 = vaddq_s16(v773, v791); + int16x8_t v793 = vqrdmulhq_n_s16(v792, 16463); + int16x8_t v794 = vaddq_s16(v742, v793); + int16x8_t v795 = vld1q_s16(in + in_stride * 9 + i); + int16x8_t v796 = vld1q_s16(in + in_stride * 7 + i); + int16x8_t v797 = vaddq_s16(v795, v796); + int16x8_t v798_tmp = vqrdmulhq_n_s16(v797, 13573); + int16x8_t v798 = vaddq_s16(v798_tmp, v797); + int16x8_t v799 = vld1q_s16(in + in_stride * 137 + i); + int16x8_t v800 = vld1q_s16(in + in_stride * 135 + i); + int16x8_t v801 = vaddq_s16(v799, v800); + int16x8_t v802 = vld1q_s16(in + in_stride * 121 + i); + int16x8_t v803 = vld1q_s16(in + in_stride * 119 + i); + int16x8_t v804 = vaddq_s16(v802, v803); + int16x8_t v805 = vaddq_s16(v801, v804); + int16x8_t v806 = vaddq_s16(v798, v805); + int16x8_t v807 = vld1q_s16(in + in_stride * 73 + i); + int16x8_t v808 = vld1q_s16(in + in_stride * 71 + i); + int16x8_t v809 = vaddq_s16(v807, v808); + int16x8_t v810 = vld1q_s16(in + in_stride * 57 + i); + int16x8_t v811 = vld1q_s16(in + in_stride * 55 + i); + int16x8_t v812 = vaddq_s16(v810, v811); + int16x8_t v813 = vaddq_s16(v809, v812); + int16x8_t v814_tmp = vqrdmulhq_n_s16(v813, 13573); + int16x8_t v814 = vaddq_s16(v814_tmp, v813); + int16x8_t v815 = vld1q_s16(in + in_stride * 201 + i); + int16x8_t v816 = vld1q_s16(in + in_stride * 199 + i); + int16x8_t v817 = vaddq_s16(v815, v816); + int16x8_t v818 = vld1q_s16(in + in_stride * 185 + i); + int16x8_t v819 = vld1q_s16(in + in_stride * 183 + i); + int16x8_t v820 = vaddq_s16(v818, v819); + int16x8_t v821 = vaddq_s16(v817, v820); + int16x8_t v822 = vaddq_s16(v821, v813); + int16x8_t v823 = vaddq_s16(v814, v822); + int16x8_t v824 = vqrdmulhq_n_s16(v823, 17734); + int16x8_t v825 = vaddq_s16(v806, v824); + int16x8_t v826 = vld1q_s16(in + in_stride * 41 + i); + int16x8_t v827 = vld1q_s16(in + in_stride * 39 + i); + int16x8_t v828 = 
vaddq_s16(v826, v827); + int16x8_t v829 = vld1q_s16(in + in_stride * 25 + i); + int16x8_t v830 = vld1q_s16(in + in_stride * 23 + i); + int16x8_t v831 = vaddq_s16(v829, v830); + int16x8_t v832 = vaddq_s16(v828, v831); + int16x8_t v833_tmp = vqrdmulhq_n_s16(v832, 13573); + int16x8_t v833 = vaddq_s16(v833_tmp, v832); + int16x8_t v834 = vld1q_s16(in + in_stride * 169 + i); + int16x8_t v835 = vld1q_s16(in + in_stride * 167 + i); + int16x8_t v836 = vaddq_s16(v834, v835); + int16x8_t v837 = vld1q_s16(in + in_stride * 153 + i); + int16x8_t v838 = vld1q_s16(in + in_stride * 151 + i); + int16x8_t v839 = vaddq_s16(v837, v838); + int16x8_t v840 = vaddq_s16(v836, v839); + int16x8_t v841 = vld1q_s16(in + in_stride * 105 + i); + int16x8_t v842 = vld1q_s16(in + in_stride * 103 + i); + int16x8_t v843 = vaddq_s16(v841, v842); + int16x8_t v844 = vld1q_s16(in + in_stride * 89 + i); + int16x8_t v845 = vld1q_s16(in + in_stride * 87 + i); + int16x8_t v846 = vaddq_s16(v844, v845); + int16x8_t v847 = vaddq_s16(v843, v846); + int16x8_t v848 = vaddq_s16(v840, v847); + int16x8_t v849 = vaddq_s16(v833, v848); + int16x8_t v850 = vaddq_s16(v847, v832); + int16x8_t v851_tmp = vqrdmulhq_n_s16(v850, 13573); + int16x8_t v851 = vaddq_s16(v851_tmp, v850); + int16x8_t v852 = vld1q_s16(in + in_stride * 233 + i); + int16x8_t v853 = vld1q_s16(in + in_stride * 231 + i); + int16x8_t v854 = vaddq_s16(v852, v853); + int16x8_t v855 = vld1q_s16(in + in_stride * 217 + i); + int16x8_t v856 = vld1q_s16(in + in_stride * 215 + i); + int16x8_t v857 = vaddq_s16(v855, v856); + int16x8_t v858 = vaddq_s16(v854, v857); + int16x8_t v859 = vaddq_s16(v858, v840); + int16x8_t v860 = vaddq_s16(v859, v850); + int16x8_t v861 = vaddq_s16(v851, v860); + int16x8_t v862 = vqrdmulhq_n_s16(v861, 17734); + int16x8_t v863 = vaddq_s16(v849, v862); + int16x8_t v864 = vqrdmulhq_n_s16(v863, 16705); + int16x8_t v865 = vaddq_s16(v825, v864); + int16x8_t v866 = vaddq_s16(v831, v797); + int16x8_t v867_tmp = vqrdmulhq_n_s16(v866, 13573); + 
int16x8_t v867 = vaddq_s16(v867_tmp, v866); + int16x8_t v868 = vaddq_s16(v839, v801); + int16x8_t v869 = vaddq_s16(v804, v843); + int16x8_t v870 = vaddq_s16(v868, v869); + int16x8_t v871 = vaddq_s16(v867, v870); + int16x8_t v872 = vaddq_s16(v846, v809); + int16x8_t v873 = vaddq_s16(v812, v828); + int16x8_t v874 = vaddq_s16(v872, v873); + int16x8_t v875_tmp = vqrdmulhq_n_s16(v874, 13573); + int16x8_t v875 = vaddq_s16(v875_tmp, v874); + int16x8_t v876 = vaddq_s16(v857, v817); + int16x8_t v877 = vaddq_s16(v820, v836); + int16x8_t v878 = vaddq_s16(v876, v877); + int16x8_t v879 = vaddq_s16(v878, v874); + int16x8_t v880 = vaddq_s16(v875, v879); + int16x8_t v881 = vqrdmulhq_n_s16(v880, 17734); + int16x8_t v882 = vaddq_s16(v871, v881); + int16x8_t v883 = vaddq_s16(v873, v866); + int16x8_t v884_tmp = vqrdmulhq_n_s16(v883, 13573); + int16x8_t v884 = vaddq_s16(v884_tmp, v883); + int16x8_t v885 = vaddq_s16(v877, v868); + int16x8_t v886 = vaddq_s16(v869, v872); + int16x8_t v887 = vaddq_s16(v885, v886); + int16x8_t v888 = vaddq_s16(v884, v887); + int16x8_t v889 = vaddq_s16(v886, v883); + int16x8_t v890_tmp = vqrdmulhq_n_s16(v889, 13573); + int16x8_t v890 = vaddq_s16(v890_tmp, v889); + int16x8_t v891 = vld1q_s16(in + in_stride * 249 + i); + int16x8_t v892 = vld1q_s16(in + in_stride * 247 + i); + int16x8_t v893 = vaddq_s16(v891, v892); + int16x8_t v894 = vaddq_s16(v893, v854); + int16x8_t v895 = vaddq_s16(v894, v876); + int16x8_t v896 = vaddq_s16(v895, v885); + int16x8_t v897 = vaddq_s16(v896, v889); + int16x8_t v898 = vaddq_s16(v890, v897); + int16x8_t v899 = vqrdmulhq_n_s16(v898, 17734); + int16x8_t v900 = vaddq_s16(v888, v899); + int16x8_t v901 = vqrdmulhq_n_s16(v900, 16705); + int16x8_t v902 = vaddq_s16(v882, v901); + int16x8_t v903 = vqrdmulhq_n_s16(v902, 16463); + int16x8_t v904 = vaddq_s16(v865, v903); + int16x8_t v905 = vqrdmulhq_n_s16(v904, 16404); + int16x8_t v906 = vaddq_s16(v794, v905); + int16x8_t v907 = vld1q_s16(in + in_stride * 5 + i); + int16x8_t v908 = 
vld1q_s16(in + in_stride * 3 + i); + int16x8_t v909 = vaddq_s16(v907, v908); + int16x8_t v910_tmp = vqrdmulhq_n_s16(v909, 13573); + int16x8_t v910 = vaddq_s16(v910_tmp, v909); + int16x8_t v911 = vld1q_s16(in + in_stride * 133 + i); + int16x8_t v912 = vld1q_s16(in + in_stride * 131 + i); + int16x8_t v913 = vaddq_s16(v911, v912); + int16x8_t v914 = vld1q_s16(in + in_stride * 125 + i); + int16x8_t v915 = vld1q_s16(in + in_stride * 123 + i); + int16x8_t v916 = vaddq_s16(v914, v915); + int16x8_t v917 = vaddq_s16(v913, v916); + int16x8_t v918 = vaddq_s16(v910, v917); + int16x8_t v919 = vld1q_s16(in + in_stride * 69 + i); + int16x8_t v920 = vld1q_s16(in + in_stride * 67 + i); + int16x8_t v921 = vaddq_s16(v919, v920); + int16x8_t v922 = vld1q_s16(in + in_stride * 61 + i); + int16x8_t v923 = vld1q_s16(in + in_stride * 59 + i); + int16x8_t v924 = vaddq_s16(v922, v923); + int16x8_t v925 = vaddq_s16(v921, v924); + int16x8_t v926_tmp = vqrdmulhq_n_s16(v925, 13573); + int16x8_t v926 = vaddq_s16(v926_tmp, v925); + int16x8_t v927 = vld1q_s16(in + in_stride * 197 + i); + int16x8_t v928 = vld1q_s16(in + in_stride * 195 + i); + int16x8_t v929 = vaddq_s16(v927, v928); + int16x8_t v930 = vld1q_s16(in + in_stride * 189 + i); + int16x8_t v931 = vld1q_s16(in + in_stride * 187 + i); + int16x8_t v932 = vaddq_s16(v930, v931); + int16x8_t v933 = vaddq_s16(v929, v932); + int16x8_t v934 = vaddq_s16(v933, v925); + int16x8_t v935 = vaddq_s16(v926, v934); + int16x8_t v936 = vqrdmulhq_n_s16(v935, 17734); + int16x8_t v937 = vaddq_s16(v918, v936); + int16x8_t v938 = vld1q_s16(in + in_stride * 37 + i); + int16x8_t v939 = vld1q_s16(in + in_stride * 35 + i); + int16x8_t v940 = vaddq_s16(v938, v939); + int16x8_t v941 = vld1q_s16(in + in_stride * 29 + i); + int16x8_t v942 = vld1q_s16(in + in_stride * 27 + i); + int16x8_t v943 = vaddq_s16(v941, v942); + int16x8_t v944 = vaddq_s16(v940, v943); + int16x8_t v945_tmp = vqrdmulhq_n_s16(v944, 13573); + int16x8_t v945 = vaddq_s16(v945_tmp, v944); + int16x8_t v946 
= vld1q_s16(in + in_stride * 165 + i); + int16x8_t v947 = vld1q_s16(in + in_stride * 163 + i); + int16x8_t v948 = vaddq_s16(v946, v947); + int16x8_t v949 = vld1q_s16(in + in_stride * 157 + i); + int16x8_t v950 = vld1q_s16(in + in_stride * 155 + i); + int16x8_t v951 = vaddq_s16(v949, v950); + int16x8_t v952 = vaddq_s16(v948, v951); + int16x8_t v953 = vld1q_s16(in + in_stride * 101 + i); + int16x8_t v954 = vld1q_s16(in + in_stride * 99 + i); + int16x8_t v955 = vaddq_s16(v953, v954); + int16x8_t v956 = vld1q_s16(in + in_stride * 93 + i); + int16x8_t v957 = vld1q_s16(in + in_stride * 91 + i); + int16x8_t v958 = vaddq_s16(v956, v957); + int16x8_t v959 = vaddq_s16(v955, v958); + int16x8_t v960 = vaddq_s16(v952, v959); + int16x8_t v961 = vaddq_s16(v945, v960); + int16x8_t v962 = vaddq_s16(v959, v944); + int16x8_t v963_tmp = vqrdmulhq_n_s16(v962, 13573); + int16x8_t v963 = vaddq_s16(v963_tmp, v962); + int16x8_t v964 = vld1q_s16(in + in_stride * 229 + i); + int16x8_t v965 = vld1q_s16(in + in_stride * 227 + i); + int16x8_t v966 = vaddq_s16(v964, v965); + int16x8_t v967 = vld1q_s16(in + in_stride * 221 + i); + int16x8_t v968 = vld1q_s16(in + in_stride * 219 + i); + int16x8_t v969 = vaddq_s16(v967, v968); + int16x8_t v970 = vaddq_s16(v966, v969); + int16x8_t v971 = vaddq_s16(v970, v952); + int16x8_t v972 = vaddq_s16(v971, v962); + int16x8_t v973 = vaddq_s16(v963, v972); + int16x8_t v974 = vqrdmulhq_n_s16(v973, 17734); + int16x8_t v975 = vaddq_s16(v961, v974); + int16x8_t v976 = vqrdmulhq_n_s16(v975, 16705); + int16x8_t v977 = vaddq_s16(v937, v976); + int16x8_t v978 = vld1q_s16(in + in_stride * 21 + i); + int16x8_t v979 = vld1q_s16(in + in_stride * 19 + i); + int16x8_t v980 = vaddq_s16(v978, v979); + int16x8_t v981 = vld1q_s16(in + in_stride * 13 + i); + int16x8_t v982 = vld1q_s16(in + in_stride * 11 + i); + int16x8_t v983 = vaddq_s16(v981, v982); + int16x8_t v984 = vaddq_s16(v980, v983); + int16x8_t v985_tmp = vqrdmulhq_n_s16(v984, 13573); + int16x8_t v985 = 
vaddq_s16(v985_tmp, v984); + int16x8_t v986 = vld1q_s16(in + in_stride * 149 + i); + int16x8_t v987 = vld1q_s16(in + in_stride * 147 + i); + int16x8_t v988 = vaddq_s16(v986, v987); + int16x8_t v989 = vld1q_s16(in + in_stride * 141 + i); + int16x8_t v990 = vld1q_s16(in + in_stride * 139 + i); + int16x8_t v991 = vaddq_s16(v989, v990); + int16x8_t v992 = vaddq_s16(v988, v991); + int16x8_t v993 = vld1q_s16(in + in_stride * 117 + i); + int16x8_t v994 = vld1q_s16(in + in_stride * 115 + i); + int16x8_t v995 = vaddq_s16(v993, v994); + int16x8_t v996 = vld1q_s16(in + in_stride * 109 + i); + int16x8_t v997 = vld1q_s16(in + in_stride * 107 + i); + int16x8_t v998 = vaddq_s16(v996, v997); + int16x8_t v999 = vaddq_s16(v995, v998); + int16x8_t v1000 = vaddq_s16(v992, v999); + int16x8_t v1001 = vaddq_s16(v985, v1000); + int16x8_t v1002 = vld1q_s16(in + in_stride * 85 + i); + int16x8_t v1003 = vld1q_s16(in + in_stride * 83 + i); + int16x8_t v1004 = vaddq_s16(v1002, v1003); + int16x8_t v1005 = vld1q_s16(in + in_stride * 77 + i); + int16x8_t v1006 = vld1q_s16(in + in_stride * 75 + i); + int16x8_t v1007 = vaddq_s16(v1005, v1006); + int16x8_t v1008 = vaddq_s16(v1004, v1007); + int16x8_t v1009 = vld1q_s16(in + in_stride * 53 + i); + int16x8_t v1010 = vld1q_s16(in + in_stride * 51 + i); + int16x8_t v1011 = vaddq_s16(v1009, v1010); + int16x8_t v1012 = vld1q_s16(in + in_stride * 45 + i); + int16x8_t v1013 = vld1q_s16(in + in_stride * 43 + i); + int16x8_t v1014 = vaddq_s16(v1012, v1013); + int16x8_t v1015 = vaddq_s16(v1011, v1014); + int16x8_t v1016 = vaddq_s16(v1008, v1015); + int16x8_t v1017_tmp = vqrdmulhq_n_s16(v1016, 13573); + int16x8_t v1017 = vaddq_s16(v1017_tmp, v1016); + int16x8_t v1018 = vld1q_s16(in + in_stride * 213 + i); + int16x8_t v1019 = vld1q_s16(in + in_stride * 211 + i); + int16x8_t v1020 = vaddq_s16(v1018, v1019); + int16x8_t v1021 = vld1q_s16(in + in_stride * 205 + i); + int16x8_t v1022 = vld1q_s16(in + in_stride * 203 + i); + int16x8_t v1023 = vaddq_s16(v1021, v1022); 
+ int16x8_t v1024 = vaddq_s16(v1020, v1023); + int16x8_t v1025 = vld1q_s16(in + in_stride * 181 + i); + int16x8_t v1026 = vld1q_s16(in + in_stride * 179 + i); + int16x8_t v1027 = vaddq_s16(v1025, v1026); + int16x8_t v1028 = vld1q_s16(in + in_stride * 173 + i); + int16x8_t v1029 = vld1q_s16(in + in_stride * 171 + i); + int16x8_t v1030 = vaddq_s16(v1028, v1029); + int16x8_t v1031 = vaddq_s16(v1027, v1030); + int16x8_t v1032 = vaddq_s16(v1024, v1031); + int16x8_t v1033 = vaddq_s16(v1032, v1016); + int16x8_t v1034 = vaddq_s16(v1017, v1033); + int16x8_t v1035 = vqrdmulhq_n_s16(v1034, 17734); + int16x8_t v1036 = vaddq_s16(v1001, v1035); + int16x8_t v1037 = vaddq_s16(v1015, v984); + int16x8_t v1038_tmp = vqrdmulhq_n_s16(v1037, 13573); + int16x8_t v1038 = vaddq_s16(v1038_tmp, v1037); + int16x8_t v1039 = vaddq_s16(v1031, v992); + int16x8_t v1040 = vaddq_s16(v999, v1008); + int16x8_t v1041 = vaddq_s16(v1039, v1040); + int16x8_t v1042 = vaddq_s16(v1038, v1041); + int16x8_t v1043 = vaddq_s16(v1040, v1037); + int16x8_t v1044_tmp = vqrdmulhq_n_s16(v1043, 13573); + int16x8_t v1044 = vaddq_s16(v1044_tmp, v1043); + int16x8_t v1045 = vld1q_s16(in + in_stride * 245 + i); + int16x8_t v1046 = vld1q_s16(in + in_stride * 243 + i); + int16x8_t v1047 = vaddq_s16(v1045, v1046); + int16x8_t v1048 = vld1q_s16(in + in_stride * 237 + i); + int16x8_t v1049 = vld1q_s16(in + in_stride * 235 + i); + int16x8_t v1050 = vaddq_s16(v1048, v1049); + int16x8_t v1051 = vaddq_s16(v1047, v1050); + int16x8_t v1052 = vaddq_s16(v1051, v1024); + int16x8_t v1053 = vaddq_s16(v1052, v1039); + int16x8_t v1054 = vaddq_s16(v1053, v1043); + int16x8_t v1055 = vaddq_s16(v1044, v1054); + int16x8_t v1056 = vqrdmulhq_n_s16(v1055, 17734); + int16x8_t v1057 = vaddq_s16(v1042, v1056); + int16x8_t v1058 = vqrdmulhq_n_s16(v1057, 16705); + int16x8_t v1059 = vaddq_s16(v1036, v1058); + int16x8_t v1060 = vqrdmulhq_n_s16(v1059, 16463); + int16x8_t v1061 = vaddq_s16(v977, v1060); + int16x8_t v1062 = vaddq_s16(v983, v909); + int16x8_t 
v1063_tmp = vqrdmulhq_n_s16(v1062, 13573); + int16x8_t v1063 = vaddq_s16(v1063_tmp, v1062); + int16x8_t v1064 = vaddq_s16(v991, v913); + int16x8_t v1065 = vaddq_s16(v916, v995); + int16x8_t v1066 = vaddq_s16(v1064, v1065); + int16x8_t v1067 = vaddq_s16(v1063, v1066); + int16x8_t v1068 = vaddq_s16(v1007, v921); + int16x8_t v1069 = vaddq_s16(v924, v1011); + int16x8_t v1070 = vaddq_s16(v1068, v1069); + int16x8_t v1071_tmp = vqrdmulhq_n_s16(v1070, 13573); + int16x8_t v1071 = vaddq_s16(v1071_tmp, v1070); + int16x8_t v1072 = vaddq_s16(v1023, v929); + int16x8_t v1073 = vaddq_s16(v932, v1027); + int16x8_t v1074 = vaddq_s16(v1072, v1073); + int16x8_t v1075 = vaddq_s16(v1074, v1070); + int16x8_t v1076 = vaddq_s16(v1071, v1075); + int16x8_t v1077 = vqrdmulhq_n_s16(v1076, 17734); + int16x8_t v1078 = vaddq_s16(v1067, v1077); + int16x8_t v1079 = vaddq_s16(v1014, v940); + int16x8_t v1080 = vaddq_s16(v943, v980); + int16x8_t v1081 = vaddq_s16(v1079, v1080); + int16x8_t v1082_tmp = vqrdmulhq_n_s16(v1081, 13573); + int16x8_t v1082 = vaddq_s16(v1082_tmp, v1081); + int16x8_t v1083 = vaddq_s16(v1030, v948); + int16x8_t v1084 = vaddq_s16(v951, v988); + int16x8_t v1085 = vaddq_s16(v1083, v1084); + int16x8_t v1086 = vaddq_s16(v998, v955); + int16x8_t v1087 = vaddq_s16(v958, v1004); + int16x8_t v1088 = vaddq_s16(v1086, v1087); + int16x8_t v1089 = vaddq_s16(v1085, v1088); + int16x8_t v1090 = vaddq_s16(v1082, v1089); + int16x8_t v1091 = vaddq_s16(v1088, v1081); + int16x8_t v1092_tmp = vqrdmulhq_n_s16(v1091, 13573); + int16x8_t v1092 = vaddq_s16(v1092_tmp, v1091); + int16x8_t v1093 = vaddq_s16(v1050, v966); + int16x8_t v1094 = vaddq_s16(v969, v1020); + int16x8_t v1095 = vaddq_s16(v1093, v1094); + int16x8_t v1096 = vaddq_s16(v1095, v1085); + int16x8_t v1097 = vaddq_s16(v1096, v1091); + int16x8_t v1098 = vaddq_s16(v1092, v1097); + int16x8_t v1099 = vqrdmulhq_n_s16(v1098, 17734); + int16x8_t v1100 = vaddq_s16(v1090, v1099); + int16x8_t v1101 = vqrdmulhq_n_s16(v1100, 16705); + int16x8_t v1102 = 
vaddq_s16(v1078, v1101); + int16x8_t v1103 = vaddq_s16(v1080, v1062); + int16x8_t v1104_tmp = vqrdmulhq_n_s16(v1103, 13573); + int16x8_t v1104 = vaddq_s16(v1104_tmp, v1103); + int16x8_t v1105 = vaddq_s16(v1084, v1064); + int16x8_t v1106 = vaddq_s16(v1065, v1086); + int16x8_t v1107 = vaddq_s16(v1105, v1106); + int16x8_t v1108 = vaddq_s16(v1104, v1107); + int16x8_t v1109 = vaddq_s16(v1087, v1068); + int16x8_t v1110 = vaddq_s16(v1069, v1079); + int16x8_t v1111 = vaddq_s16(v1109, v1110); + int16x8_t v1112_tmp = vqrdmulhq_n_s16(v1111, 13573); + int16x8_t v1112 = vaddq_s16(v1112_tmp, v1111); + int16x8_t v1113 = vaddq_s16(v1094, v1072); + int16x8_t v1114 = vaddq_s16(v1073, v1083); + int16x8_t v1115 = vaddq_s16(v1113, v1114); + int16x8_t v1116 = vaddq_s16(v1115, v1111); + int16x8_t v1117 = vaddq_s16(v1112, v1116); + int16x8_t v1118 = vqrdmulhq_n_s16(v1117, 17734); + int16x8_t v1119 = vaddq_s16(v1108, v1118); + int16x8_t v1120 = vaddq_s16(v1110, v1103); + int16x8_t v1121_tmp = vqrdmulhq_n_s16(v1120, 13573); + int16x8_t v1121 = vaddq_s16(v1121_tmp, v1120); + int16x8_t v1122 = vaddq_s16(v1114, v1105); + int16x8_t v1123 = vaddq_s16(v1106, v1109); + int16x8_t v1124 = vaddq_s16(v1122, v1123); + int16x8_t v1125 = vaddq_s16(v1121, v1124); + int16x8_t v1126 = vaddq_s16(v1123, v1120); + int16x8_t v1127_tmp = vqrdmulhq_n_s16(v1126, 13573); + int16x8_t v1127 = vaddq_s16(v1127_tmp, v1126); + int16x8_t v1128 = vld1q_s16(in + in_stride * 253 + i); + int16x8_t v1129 = vld1q_s16(in + in_stride * 251 + i); + int16x8_t v1130 = vaddq_s16(v1128, v1129); + int16x8_t v1131 = vaddq_s16(v1130, v1047); + int16x8_t v1132 = vaddq_s16(v1131, v1093); + int16x8_t v1133 = vaddq_s16(v1132, v1113); + int16x8_t v1134 = vaddq_s16(v1133, v1122); + int16x8_t v1135 = vaddq_s16(v1134, v1126); + int16x8_t v1136 = vaddq_s16(v1127, v1135); + int16x8_t v1137 = vqrdmulhq_n_s16(v1136, 17734); + int16x8_t v1138 = vaddq_s16(v1125, v1137); + int16x8_t v1139 = vqrdmulhq_n_s16(v1138, 16705); + int16x8_t v1140 = 
vaddq_s16(v1119, v1139); + int16x8_t v1141 = vqrdmulhq_n_s16(v1140, 16463); + int16x8_t v1142 = vaddq_s16(v1102, v1141); + int16x8_t v1143 = vqrdmulhq_n_s16(v1142, 16404); + int16x8_t v1144 = vaddq_s16(v1061, v1143); + int16x8_t v1145 = vqrdmulhq_n_s16(v1144, 16389); + int16x8_t v1146 = vaddq_s16(v906, v1145); + int16x8_t v1147 = vaddq_s16(v908, v702); + int16x8_t v1148_tmp = vqrdmulhq_n_s16(v1147, 13573); + int16x8_t v1148 = vaddq_s16(v1148_tmp, v1147); + int16x8_t v1149 = vaddq_s16(v912, v704); + int16x8_t v1150 = vaddq_s16(v705, v914); + int16x8_t v1151 = vaddq_s16(v1149, v1150); + int16x8_t v1152 = vaddq_s16(v1148, v1151); + int16x8_t v1153 = vaddq_s16(v920, v708); + int16x8_t v1154 = vaddq_s16(v709, v922); + int16x8_t v1155 = vaddq_s16(v1153, v1154); + int16x8_t v1156_tmp = vqrdmulhq_n_s16(v1155, 13573); + int16x8_t v1156 = vaddq_s16(v1156_tmp, v1155); + int16x8_t v1157 = vaddq_s16(v928, v712); + int16x8_t v1158 = vaddq_s16(v713, v930); + int16x8_t v1159 = vaddq_s16(v1157, v1158); + int16x8_t v1160 = vaddq_s16(v1159, v1155); + int16x8_t v1161 = vaddq_s16(v1156, v1160); + int16x8_t v1162 = vqrdmulhq_n_s16(v1161, 17734); + int16x8_t v1163 = vaddq_s16(v1152, v1162); + int16x8_t v1164 = vaddq_s16(v939, v719); + int16x8_t v1165 = vaddq_s16(v720, v941); + int16x8_t v1166 = vaddq_s16(v1164, v1165); + int16x8_t v1167_tmp = vqrdmulhq_n_s16(v1166, 13573); + int16x8_t v1167 = vaddq_s16(v1167_tmp, v1166); + int16x8_t v1168 = vaddq_s16(v947, v723); + int16x8_t v1169 = vaddq_s16(v724, v949); + int16x8_t v1170 = vaddq_s16(v1168, v1169); + int16x8_t v1171 = vaddq_s16(v954, v726); + int16x8_t v1172 = vaddq_s16(v727, v956); + int16x8_t v1173 = vaddq_s16(v1171, v1172); + int16x8_t v1174 = vaddq_s16(v1170, v1173); + int16x8_t v1175 = vaddq_s16(v1167, v1174); + int16x8_t v1176 = vaddq_s16(v1173, v1166); + int16x8_t v1177_tmp = vqrdmulhq_n_s16(v1176, 13573); + int16x8_t v1177 = vaddq_s16(v1177_tmp, v1176); + int16x8_t v1178 = vaddq_s16(v965, v733); + int16x8_t v1179 = 
vaddq_s16(v734, v967); + int16x8_t v1180 = vaddq_s16(v1178, v1179); + int16x8_t v1181 = vaddq_s16(v1180, v1170); + int16x8_t v1182 = vaddq_s16(v1181, v1176); + int16x8_t v1183 = vaddq_s16(v1177, v1182); + int16x8_t v1184 = vqrdmulhq_n_s16(v1183, 17734); + int16x8_t v1185 = vaddq_s16(v1175, v1184); + int16x8_t v1186 = vqrdmulhq_n_s16(v1185, 16705); + int16x8_t v1187 = vaddq_s16(v1163, v1186); + int16x8_t v1188 = vaddq_s16(v979, v743); + int16x8_t v1189 = vaddq_s16(v744, v981); + int16x8_t v1190 = vaddq_s16(v1188, v1189); + int16x8_t v1191_tmp = vqrdmulhq_n_s16(v1190, 13573); + int16x8_t v1191 = vaddq_s16(v1191_tmp, v1190); + int16x8_t v1192 = vaddq_s16(v987, v747); + int16x8_t v1193 = vaddq_s16(v748, v989); + int16x8_t v1194 = vaddq_s16(v1192, v1193); + int16x8_t v1195 = vaddq_s16(v994, v750); + int16x8_t v1196 = vaddq_s16(v751, v996); + int16x8_t v1197 = vaddq_s16(v1195, v1196); + int16x8_t v1198 = vaddq_s16(v1194, v1197); + int16x8_t v1199 = vaddq_s16(v1191, v1198); + int16x8_t v1200 = vaddq_s16(v1003, v755); + int16x8_t v1201 = vaddq_s16(v756, v1005); + int16x8_t v1202 = vaddq_s16(v1200, v1201); + int16x8_t v1203 = vaddq_s16(v1010, v758); + int16x8_t v1204 = vaddq_s16(v759, v1012); + int16x8_t v1205 = vaddq_s16(v1203, v1204); + int16x8_t v1206 = vaddq_s16(v1202, v1205); + int16x8_t v1207_tmp = vqrdmulhq_n_s16(v1206, 13573); + int16x8_t v1207 = vaddq_s16(v1207_tmp, v1206); + int16x8_t v1208 = vaddq_s16(v1019, v763); + int16x8_t v1209 = vaddq_s16(v764, v1021); + int16x8_t v1210 = vaddq_s16(v1208, v1209); + int16x8_t v1211 = vaddq_s16(v1026, v766); + int16x8_t v1212 = vaddq_s16(v767, v1028); + int16x8_t v1213 = vaddq_s16(v1211, v1212); + int16x8_t v1214 = vaddq_s16(v1210, v1213); + int16x8_t v1215 = vaddq_s16(v1214, v1206); + int16x8_t v1216 = vaddq_s16(v1207, v1215); + int16x8_t v1217 = vqrdmulhq_n_s16(v1216, 17734); + int16x8_t v1218 = vaddq_s16(v1199, v1217); + int16x8_t v1219 = vaddq_s16(v1205, v1190); + int16x8_t v1220_tmp = vqrdmulhq_n_s16(v1219, 13573); + 
int16x8_t v1220 = vaddq_s16(v1220_tmp, v1219); + int16x8_t v1221 = vaddq_s16(v1213, v1194); + int16x8_t v1222 = vaddq_s16(v1197, v1202); + int16x8_t v1223 = vaddq_s16(v1221, v1222); + int16x8_t v1224 = vaddq_s16(v1220, v1223); + int16x8_t v1225 = vaddq_s16(v1222, v1219); + int16x8_t v1226_tmp = vqrdmulhq_n_s16(v1225, 13573); + int16x8_t v1226 = vaddq_s16(v1226_tmp, v1225); + int16x8_t v1227 = vaddq_s16(v1046, v782); + int16x8_t v1228 = vaddq_s16(v783, v1048); + int16x8_t v1229 = vaddq_s16(v1227, v1228); + int16x8_t v1230 = vaddq_s16(v1229, v1210); + int16x8_t v1231 = vaddq_s16(v1230, v1221); + int16x8_t v1232 = vaddq_s16(v1231, v1225); + int16x8_t v1233 = vaddq_s16(v1226, v1232); + int16x8_t v1234 = vqrdmulhq_n_s16(v1233, 17734); + int16x8_t v1235 = vaddq_s16(v1224, v1234); + int16x8_t v1236 = vqrdmulhq_n_s16(v1235, 16705); + int16x8_t v1237 = vaddq_s16(v1218, v1236); + int16x8_t v1238 = vqrdmulhq_n_s16(v1237, 16463); + int16x8_t v1239 = vaddq_s16(v1187, v1238); + int16x8_t v1240 = vaddq_s16(v982, v795); + int16x8_t v1241 = vaddq_s16(v796, v907); + int16x8_t v1242 = vaddq_s16(v1240, v1241); + int16x8_t v1243_tmp = vqrdmulhq_n_s16(v1242, 13573); + int16x8_t v1243 = vaddq_s16(v1243_tmp, v1242); + int16x8_t v1244 = vaddq_s16(v990, v799); + int16x8_t v1245 = vaddq_s16(v800, v911); + int16x8_t v1246 = vaddq_s16(v1244, v1245); + int16x8_t v1247 = vaddq_s16(v915, v802); + int16x8_t v1248 = vaddq_s16(v803, v993); + int16x8_t v1249 = vaddq_s16(v1247, v1248); + int16x8_t v1250 = vaddq_s16(v1246, v1249); + int16x8_t v1251 = vaddq_s16(v1243, v1250); + int16x8_t v1252 = vaddq_s16(v1006, v807); + int16x8_t v1253 = vaddq_s16(v808, v919); + int16x8_t v1254 = vaddq_s16(v1252, v1253); + int16x8_t v1255 = vaddq_s16(v923, v810); + int16x8_t v1256 = vaddq_s16(v811, v1009); + int16x8_t v1257 = vaddq_s16(v1255, v1256); + int16x8_t v1258 = vaddq_s16(v1254, v1257); + int16x8_t v1259_tmp = vqrdmulhq_n_s16(v1258, 13573); + int16x8_t v1259 = vaddq_s16(v1259_tmp, v1258); + int16x8_t v1260 = 
vaddq_s16(v1022, v815); + int16x8_t v1261 = vaddq_s16(v816, v927); + int16x8_t v1262 = vaddq_s16(v1260, v1261); + int16x8_t v1263 = vaddq_s16(v931, v818); + int16x8_t v1264 = vaddq_s16(v819, v1025); + int16x8_t v1265 = vaddq_s16(v1263, v1264); + int16x8_t v1266 = vaddq_s16(v1262, v1265); + int16x8_t v1267 = vaddq_s16(v1266, v1258); + int16x8_t v1268 = vaddq_s16(v1259, v1267); + int16x8_t v1269 = vqrdmulhq_n_s16(v1268, 17734); + int16x8_t v1270 = vaddq_s16(v1251, v1269); + int16x8_t v1271 = vaddq_s16(v1013, v826); + int16x8_t v1272 = vaddq_s16(v827, v938); + int16x8_t v1273 = vaddq_s16(v1271, v1272); + int16x8_t v1274 = vaddq_s16(v942, v829); + int16x8_t v1275 = vaddq_s16(v830, v978); + int16x8_t v1276 = vaddq_s16(v1274, v1275); + int16x8_t v1277 = vaddq_s16(v1273, v1276); + int16x8_t v1278_tmp = vqrdmulhq_n_s16(v1277, 13573); + int16x8_t v1278 = vaddq_s16(v1278_tmp, v1277); + int16x8_t v1279 = vaddq_s16(v1029, v834); + int16x8_t v1280 = vaddq_s16(v835, v946); + int16x8_t v1281 = vaddq_s16(v1279, v1280); + int16x8_t v1282 = vaddq_s16(v950, v837); + int16x8_t v1283 = vaddq_s16(v838, v986); + int16x8_t v1284 = vaddq_s16(v1282, v1283); + int16x8_t v1285 = vaddq_s16(v1281, v1284); + int16x8_t v1286 = vaddq_s16(v997, v841); + int16x8_t v1287 = vaddq_s16(v842, v953); + int16x8_t v1288 = vaddq_s16(v1286, v1287); + int16x8_t v1289 = vaddq_s16(v957, v844); + int16x8_t v1290 = vaddq_s16(v845, v1002); + int16x8_t v1291 = vaddq_s16(v1289, v1290); + int16x8_t v1292 = vaddq_s16(v1288, v1291); + int16x8_t v1293 = vaddq_s16(v1285, v1292); + int16x8_t v1294 = vaddq_s16(v1278, v1293); + int16x8_t v1295 = vaddq_s16(v1292, v1277); + int16x8_t v1296_tmp = vqrdmulhq_n_s16(v1295, 13573); + int16x8_t v1296 = vaddq_s16(v1296_tmp, v1295); + int16x8_t v1297 = vaddq_s16(v1049, v852); + int16x8_t v1298 = vaddq_s16(v853, v964); + int16x8_t v1299 = vaddq_s16(v1297, v1298); + int16x8_t v1300 = vaddq_s16(v968, v855); + int16x8_t v1301 = vaddq_s16(v856, v1018); + int16x8_t v1302 = vaddq_s16(v1300, 
v1301); + int16x8_t v1303 = vaddq_s16(v1299, v1302); + int16x8_t v1304 = vaddq_s16(v1303, v1285); + int16x8_t v1305 = vaddq_s16(v1304, v1295); + int16x8_t v1306 = vaddq_s16(v1296, v1305); + int16x8_t v1307 = vqrdmulhq_n_s16(v1306, 17734); + int16x8_t v1308 = vaddq_s16(v1294, v1307); + int16x8_t v1309 = vqrdmulhq_n_s16(v1308, 16705); + int16x8_t v1310 = vaddq_s16(v1270, v1309); + int16x8_t v1311 = vaddq_s16(v1276, v1242); + int16x8_t v1312_tmp = vqrdmulhq_n_s16(v1311, 13573); + int16x8_t v1312 = vaddq_s16(v1312_tmp, v1311); + int16x8_t v1313 = vaddq_s16(v1284, v1246); + int16x8_t v1314 = vaddq_s16(v1249, v1288); + int16x8_t v1315 = vaddq_s16(v1313, v1314); + int16x8_t v1316 = vaddq_s16(v1312, v1315); + int16x8_t v1317 = vaddq_s16(v1291, v1254); + int16x8_t v1318 = vaddq_s16(v1257, v1273); + int16x8_t v1319 = vaddq_s16(v1317, v1318); + int16x8_t v1320_tmp = vqrdmulhq_n_s16(v1319, 13573); + int16x8_t v1320 = vaddq_s16(v1320_tmp, v1319); + int16x8_t v1321 = vaddq_s16(v1302, v1262); + int16x8_t v1322 = vaddq_s16(v1265, v1281); + int16x8_t v1323 = vaddq_s16(v1321, v1322); + int16x8_t v1324 = vaddq_s16(v1323, v1319); + int16x8_t v1325 = vaddq_s16(v1320, v1324); + int16x8_t v1326 = vqrdmulhq_n_s16(v1325, 17734); + int16x8_t v1327 = vaddq_s16(v1316, v1326); + int16x8_t v1328 = vaddq_s16(v1318, v1311); + int16x8_t v1329_tmp = vqrdmulhq_n_s16(v1328, 13573); + int16x8_t v1329 = vaddq_s16(v1329_tmp, v1328); + int16x8_t v1330 = vaddq_s16(v1322, v1313); + int16x8_t v1331 = vaddq_s16(v1314, v1317); + int16x8_t v1332 = vaddq_s16(v1330, v1331); + int16x8_t v1333 = vaddq_s16(v1329, v1332); + int16x8_t v1334 = vaddq_s16(v1331, v1328); + int16x8_t v1335_tmp = vqrdmulhq_n_s16(v1334, 13573); + int16x8_t v1335 = vaddq_s16(v1335_tmp, v1334); + int16x8_t v1336 = vaddq_s16(v1129, v891); + int16x8_t v1337 = vaddq_s16(v892, v1045); + int16x8_t v1338 = vaddq_s16(v1336, v1337); + int16x8_t v1339 = vaddq_s16(v1338, v1299); + int16x8_t v1340 = vaddq_s16(v1339, v1321); + int16x8_t v1341 = 
vaddq_s16(v1340, v1330); + int16x8_t v1342 = vaddq_s16(v1341, v1334); + int16x8_t v1343 = vaddq_s16(v1335, v1342); + int16x8_t v1344 = vqrdmulhq_n_s16(v1343, 17734); + int16x8_t v1345 = vaddq_s16(v1333, v1344); + int16x8_t v1346 = vqrdmulhq_n_s16(v1345, 16705); + int16x8_t v1347 = vaddq_s16(v1327, v1346); + int16x8_t v1348 = vqrdmulhq_n_s16(v1347, 16463); + int16x8_t v1349 = vaddq_s16(v1310, v1348); + int16x8_t v1350 = vqrdmulhq_n_s16(v1349, 16404); + int16x8_t v1351 = vaddq_s16(v1239, v1350); + int16x8_t v1352 = vaddq_s16(v1241, v1147); + int16x8_t v1353_tmp = vqrdmulhq_n_s16(v1352, 13573); + int16x8_t v1353 = vaddq_s16(v1353_tmp, v1352); + int16x8_t v1354 = vaddq_s16(v1245, v1149); + int16x8_t v1355 = vaddq_s16(v1150, v1247); + int16x8_t v1356 = vaddq_s16(v1354, v1355); + int16x8_t v1357 = vaddq_s16(v1353, v1356); + int16x8_t v1358 = vaddq_s16(v1253, v1153); + int16x8_t v1359 = vaddq_s16(v1154, v1255); + int16x8_t v1360 = vaddq_s16(v1358, v1359); + int16x8_t v1361_tmp = vqrdmulhq_n_s16(v1360, 13573); + int16x8_t v1361 = vaddq_s16(v1361_tmp, v1360); + int16x8_t v1362 = vaddq_s16(v1261, v1157); + int16x8_t v1363 = vaddq_s16(v1158, v1263); + int16x8_t v1364 = vaddq_s16(v1362, v1363); + int16x8_t v1365 = vaddq_s16(v1364, v1360); + int16x8_t v1366 = vaddq_s16(v1361, v1365); + int16x8_t v1367 = vqrdmulhq_n_s16(v1366, 17734); + int16x8_t v1368 = vaddq_s16(v1357, v1367); + int16x8_t v1369 = vaddq_s16(v1272, v1164); + int16x8_t v1370 = vaddq_s16(v1165, v1274); + int16x8_t v1371 = vaddq_s16(v1369, v1370); + int16x8_t v1372_tmp = vqrdmulhq_n_s16(v1371, 13573); + int16x8_t v1372 = vaddq_s16(v1372_tmp, v1371); + int16x8_t v1373 = vaddq_s16(v1280, v1168); + int16x8_t v1374 = vaddq_s16(v1169, v1282); + int16x8_t v1375 = vaddq_s16(v1373, v1374); + int16x8_t v1376 = vaddq_s16(v1287, v1171); + int16x8_t v1377 = vaddq_s16(v1172, v1289); + int16x8_t v1378 = vaddq_s16(v1376, v1377); + int16x8_t v1379 = vaddq_s16(v1375, v1378); + int16x8_t v1380 = vaddq_s16(v1372, v1379); + int16x8_t 
v1381 = vaddq_s16(v1378, v1371); + int16x8_t v1382_tmp = vqrdmulhq_n_s16(v1381, 13573); + int16x8_t v1382 = vaddq_s16(v1382_tmp, v1381); + int16x8_t v1383 = vaddq_s16(v1298, v1178); + int16x8_t v1384 = vaddq_s16(v1179, v1300); + int16x8_t v1385 = vaddq_s16(v1383, v1384); + int16x8_t v1386 = vaddq_s16(v1385, v1375); + int16x8_t v1387 = vaddq_s16(v1386, v1381); + int16x8_t v1388 = vaddq_s16(v1382, v1387); + int16x8_t v1389 = vqrdmulhq_n_s16(v1388, 17734); + int16x8_t v1390 = vaddq_s16(v1380, v1389); + int16x8_t v1391 = vqrdmulhq_n_s16(v1390, 16705); + int16x8_t v1392 = vaddq_s16(v1368, v1391); + int16x8_t v1393 = vaddq_s16(v1275, v1188); + int16x8_t v1394 = vaddq_s16(v1189, v1240); + int16x8_t v1395 = vaddq_s16(v1393, v1394); + int16x8_t v1396_tmp = vqrdmulhq_n_s16(v1395, 13573); + int16x8_t v1396 = vaddq_s16(v1396_tmp, v1395); + int16x8_t v1397 = vaddq_s16(v1283, v1192); + int16x8_t v1398 = vaddq_s16(v1193, v1244); + int16x8_t v1399 = vaddq_s16(v1397, v1398); + int16x8_t v1400 = vaddq_s16(v1248, v1195); + int16x8_t v1401 = vaddq_s16(v1196, v1286); + int16x8_t v1402 = vaddq_s16(v1400, v1401); + int16x8_t v1403 = vaddq_s16(v1399, v1402); + int16x8_t v1404 = vaddq_s16(v1396, v1403); + int16x8_t v1405 = vaddq_s16(v1290, v1200); + int16x8_t v1406 = vaddq_s16(v1201, v1252); + int16x8_t v1407 = vaddq_s16(v1405, v1406); + int16x8_t v1408 = vaddq_s16(v1256, v1203); + int16x8_t v1409 = vaddq_s16(v1204, v1271); + int16x8_t v1410 = vaddq_s16(v1408, v1409); + int16x8_t v1411 = vaddq_s16(v1407, v1410); + int16x8_t v1412_tmp = vqrdmulhq_n_s16(v1411, 13573); + int16x8_t v1412 = vaddq_s16(v1412_tmp, v1411); + int16x8_t v1413 = vaddq_s16(v1301, v1208); + int16x8_t v1414 = vaddq_s16(v1209, v1260); + int16x8_t v1415 = vaddq_s16(v1413, v1414); + int16x8_t v1416 = vaddq_s16(v1264, v1211); + int16x8_t v1417 = vaddq_s16(v1212, v1279); + int16x8_t v1418 = vaddq_s16(v1416, v1417); + int16x8_t v1419 = vaddq_s16(v1415, v1418); + int16x8_t v1420 = vaddq_s16(v1419, v1411); + int16x8_t v1421 = 
vaddq_s16(v1412, v1420); + int16x8_t v1422 = vqrdmulhq_n_s16(v1421, 17734); + int16x8_t v1423 = vaddq_s16(v1404, v1422); + int16x8_t v1424 = vaddq_s16(v1410, v1395); + int16x8_t v1425_tmp = vqrdmulhq_n_s16(v1424, 13573); + int16x8_t v1425 = vaddq_s16(v1425_tmp, v1424); + int16x8_t v1426 = vaddq_s16(v1418, v1399); + int16x8_t v1427 = vaddq_s16(v1402, v1407); + int16x8_t v1428 = vaddq_s16(v1426, v1427); + int16x8_t v1429 = vaddq_s16(v1425, v1428); + int16x8_t v1430 = vaddq_s16(v1427, v1424); + int16x8_t v1431_tmp = vqrdmulhq_n_s16(v1430, 13573); + int16x8_t v1431 = vaddq_s16(v1431_tmp, v1430); + int16x8_t v1432 = vaddq_s16(v1337, v1227); + int16x8_t v1433 = vaddq_s16(v1228, v1297); + int16x8_t v1434 = vaddq_s16(v1432, v1433); + int16x8_t v1435 = vaddq_s16(v1434, v1415); + int16x8_t v1436 = vaddq_s16(v1435, v1426); + int16x8_t v1437 = vaddq_s16(v1436, v1430); + int16x8_t v1438 = vaddq_s16(v1431, v1437); + int16x8_t v1439 = vqrdmulhq_n_s16(v1438, 17734); + int16x8_t v1440 = vaddq_s16(v1429, v1439); + int16x8_t v1441 = vqrdmulhq_n_s16(v1440, 16705); + int16x8_t v1442 = vaddq_s16(v1423, v1441); + int16x8_t v1443 = vqrdmulhq_n_s16(v1442, 16463); + int16x8_t v1444 = vaddq_s16(v1392, v1443); + int16x8_t v1445 = vaddq_s16(v1394, v1352); + int16x8_t v1446_tmp = vqrdmulhq_n_s16(v1445, 13573); + int16x8_t v1446 = vaddq_s16(v1446_tmp, v1445); + int16x8_t v1447 = vaddq_s16(v1398, v1354); + int16x8_t v1448 = vaddq_s16(v1355, v1400); + int16x8_t v1449 = vaddq_s16(v1447, v1448); + int16x8_t v1450 = vaddq_s16(v1446, v1449); + int16x8_t v1451 = vaddq_s16(v1406, v1358); + int16x8_t v1452 = vaddq_s16(v1359, v1408); + int16x8_t v1453 = vaddq_s16(v1451, v1452); + int16x8_t v1454_tmp = vqrdmulhq_n_s16(v1453, 13573); + int16x8_t v1454 = vaddq_s16(v1454_tmp, v1453); + int16x8_t v1455 = vaddq_s16(v1414, v1362); + int16x8_t v1456 = vaddq_s16(v1363, v1416); + int16x8_t v1457 = vaddq_s16(v1455, v1456); + int16x8_t v1458 = vaddq_s16(v1457, v1453); + int16x8_t v1459 = vaddq_s16(v1454, v1458); + 
int16x8_t v1460 = vqrdmulhq_n_s16(v1459, 17734); + int16x8_t v1461 = vaddq_s16(v1450, v1460); + int16x8_t v1462 = vaddq_s16(v1409, v1369); + int16x8_t v1463 = vaddq_s16(v1370, v1393); + int16x8_t v1464 = vaddq_s16(v1462, v1463); + int16x8_t v1465_tmp = vqrdmulhq_n_s16(v1464, 13573); + int16x8_t v1465 = vaddq_s16(v1465_tmp, v1464); + int16x8_t v1466 = vaddq_s16(v1417, v1373); + int16x8_t v1467 = vaddq_s16(v1374, v1397); + int16x8_t v1468 = vaddq_s16(v1466, v1467); + int16x8_t v1469 = vaddq_s16(v1401, v1376); + int16x8_t v1470 = vaddq_s16(v1377, v1405); + int16x8_t v1471 = vaddq_s16(v1469, v1470); + int16x8_t v1472 = vaddq_s16(v1468, v1471); + int16x8_t v1473 = vaddq_s16(v1465, v1472); + int16x8_t v1474 = vaddq_s16(v1471, v1464); + int16x8_t v1475_tmp = vqrdmulhq_n_s16(v1474, 13573); + int16x8_t v1475 = vaddq_s16(v1475_tmp, v1474); + int16x8_t v1476 = vaddq_s16(v1433, v1383); + int16x8_t v1477 = vaddq_s16(v1384, v1413); + int16x8_t v1478 = vaddq_s16(v1476, v1477); + int16x8_t v1479 = vaddq_s16(v1478, v1468); + int16x8_t v1480 = vaddq_s16(v1479, v1474); + int16x8_t v1481 = vaddq_s16(v1475, v1480); + int16x8_t v1482 = vqrdmulhq_n_s16(v1481, 17734); + int16x8_t v1483 = vaddq_s16(v1473, v1482); + int16x8_t v1484 = vqrdmulhq_n_s16(v1483, 16705); + int16x8_t v1485 = vaddq_s16(v1461, v1484); + int16x8_t v1486 = vaddq_s16(v1463, v1445); + int16x8_t v1487_tmp = vqrdmulhq_n_s16(v1486, 13573); + int16x8_t v1487 = vaddq_s16(v1487_tmp, v1486); + int16x8_t v1488 = vaddq_s16(v1467, v1447); + int16x8_t v1489 = vaddq_s16(v1448, v1469); + int16x8_t v1490 = vaddq_s16(v1488, v1489); + int16x8_t v1491 = vaddq_s16(v1487, v1490); + int16x8_t v1492 = vaddq_s16(v1470, v1451); + int16x8_t v1493 = vaddq_s16(v1452, v1462); + int16x8_t v1494 = vaddq_s16(v1492, v1493); + int16x8_t v1495_tmp = vqrdmulhq_n_s16(v1494, 13573); + int16x8_t v1495 = vaddq_s16(v1495_tmp, v1494); + int16x8_t v1496 = vaddq_s16(v1477, v1455); + int16x8_t v1497 = vaddq_s16(v1456, v1466); + int16x8_t v1498 = vaddq_s16(v1496, 
v1497); + int16x8_t v1499 = vaddq_s16(v1498, v1494); + int16x8_t v1500 = vaddq_s16(v1495, v1499); + int16x8_t v1501 = vqrdmulhq_n_s16(v1500, 17734); + int16x8_t v1502 = vaddq_s16(v1491, v1501); + int16x8_t v1503 = vaddq_s16(v1493, v1486); + int16x8_t v1504_tmp = vqrdmulhq_n_s16(v1503, 13573); + int16x8_t v1504 = vaddq_s16(v1504_tmp, v1503); + int16x8_t v1505 = vaddq_s16(v1497, v1488); + int16x8_t v1506 = vaddq_s16(v1489, v1492); + int16x8_t v1507 = vaddq_s16(v1505, v1506); + int16x8_t v1508 = vaddq_s16(v1504, v1507); + int16x8_t v1509 = vaddq_s16(v1506, v1503); + int16x8_t v1510_tmp = vqrdmulhq_n_s16(v1509, 13573); + int16x8_t v1510 = vaddq_s16(v1510_tmp, v1509); + int16x8_t v1511 = vld1q_s16(in + in_stride * 255 + i); + int16x8_t v1512 = vaddq_s16(v1511, v1128); + int16x8_t v1513 = vaddq_s16(v1512, v1336); + int16x8_t v1514 = vaddq_s16(v1513, v1432); + int16x8_t v1515 = vaddq_s16(v1514, v1476); + int16x8_t v1516 = vaddq_s16(v1515, v1496); + int16x8_t v1517 = vaddq_s16(v1516, v1505); + int16x8_t v1518 = vaddq_s16(v1517, v1509); + int16x8_t v1519 = vaddq_s16(v1510, v1518); + int16x8_t v1520 = vqrdmulhq_n_s16(v1519, 17734); + int16x8_t v1521 = vaddq_s16(v1508, v1520); + int16x8_t v1522 = vqrdmulhq_n_s16(v1521, 16705); + int16x8_t v1523 = vaddq_s16(v1502, v1522); + int16x8_t v1524 = vqrdmulhq_n_s16(v1523, 16463); + int16x8_t v1525 = vaddq_s16(v1485, v1524); + int16x8_t v1526 = vqrdmulhq_n_s16(v1525, 16404); + int16x8_t v1527 = vaddq_s16(v1444, v1526); + int16x8_t v1528 = vqrdmulhq_n_s16(v1527, 16389); + int16x8_t v1529 = vaddq_s16(v1351, v1528); + int16x8_t v1530 = vqrdmulhq_n_s16(v1529, 16385); + int16x8_t v1531 = vaddq_s16(v1146, v1530); + int16x8_t v1532 = vqrdmulhq_n_s16(v1531, 16384); + int16x8_t v1533 = vaddq_s16(v701, v1532); + int16x8_t v1534 = vsubq_s16(v0, v1); + int16x8_t v1535 = vsubq_s16(v4, v6); + int16x8_t v1536_tmp = vqrdmulhq_n_s16(v1535, 10045); + int16x8_t v1536 = vaddq_s16(v1536_tmp, v1535); + int16x8_t v1537 = vaddq_s16(v1534, v1536); + int16x8_t 
v1538 = vsubq_s16(v11, v14); + int16x8_t v1539 = vsubq_s16(v17, v20); + int16x8_t v1540_tmp = vqrdmulhq_n_s16(v1539, 10045); + int16x8_t v1540 = vaddq_s16(v1540_tmp, v1539); + int16x8_t v1541 = vaddq_s16(v1538, v1540); + int16x8_t v1542 = vqrdmulhq_n_s16(v1541, 19705); + int16x8_t v1543 = vaddq_s16(v1537, v1542); + int16x8_t v1544 = vsubq_s16(v27, v30); + int16x8_t v1545 = vsubq_s16(v35, v39); + int16x8_t v1546_tmp = vqrdmulhq_n_s16(v1545, 10045); + int16x8_t v1546 = vaddq_s16(v1546_tmp, v1545); + int16x8_t v1547 = vaddq_s16(v1544, v1546); + int16x8_t v1548 = vsubq_s16(v44, v47); + int16x8_t v1549 = vsubq_s16(v50, v54); + int16x8_t v1550_tmp = vqrdmulhq_n_s16(v1549, 10045); + int16x8_t v1550 = vaddq_s16(v1550_tmp, v1549); + int16x8_t v1551 = vaddq_s16(v1548, v1550); + int16x8_t v1552 = vqrdmulhq_n_s16(v1551, 19705); + int16x8_t v1553 = vaddq_s16(v1547, v1552); + int16x8_t v1554 = vqrdmulhq_n_s16(v1553, 17121); + int16x8_t v1555 = vaddq_s16(v1543, v1554); + int16x8_t v1556 = vsubq_s16(v63, v66); + int16x8_t v1557 = vsubq_s16(v71, v75); + int16x8_t v1558_tmp = vqrdmulhq_n_s16(v1557, 10045); + int16x8_t v1558 = vaddq_s16(v1558_tmp, v1557); + int16x8_t v1559 = vaddq_s16(v1556, v1558); + int16x8_t v1560 = vsubq_s16(v82, v89); + int16x8_t v1561 = vsubq_s16(v92, v97); + int16x8_t v1562_tmp = vqrdmulhq_n_s16(v1561, 10045); + int16x8_t v1562 = vaddq_s16(v1562_tmp, v1561); + int16x8_t v1563 = vaddq_s16(v1560, v1562); + int16x8_t v1564 = vqrdmulhq_n_s16(v1563, 19705); + int16x8_t v1565 = vaddq_s16(v1559, v1564); + int16x8_t v1566 = vsubq_s16(v104, v107); + int16x8_t v1567 = vsubq_s16(v112, v116); + int16x8_t v1568_tmp = vqrdmulhq_n_s16(v1567, 10045); + int16x8_t v1568 = vaddq_s16(v1568_tmp, v1567); + int16x8_t v1569 = vaddq_s16(v1566, v1568); + int16x8_t v1570 = vsubq_s16(v121, v124); + int16x8_t v1571 = vsubq_s16(v127, v132); + int16x8_t v1572_tmp = vqrdmulhq_n_s16(v1571, 10045); + int16x8_t v1572 = vaddq_s16(v1572_tmp, v1571); + int16x8_t v1573 = vaddq_s16(v1570, v1572); + 
int16x8_t v1574 = vqrdmulhq_n_s16(v1573, 19705); + int16x8_t v1575 = vaddq_s16(v1569, v1574); + int16x8_t v1576 = vqrdmulhq_n_s16(v1575, 17121); + int16x8_t v1577 = vaddq_s16(v1565, v1576); + int16x8_t v1578 = vqrdmulhq_n_s16(v1577, 16563); + int16x8_t v1579 = vaddq_s16(v1555, v1578); + int16x8_t v1580 = vsubq_s16(v143, v146); + int16x8_t v1581 = vsubq_s16(v151, v155); + int16x8_t v1582_tmp = vqrdmulhq_n_s16(v1581, 10045); + int16x8_t v1582 = vaddq_s16(v1582_tmp, v1581); + int16x8_t v1583 = vaddq_s16(v1580, v1582); + int16x8_t v1584 = vsubq_s16(v162, v169); + int16x8_t v1585 = vsubq_s16(v172, v177); + int16x8_t v1586_tmp = vqrdmulhq_n_s16(v1585, 10045); + int16x8_t v1586 = vaddq_s16(v1586_tmp, v1585); + int16x8_t v1587 = vaddq_s16(v1584, v1586); + int16x8_t v1588 = vqrdmulhq_n_s16(v1587, 19705); + int16x8_t v1589 = vaddq_s16(v1583, v1588); + int16x8_t v1590 = vsubq_s16(v186, v193); + int16x8_t v1591 = vsubq_s16(v202, v210); + int16x8_t v1592_tmp = vqrdmulhq_n_s16(v1591, 10045); + int16x8_t v1592 = vaddq_s16(v1592_tmp, v1591); + int16x8_t v1593 = vaddq_s16(v1590, v1592); + int16x8_t v1594 = vsubq_s16(v215, v218); + int16x8_t v1595 = vsubq_s16(v221, v227); + int16x8_t v1596_tmp = vqrdmulhq_n_s16(v1595, 10045); + int16x8_t v1596 = vaddq_s16(v1596_tmp, v1595); + int16x8_t v1597 = vaddq_s16(v1594, v1596); + int16x8_t v1598 = vqrdmulhq_n_s16(v1597, 19705); + int16x8_t v1599 = vaddq_s16(v1593, v1598); + int16x8_t v1600 = vqrdmulhq_n_s16(v1599, 17121); + int16x8_t v1601 = vaddq_s16(v1589, v1600); + int16x8_t v1602 = vsubq_s16(v236, v239); + int16x8_t v1603 = vsubq_s16(v244, v248); + int16x8_t v1604_tmp = vqrdmulhq_n_s16(v1603, 10045); + int16x8_t v1604 = vaddq_s16(v1604_tmp, v1603); + int16x8_t v1605 = vaddq_s16(v1602, v1604); + int16x8_t v1606 = vsubq_s16(v255, v262); + int16x8_t v1607 = vsubq_s16(v265, v270); + int16x8_t v1608_tmp = vqrdmulhq_n_s16(v1607, 10045); + int16x8_t v1608 = vaddq_s16(v1608_tmp, v1607); + int16x8_t v1609 = vaddq_s16(v1606, v1608); + int16x8_t 
v1610 = vqrdmulhq_n_s16(v1609, 19705); + int16x8_t v1611 = vaddq_s16(v1605, v1610); + int16x8_t v1612 = vsubq_s16(v277, v280); + int16x8_t v1613 = vsubq_s16(v285, v289); + int16x8_t v1614_tmp = vqrdmulhq_n_s16(v1613, 10045); + int16x8_t v1614 = vaddq_s16(v1614_tmp, v1613); + int16x8_t v1615 = vaddq_s16(v1612, v1614); + int16x8_t v1616 = vsubq_s16(v294, v297); + int16x8_t v1617 = vsubq_s16(v300, v306); + int16x8_t v1618_tmp = vqrdmulhq_n_s16(v1617, 10045); + int16x8_t v1618 = vaddq_s16(v1618_tmp, v1617); + int16x8_t v1619 = vaddq_s16(v1616, v1618); + int16x8_t v1620 = vqrdmulhq_n_s16(v1619, 19705); + int16x8_t v1621 = vaddq_s16(v1615, v1620); + int16x8_t v1622 = vqrdmulhq_n_s16(v1621, 17121); + int16x8_t v1623 = vaddq_s16(v1611, v1622); + int16x8_t v1624 = vqrdmulhq_n_s16(v1623, 16563); + int16x8_t v1625 = vaddq_s16(v1601, v1624); + int16x8_t v1626 = vqrdmulhq_n_s16(v1625, 16429); + int16x8_t v1627 = vaddq_s16(v1579, v1626); + int16x8_t v1628 = vsubq_s16(v319, v322); + int16x8_t v1629 = vsubq_s16(v327, v331); + int16x8_t v1630_tmp = vqrdmulhq_n_s16(v1629, 10045); + int16x8_t v1630 = vaddq_s16(v1630_tmp, v1629); + int16x8_t v1631 = vaddq_s16(v1628, v1630); + int16x8_t v1632 = vsubq_s16(v338, v345); + int16x8_t v1633 = vsubq_s16(v348, v353); + int16x8_t v1634_tmp = vqrdmulhq_n_s16(v1633, 10045); + int16x8_t v1634 = vaddq_s16(v1634_tmp, v1633); + int16x8_t v1635 = vaddq_s16(v1632, v1634); + int16x8_t v1636 = vqrdmulhq_n_s16(v1635, 19705); + int16x8_t v1637 = vaddq_s16(v1631, v1636); + int16x8_t v1638 = vsubq_s16(v362, v369); + int16x8_t v1639 = vsubq_s16(v378, v386); + int16x8_t v1640_tmp = vqrdmulhq_n_s16(v1639, 10045); + int16x8_t v1640 = vaddq_s16(v1640_tmp, v1639); + int16x8_t v1641 = vaddq_s16(v1638, v1640); + int16x8_t v1642 = vsubq_s16(v391, v394); + int16x8_t v1643 = vsubq_s16(v397, v403); + int16x8_t v1644_tmp = vqrdmulhq_n_s16(v1643, 10045); + int16x8_t v1644 = vaddq_s16(v1644_tmp, v1643); + int16x8_t v1645 = vaddq_s16(v1642, v1644); + int16x8_t v1646 = 
vqrdmulhq_n_s16(v1645, 19705); + int16x8_t v1647 = vaddq_s16(v1641, v1646); + int16x8_t v1648 = vqrdmulhq_n_s16(v1647, 17121); + int16x8_t v1649 = vaddq_s16(v1637, v1648); + int16x8_t v1650 = vsubq_s16(v414, v421); + int16x8_t v1651 = vsubq_s16(v430, v438); + int16x8_t v1652_tmp = vqrdmulhq_n_s16(v1651, 10045); + int16x8_t v1652 = vaddq_s16(v1652_tmp, v1651); + int16x8_t v1653 = vaddq_s16(v1650, v1652); + int16x8_t v1654 = vsubq_s16(v449, v464); + int16x8_t v1655 = vsubq_s16(v467, v476); + int16x8_t v1656_tmp = vqrdmulhq_n_s16(v1655, 10045); + int16x8_t v1656 = vaddq_s16(v1656_tmp, v1655); + int16x8_t v1657 = vaddq_s16(v1654, v1656); + int16x8_t v1658 = vqrdmulhq_n_s16(v1657, 19705); + int16x8_t v1659 = vaddq_s16(v1653, v1658); + int16x8_t v1660 = vsubq_s16(v483, v486); + int16x8_t v1661 = vsubq_s16(v491, v495); + int16x8_t v1662_tmp = vqrdmulhq_n_s16(v1661, 10045); + int16x8_t v1662 = vaddq_s16(v1662_tmp, v1661); + int16x8_t v1663 = vaddq_s16(v1660, v1662); + int16x8_t v1664 = vsubq_s16(v500, v503); + int16x8_t v1665 = vsubq_s16(v506, v513); + int16x8_t v1666_tmp = vqrdmulhq_n_s16(v1665, 10045); + int16x8_t v1666 = vaddq_s16(v1666_tmp, v1665); + int16x8_t v1667 = vaddq_s16(v1664, v1666); + int16x8_t v1668 = vqrdmulhq_n_s16(v1667, 19705); + int16x8_t v1669 = vaddq_s16(v1663, v1668); + int16x8_t v1670 = vqrdmulhq_n_s16(v1669, 17121); + int16x8_t v1671 = vaddq_s16(v1659, v1670); + int16x8_t v1672 = vqrdmulhq_n_s16(v1671, 16563); + int16x8_t v1673 = vaddq_s16(v1649, v1672); + int16x8_t v1674 = vsubq_s16(v524, v527); + int16x8_t v1675 = vsubq_s16(v532, v536); + int16x8_t v1676_tmp = vqrdmulhq_n_s16(v1675, 10045); + int16x8_t v1676 = vaddq_s16(v1676_tmp, v1675); + int16x8_t v1677 = vaddq_s16(v1674, v1676); + int16x8_t v1678 = vsubq_s16(v543, v550); + int16x8_t v1679 = vsubq_s16(v553, v558); + int16x8_t v1680_tmp = vqrdmulhq_n_s16(v1679, 10045); + int16x8_t v1680 = vaddq_s16(v1680_tmp, v1679); + int16x8_t v1681 = vaddq_s16(v1678, v1680); + int16x8_t v1682 = 
vqrdmulhq_n_s16(v1681, 19705); + int16x8_t v1683 = vaddq_s16(v1677, v1682); + int16x8_t v1684 = vsubq_s16(v567, v574); + int16x8_t v1685 = vsubq_s16(v583, v591); + int16x8_t v1686_tmp = vqrdmulhq_n_s16(v1685, 10045); + int16x8_t v1686 = vaddq_s16(v1686_tmp, v1685); + int16x8_t v1687 = vaddq_s16(v1684, v1686); + int16x8_t v1688 = vsubq_s16(v596, v599); + int16x8_t v1689 = vsubq_s16(v602, v608); + int16x8_t v1690_tmp = vqrdmulhq_n_s16(v1689, 10045); + int16x8_t v1690 = vaddq_s16(v1690_tmp, v1689); + int16x8_t v1691 = vaddq_s16(v1688, v1690); + int16x8_t v1692 = vqrdmulhq_n_s16(v1691, 19705); + int16x8_t v1693 = vaddq_s16(v1687, v1692); + int16x8_t v1694 = vqrdmulhq_n_s16(v1693, 17121); + int16x8_t v1695 = vaddq_s16(v1683, v1694); + int16x8_t v1696 = vsubq_s16(v617, v620); + int16x8_t v1697 = vsubq_s16(v625, v629); + int16x8_t v1698_tmp = vqrdmulhq_n_s16(v1697, 10045); + int16x8_t v1698 = vaddq_s16(v1698_tmp, v1697); + int16x8_t v1699 = vaddq_s16(v1696, v1698); + int16x8_t v1700 = vsubq_s16(v636, v643); + int16x8_t v1701 = vsubq_s16(v646, v651); + int16x8_t v1702_tmp = vqrdmulhq_n_s16(v1701, 10045); + int16x8_t v1702 = vaddq_s16(v1702_tmp, v1701); + int16x8_t v1703 = vaddq_s16(v1700, v1702); + int16x8_t v1704 = vqrdmulhq_n_s16(v1703, 19705); + int16x8_t v1705 = vaddq_s16(v1699, v1704); + int16x8_t v1706 = vsubq_s16(v658, v661); + int16x8_t v1707 = vsubq_s16(v666, v670); + int16x8_t v1708_tmp = vqrdmulhq_n_s16(v1707, 10045); + int16x8_t v1708 = vaddq_s16(v1708_tmp, v1707); + int16x8_t v1709 = vaddq_s16(v1706, v1708); + int16x8_t v1710 = vsubq_s16(v675, v678); + int16x8_t v1711 = vsubq_s16(v681, v688); + int16x8_t v1712_tmp = vqrdmulhq_n_s16(v1711, 10045); + int16x8_t v1712 = vaddq_s16(v1712_tmp, v1711); + int16x8_t v1713 = vaddq_s16(v1710, v1712); + int16x8_t v1714 = vqrdmulhq_n_s16(v1713, 19705); + int16x8_t v1715 = vaddq_s16(v1709, v1714); + int16x8_t v1716 = vqrdmulhq_n_s16(v1715, 17121); + int16x8_t v1717 = vaddq_s16(v1705, v1716); + int16x8_t v1718 = 
vqrdmulhq_n_s16(v1717, 16563); + int16x8_t v1719 = vaddq_s16(v1695, v1718); + int16x8_t v1720 = vqrdmulhq_n_s16(v1719, 16429); + int16x8_t v1721 = vaddq_s16(v1673, v1720); + int16x8_t v1722 = vqrdmulhq_n_s16(v1721, 16395); + int16x8_t v1723 = vaddq_s16(v1627, v1722); + int16x8_t v1724 = vsubq_s16(v703, v706); + int16x8_t v1725 = vsubq_s16(v711, v715); + int16x8_t v1726_tmp = vqrdmulhq_n_s16(v1725, 10045); + int16x8_t v1726 = vaddq_s16(v1726_tmp, v1725); + int16x8_t v1727 = vaddq_s16(v1724, v1726); + int16x8_t v1728 = vsubq_s16(v722, v729); + int16x8_t v1729 = vsubq_s16(v732, v737); + int16x8_t v1730_tmp = vqrdmulhq_n_s16(v1729, 10045); + int16x8_t v1730 = vaddq_s16(v1730_tmp, v1729); + int16x8_t v1731 = vaddq_s16(v1728, v1730); + int16x8_t v1732 = vqrdmulhq_n_s16(v1731, 19705); + int16x8_t v1733 = vaddq_s16(v1727, v1732); + int16x8_t v1734 = vsubq_s16(v746, v753); + int16x8_t v1735 = vsubq_s16(v762, v770); + int16x8_t v1736_tmp = vqrdmulhq_n_s16(v1735, 10045); + int16x8_t v1736 = vaddq_s16(v1736_tmp, v1735); + int16x8_t v1737 = vaddq_s16(v1734, v1736); + int16x8_t v1738 = vsubq_s16(v775, v778); + int16x8_t v1739 = vsubq_s16(v781, v787); + int16x8_t v1740_tmp = vqrdmulhq_n_s16(v1739, 10045); + int16x8_t v1740 = vaddq_s16(v1740_tmp, v1739); + int16x8_t v1741 = vaddq_s16(v1738, v1740); + int16x8_t v1742 = vqrdmulhq_n_s16(v1741, 19705); + int16x8_t v1743 = vaddq_s16(v1737, v1742); + int16x8_t v1744 = vqrdmulhq_n_s16(v1743, 17121); + int16x8_t v1745 = vaddq_s16(v1733, v1744); + int16x8_t v1746 = vsubq_s16(v798, v805); + int16x8_t v1747 = vsubq_s16(v814, v822); + int16x8_t v1748_tmp = vqrdmulhq_n_s16(v1747, 10045); + int16x8_t v1748 = vaddq_s16(v1748_tmp, v1747); + int16x8_t v1749 = vaddq_s16(v1746, v1748); + int16x8_t v1750 = vsubq_s16(v833, v848); + int16x8_t v1751 = vsubq_s16(v851, v860); + int16x8_t v1752_tmp = vqrdmulhq_n_s16(v1751, 10045); + int16x8_t v1752 = vaddq_s16(v1752_tmp, v1751); + int16x8_t v1753 = vaddq_s16(v1750, v1752); + int16x8_t v1754 = 
vqrdmulhq_n_s16(v1753, 19705); + int16x8_t v1755 = vaddq_s16(v1749, v1754); + int16x8_t v1756 = vsubq_s16(v867, v870); + int16x8_t v1757 = vsubq_s16(v875, v879); + int16x8_t v1758_tmp = vqrdmulhq_n_s16(v1757, 10045); + int16x8_t v1758 = vaddq_s16(v1758_tmp, v1757); + int16x8_t v1759 = vaddq_s16(v1756, v1758); + int16x8_t v1760 = vsubq_s16(v884, v887); + int16x8_t v1761 = vsubq_s16(v890, v897); + int16x8_t v1762_tmp = vqrdmulhq_n_s16(v1761, 10045); + int16x8_t v1762 = vaddq_s16(v1762_tmp, v1761); + int16x8_t v1763 = vaddq_s16(v1760, v1762); + int16x8_t v1764 = vqrdmulhq_n_s16(v1763, 19705); + int16x8_t v1765 = vaddq_s16(v1759, v1764); + int16x8_t v1766 = vqrdmulhq_n_s16(v1765, 17121); + int16x8_t v1767 = vaddq_s16(v1755, v1766); + int16x8_t v1768 = vqrdmulhq_n_s16(v1767, 16563); + int16x8_t v1769 = vaddq_s16(v1745, v1768); + int16x8_t v1770 = vsubq_s16(v910, v917); + int16x8_t v1771 = vsubq_s16(v926, v934); + int16x8_t v1772_tmp = vqrdmulhq_n_s16(v1771, 10045); + int16x8_t v1772 = vaddq_s16(v1772_tmp, v1771); + int16x8_t v1773 = vaddq_s16(v1770, v1772); + int16x8_t v1774 = vsubq_s16(v945, v960); + int16x8_t v1775 = vsubq_s16(v963, v972); + int16x8_t v1776_tmp = vqrdmulhq_n_s16(v1775, 10045); + int16x8_t v1776 = vaddq_s16(v1776_tmp, v1775); + int16x8_t v1777 = vaddq_s16(v1774, v1776); + int16x8_t v1778 = vqrdmulhq_n_s16(v1777, 19705); + int16x8_t v1779 = vaddq_s16(v1773, v1778); + int16x8_t v1780 = vsubq_s16(v985, v1000); + int16x8_t v1781 = vsubq_s16(v1017, v1033); + int16x8_t v1782_tmp = vqrdmulhq_n_s16(v1781, 10045); + int16x8_t v1782 = vaddq_s16(v1782_tmp, v1781); + int16x8_t v1783 = vaddq_s16(v1780, v1782); + int16x8_t v1784 = vsubq_s16(v1038, v1041); + int16x8_t v1785 = vsubq_s16(v1044, v1054); + int16x8_t v1786_tmp = vqrdmulhq_n_s16(v1785, 10045); + int16x8_t v1786 = vaddq_s16(v1786_tmp, v1785); + int16x8_t v1787 = vaddq_s16(v1784, v1786); + int16x8_t v1788 = vqrdmulhq_n_s16(v1787, 19705); + int16x8_t v1789 = vaddq_s16(v1783, v1788); + int16x8_t v1790 = 
vqrdmulhq_n_s16(v1789, 17121); + int16x8_t v1791 = vaddq_s16(v1779, v1790); + int16x8_t v1792 = vsubq_s16(v1063, v1066); + int16x8_t v1793 = vsubq_s16(v1071, v1075); + int16x8_t v1794_tmp = vqrdmulhq_n_s16(v1793, 10045); + int16x8_t v1794 = vaddq_s16(v1794_tmp, v1793); + int16x8_t v1795 = vaddq_s16(v1792, v1794); + int16x8_t v1796 = vsubq_s16(v1082, v1089); + int16x8_t v1797 = vsubq_s16(v1092, v1097); + int16x8_t v1798_tmp = vqrdmulhq_n_s16(v1797, 10045); + int16x8_t v1798 = vaddq_s16(v1798_tmp, v1797); + int16x8_t v1799 = vaddq_s16(v1796, v1798); + int16x8_t v1800 = vqrdmulhq_n_s16(v1799, 19705); + int16x8_t v1801 = vaddq_s16(v1795, v1800); + int16x8_t v1802 = vsubq_s16(v1104, v1107); + int16x8_t v1803 = vsubq_s16(v1112, v1116); + int16x8_t v1804_tmp = vqrdmulhq_n_s16(v1803, 10045); + int16x8_t v1804 = vaddq_s16(v1804_tmp, v1803); + int16x8_t v1805 = vaddq_s16(v1802, v1804); + int16x8_t v1806 = vsubq_s16(v1121, v1124); + int16x8_t v1807 = vsubq_s16(v1127, v1135); + int16x8_t v1808_tmp = vqrdmulhq_n_s16(v1807, 10045); + int16x8_t v1808 = vaddq_s16(v1808_tmp, v1807); + int16x8_t v1809 = vaddq_s16(v1806, v1808); + int16x8_t v1810 = vqrdmulhq_n_s16(v1809, 19705); + int16x8_t v1811 = vaddq_s16(v1805, v1810); + int16x8_t v1812 = vqrdmulhq_n_s16(v1811, 17121); + int16x8_t v1813 = vaddq_s16(v1801, v1812); + int16x8_t v1814 = vqrdmulhq_n_s16(v1813, 16563); + int16x8_t v1815 = vaddq_s16(v1791, v1814); + int16x8_t v1816 = vqrdmulhq_n_s16(v1815, 16429); + int16x8_t v1817 = vaddq_s16(v1769, v1816); + int16x8_t v1818 = vsubq_s16(v1148, v1151); + int16x8_t v1819 = vsubq_s16(v1156, v1160); + int16x8_t v1820_tmp = vqrdmulhq_n_s16(v1819, 10045); + int16x8_t v1820 = vaddq_s16(v1820_tmp, v1819); + int16x8_t v1821 = vaddq_s16(v1818, v1820); + int16x8_t v1822 = vsubq_s16(v1167, v1174); + int16x8_t v1823 = vsubq_s16(v1177, v1182); + int16x8_t v1824_tmp = vqrdmulhq_n_s16(v1823, 10045); + int16x8_t v1824 = vaddq_s16(v1824_tmp, v1823); + int16x8_t v1825 = vaddq_s16(v1822, v1824); + 
int16x8_t v1826 = vqrdmulhq_n_s16(v1825, 19705); + int16x8_t v1827 = vaddq_s16(v1821, v1826); + int16x8_t v1828 = vsubq_s16(v1191, v1198); + int16x8_t v1829 = vsubq_s16(v1207, v1215); + int16x8_t v1830_tmp = vqrdmulhq_n_s16(v1829, 10045); + int16x8_t v1830 = vaddq_s16(v1830_tmp, v1829); + int16x8_t v1831 = vaddq_s16(v1828, v1830); + int16x8_t v1832 = vsubq_s16(v1220, v1223); + int16x8_t v1833 = vsubq_s16(v1226, v1232); + int16x8_t v1834_tmp = vqrdmulhq_n_s16(v1833, 10045); + int16x8_t v1834 = vaddq_s16(v1834_tmp, v1833); + int16x8_t v1835 = vaddq_s16(v1832, v1834); + int16x8_t v1836 = vqrdmulhq_n_s16(v1835, 19705); + int16x8_t v1837 = vaddq_s16(v1831, v1836); + int16x8_t v1838 = vqrdmulhq_n_s16(v1837, 17121); + int16x8_t v1839 = vaddq_s16(v1827, v1838); + int16x8_t v1840 = vsubq_s16(v1243, v1250); + int16x8_t v1841 = vsubq_s16(v1259, v1267); + int16x8_t v1842_tmp = vqrdmulhq_n_s16(v1841, 10045); + int16x8_t v1842 = vaddq_s16(v1842_tmp, v1841); + int16x8_t v1843 = vaddq_s16(v1840, v1842); + int16x8_t v1844 = vsubq_s16(v1278, v1293); + int16x8_t v1845 = vsubq_s16(v1296, v1305); + int16x8_t v1846_tmp = vqrdmulhq_n_s16(v1845, 10045); + int16x8_t v1846 = vaddq_s16(v1846_tmp, v1845); + int16x8_t v1847 = vaddq_s16(v1844, v1846); + int16x8_t v1848 = vqrdmulhq_n_s16(v1847, 19705); + int16x8_t v1849 = vaddq_s16(v1843, v1848); + int16x8_t v1850 = vsubq_s16(v1312, v1315); + int16x8_t v1851 = vsubq_s16(v1320, v1324); + int16x8_t v1852_tmp = vqrdmulhq_n_s16(v1851, 10045); + int16x8_t v1852 = vaddq_s16(v1852_tmp, v1851); + int16x8_t v1853 = vaddq_s16(v1850, v1852); + int16x8_t v1854 = vsubq_s16(v1329, v1332); + int16x8_t v1855 = vsubq_s16(v1335, v1342); + int16x8_t v1856_tmp = vqrdmulhq_n_s16(v1855, 10045); + int16x8_t v1856 = vaddq_s16(v1856_tmp, v1855); + int16x8_t v1857 = vaddq_s16(v1854, v1856); + int16x8_t v1858 = vqrdmulhq_n_s16(v1857, 19705); + int16x8_t v1859 = vaddq_s16(v1853, v1858); + int16x8_t v1860 = vqrdmulhq_n_s16(v1859, 17121); + int16x8_t v1861 = vaddq_s16(v1849, 
v1860); + int16x8_t v1862 = vqrdmulhq_n_s16(v1861, 16563); + int16x8_t v1863 = vaddq_s16(v1839, v1862); + int16x8_t v1864 = vsubq_s16(v1353, v1356); + int16x8_t v1865 = vsubq_s16(v1361, v1365); + int16x8_t v1866_tmp = vqrdmulhq_n_s16(v1865, 10045); + int16x8_t v1866 = vaddq_s16(v1866_tmp, v1865); + int16x8_t v1867 = vaddq_s16(v1864, v1866); + int16x8_t v1868 = vsubq_s16(v1372, v1379); + int16x8_t v1869 = vsubq_s16(v1382, v1387); + int16x8_t v1870_tmp = vqrdmulhq_n_s16(v1869, 10045); + int16x8_t v1870 = vaddq_s16(v1870_tmp, v1869); + int16x8_t v1871 = vaddq_s16(v1868, v1870); + int16x8_t v1872 = vqrdmulhq_n_s16(v1871, 19705); + int16x8_t v1873 = vaddq_s16(v1867, v1872); + int16x8_t v1874 = vsubq_s16(v1396, v1403); + int16x8_t v1875 = vsubq_s16(v1412, v1420); + int16x8_t v1876_tmp = vqrdmulhq_n_s16(v1875, 10045); + int16x8_t v1876 = vaddq_s16(v1876_tmp, v1875); + int16x8_t v1877 = vaddq_s16(v1874, v1876); + int16x8_t v1878 = vsubq_s16(v1425, v1428); + int16x8_t v1879 = vsubq_s16(v1431, v1437); + int16x8_t v1880_tmp = vqrdmulhq_n_s16(v1879, 10045); + int16x8_t v1880 = vaddq_s16(v1880_tmp, v1879); + int16x8_t v1881 = vaddq_s16(v1878, v1880); + int16x8_t v1882 = vqrdmulhq_n_s16(v1881, 19705); + int16x8_t v1883 = vaddq_s16(v1877, v1882); + int16x8_t v1884 = vqrdmulhq_n_s16(v1883, 17121); + int16x8_t v1885 = vaddq_s16(v1873, v1884); + int16x8_t v1886 = vsubq_s16(v1446, v1449); + int16x8_t v1887 = vsubq_s16(v1454, v1458); + int16x8_t v1888_tmp = vqrdmulhq_n_s16(v1887, 10045); + int16x8_t v1888 = vaddq_s16(v1888_tmp, v1887); + int16x8_t v1889 = vaddq_s16(v1886, v1888); + int16x8_t v1890 = vsubq_s16(v1465, v1472); + int16x8_t v1891 = vsubq_s16(v1475, v1480); + int16x8_t v1892_tmp = vqrdmulhq_n_s16(v1891, 10045); + int16x8_t v1892 = vaddq_s16(v1892_tmp, v1891); + int16x8_t v1893 = vaddq_s16(v1890, v1892); + int16x8_t v1894 = vqrdmulhq_n_s16(v1893, 19705); + int16x8_t v1895 = vaddq_s16(v1889, v1894); + int16x8_t v1896 = vsubq_s16(v1487, v1490); + int16x8_t v1897 = 
vsubq_s16(v1495, v1499); + int16x8_t v1898_tmp = vqrdmulhq_n_s16(v1897, 10045); + int16x8_t v1898 = vaddq_s16(v1898_tmp, v1897); + int16x8_t v1899 = vaddq_s16(v1896, v1898); + int16x8_t v1900 = vsubq_s16(v1504, v1507); + int16x8_t v1901 = vsubq_s16(v1510, v1518); + int16x8_t v1902_tmp = vqrdmulhq_n_s16(v1901, 10045); + int16x8_t v1902 = vaddq_s16(v1902_tmp, v1901); + int16x8_t v1903 = vaddq_s16(v1900, v1902); + int16x8_t v1904 = vqrdmulhq_n_s16(v1903, 19705); + int16x8_t v1905 = vaddq_s16(v1899, v1904); + int16x8_t v1906 = vqrdmulhq_n_s16(v1905, 17121); + int16x8_t v1907 = vaddq_s16(v1895, v1906); + int16x8_t v1908 = vqrdmulhq_n_s16(v1907, 16563); + int16x8_t v1909 = vaddq_s16(v1885, v1908); + int16x8_t v1910 = vqrdmulhq_n_s16(v1909, 16429); + int16x8_t v1911 = vaddq_s16(v1863, v1910); + int16x8_t v1912 = vqrdmulhq_n_s16(v1911, 16395); + int16x8_t v1913 = vaddq_s16(v1817, v1912); + int16x8_t v1914 = vqrdmulhq_n_s16(v1913, 16387); + int16x8_t v1915 = vaddq_s16(v1723, v1914); + int16x8_t v1916 = vsubq_s16(v1534, v1536); + int16x8_t v1917 = vsubq_s16(v1538, v1540); + int16x8_t v1918 = vqrdmulhq_n_s16(v1917, 29490); + int16x8_t v1919 = vaddq_s16(v1916, v1918); + int16x8_t v1920 = vsubq_s16(v1544, v1546); + int16x8_t v1921 = vsubq_s16(v1548, v1550); + int16x8_t v1922 = vqrdmulhq_n_s16(v1921, 29490); + int16x8_t v1923 = vaddq_s16(v1920, v1922); + int16x8_t v1924 = vqrdmulhq_n_s16(v1923, 18578); + int16x8_t v1925 = vaddq_s16(v1919, v1924); + int16x8_t v1926 = vsubq_s16(v1556, v1558); + int16x8_t v1927 = vsubq_s16(v1560, v1562); + int16x8_t v1928 = vqrdmulhq_n_s16(v1927, 29490); + int16x8_t v1929 = vaddq_s16(v1926, v1928); + int16x8_t v1930 = vsubq_s16(v1566, v1568); + int16x8_t v1931 = vsubq_s16(v1570, v1572); + int16x8_t v1932 = vqrdmulhq_n_s16(v1931, 29490); + int16x8_t v1933 = vaddq_s16(v1930, v1932); + int16x8_t v1934 = vqrdmulhq_n_s16(v1933, 18578); + int16x8_t v1935 = vaddq_s16(v1929, v1934); + int16x8_t v1936 = vqrdmulhq_n_s16(v1935, 16890); + int16x8_t v1937 = 
vaddq_s16(v1925, v1936); + int16x8_t v1938 = vsubq_s16(v1580, v1582); + int16x8_t v1939 = vsubq_s16(v1584, v1586); + int16x8_t v1940 = vqrdmulhq_n_s16(v1939, 29490); + int16x8_t v1941 = vaddq_s16(v1938, v1940); + int16x8_t v1942 = vsubq_s16(v1590, v1592); + int16x8_t v1943 = vsubq_s16(v1594, v1596); + int16x8_t v1944 = vqrdmulhq_n_s16(v1943, 29490); + int16x8_t v1945 = vaddq_s16(v1942, v1944); + int16x8_t v1946 = vqrdmulhq_n_s16(v1945, 18578); + int16x8_t v1947 = vaddq_s16(v1941, v1946); + int16x8_t v1948 = vsubq_s16(v1602, v1604); + int16x8_t v1949 = vsubq_s16(v1606, v1608); + int16x8_t v1950 = vqrdmulhq_n_s16(v1949, 29490); + int16x8_t v1951 = vaddq_s16(v1948, v1950); + int16x8_t v1952 = vsubq_s16(v1612, v1614); + int16x8_t v1953 = vsubq_s16(v1616, v1618); + int16x8_t v1954 = vqrdmulhq_n_s16(v1953, 29490); + int16x8_t v1955 = vaddq_s16(v1952, v1954); + int16x8_t v1956 = vqrdmulhq_n_s16(v1955, 18578); + int16x8_t v1957 = vaddq_s16(v1951, v1956); + int16x8_t v1958 = vqrdmulhq_n_s16(v1957, 16890); + int16x8_t v1959 = vaddq_s16(v1947, v1958); + int16x8_t v1960 = vqrdmulhq_n_s16(v1959, 16508); + int16x8_t v1961 = vaddq_s16(v1937, v1960); + int16x8_t v1962 = vsubq_s16(v1628, v1630); + int16x8_t v1963 = vsubq_s16(v1632, v1634); + int16x8_t v1964 = vqrdmulhq_n_s16(v1963, 29490); + int16x8_t v1965 = vaddq_s16(v1962, v1964); + int16x8_t v1966 = vsubq_s16(v1638, v1640); + int16x8_t v1967 = vsubq_s16(v1642, v1644); + int16x8_t v1968 = vqrdmulhq_n_s16(v1967, 29490); + int16x8_t v1969 = vaddq_s16(v1966, v1968); + int16x8_t v1970 = vqrdmulhq_n_s16(v1969, 18578); + int16x8_t v1971 = vaddq_s16(v1965, v1970); + int16x8_t v1972 = vsubq_s16(v1650, v1652); + int16x8_t v1973 = vsubq_s16(v1654, v1656); + int16x8_t v1974 = vqrdmulhq_n_s16(v1973, 29490); + int16x8_t v1975 = vaddq_s16(v1972, v1974); + int16x8_t v1976 = vsubq_s16(v1660, v1662); + int16x8_t v1977 = vsubq_s16(v1664, v1666); + int16x8_t v1978 = vqrdmulhq_n_s16(v1977, 29490); + int16x8_t v1979 = vaddq_s16(v1976, v1978); + 
int16x8_t v1980 = vqrdmulhq_n_s16(v1979, 18578); + int16x8_t v1981 = vaddq_s16(v1975, v1980); + int16x8_t v1982 = vqrdmulhq_n_s16(v1981, 16890); + int16x8_t v1983 = vaddq_s16(v1971, v1982); + int16x8_t v1984 = vsubq_s16(v1674, v1676); + int16x8_t v1985 = vsubq_s16(v1678, v1680); + int16x8_t v1986 = vqrdmulhq_n_s16(v1985, 29490); + int16x8_t v1987 = vaddq_s16(v1984, v1986); + int16x8_t v1988 = vsubq_s16(v1684, v1686); + int16x8_t v1989 = vsubq_s16(v1688, v1690); + int16x8_t v1990 = vqrdmulhq_n_s16(v1989, 29490); + int16x8_t v1991 = vaddq_s16(v1988, v1990); + int16x8_t v1992 = vqrdmulhq_n_s16(v1991, 18578); + int16x8_t v1993 = vaddq_s16(v1987, v1992); + int16x8_t v1994 = vsubq_s16(v1696, v1698); + int16x8_t v1995 = vsubq_s16(v1700, v1702); + int16x8_t v1996 = vqrdmulhq_n_s16(v1995, 29490); + int16x8_t v1997 = vaddq_s16(v1994, v1996); + int16x8_t v1998 = vsubq_s16(v1706, v1708); + int16x8_t v1999 = vsubq_s16(v1710, v1712); + int16x8_t v2000 = vqrdmulhq_n_s16(v1999, 29490); + int16x8_t v2001 = vaddq_s16(v1998, v2000); + int16x8_t v2002 = vqrdmulhq_n_s16(v2001, 18578); + int16x8_t v2003 = vaddq_s16(v1997, v2002); + int16x8_t v2004 = vqrdmulhq_n_s16(v2003, 16890); + int16x8_t v2005 = vaddq_s16(v1993, v2004); + int16x8_t v2006 = vqrdmulhq_n_s16(v2005, 16508); + int16x8_t v2007 = vaddq_s16(v1983, v2006); + int16x8_t v2008 = vqrdmulhq_n_s16(v2007, 16415); + int16x8_t v2009 = vaddq_s16(v1961, v2008); + int16x8_t v2010 = vsubq_s16(v1724, v1726); + int16x8_t v2011 = vsubq_s16(v1728, v1730); + int16x8_t v2012 = vqrdmulhq_n_s16(v2011, 29490); + int16x8_t v2013 = vaddq_s16(v2010, v2012); + int16x8_t v2014 = vsubq_s16(v1734, v1736); + int16x8_t v2015 = vsubq_s16(v1738, v1740); + int16x8_t v2016 = vqrdmulhq_n_s16(v2015, 29490); + int16x8_t v2017 = vaddq_s16(v2014, v2016); + int16x8_t v2018 = vqrdmulhq_n_s16(v2017, 18578); + int16x8_t v2019 = vaddq_s16(v2013, v2018); + int16x8_t v2020 = vsubq_s16(v1746, v1748); + int16x8_t v2021 = vsubq_s16(v1750, v1752); + int16x8_t v2022 = 
vqrdmulhq_n_s16(v2021, 29490); + int16x8_t v2023 = vaddq_s16(v2020, v2022); + int16x8_t v2024 = vsubq_s16(v1756, v1758); + int16x8_t v2025 = vsubq_s16(v1760, v1762); + int16x8_t v2026 = vqrdmulhq_n_s16(v2025, 29490); + int16x8_t v2027 = vaddq_s16(v2024, v2026); + int16x8_t v2028 = vqrdmulhq_n_s16(v2027, 18578); + int16x8_t v2029 = vaddq_s16(v2023, v2028); + int16x8_t v2030 = vqrdmulhq_n_s16(v2029, 16890); + int16x8_t v2031 = vaddq_s16(v2019, v2030); + int16x8_t v2032 = vsubq_s16(v1770, v1772); + int16x8_t v2033 = vsubq_s16(v1774, v1776); + int16x8_t v2034 = vqrdmulhq_n_s16(v2033, 29490); + int16x8_t v2035 = vaddq_s16(v2032, v2034); + int16x8_t v2036 = vsubq_s16(v1780, v1782); + int16x8_t v2037 = vsubq_s16(v1784, v1786); + int16x8_t v2038 = vqrdmulhq_n_s16(v2037, 29490); + int16x8_t v2039 = vaddq_s16(v2036, v2038); + int16x8_t v2040 = vqrdmulhq_n_s16(v2039, 18578); + int16x8_t v2041 = vaddq_s16(v2035, v2040); + int16x8_t v2042 = vsubq_s16(v1792, v1794); + int16x8_t v2043 = vsubq_s16(v1796, v1798); + int16x8_t v2044 = vqrdmulhq_n_s16(v2043, 29490); + int16x8_t v2045 = vaddq_s16(v2042, v2044); + int16x8_t v2046 = vsubq_s16(v1802, v1804); + int16x8_t v2047 = vsubq_s16(v1806, v1808); + int16x8_t v2048 = vqrdmulhq_n_s16(v2047, 29490); + int16x8_t v2049 = vaddq_s16(v2046, v2048); + int16x8_t v2050 = vqrdmulhq_n_s16(v2049, 18578); + int16x8_t v2051 = vaddq_s16(v2045, v2050); + int16x8_t v2052 = vqrdmulhq_n_s16(v2051, 16890); + int16x8_t v2053 = vaddq_s16(v2041, v2052); + int16x8_t v2054 = vqrdmulhq_n_s16(v2053, 16508); + int16x8_t v2055 = vaddq_s16(v2031, v2054); + int16x8_t v2056 = vsubq_s16(v1818, v1820); + int16x8_t v2057 = vsubq_s16(v1822, v1824); + int16x8_t v2058 = vqrdmulhq_n_s16(v2057, 29490); + int16x8_t v2059 = vaddq_s16(v2056, v2058); + int16x8_t v2060 = vsubq_s16(v1828, v1830); + int16x8_t v2061 = vsubq_s16(v1832, v1834); + int16x8_t v2062 = vqrdmulhq_n_s16(v2061, 29490); + int16x8_t v2063 = vaddq_s16(v2060, v2062); + int16x8_t v2064 = vqrdmulhq_n_s16(v2063, 
18578); + int16x8_t v2065 = vaddq_s16(v2059, v2064); + int16x8_t v2066 = vsubq_s16(v1840, v1842); + int16x8_t v2067 = vsubq_s16(v1844, v1846); + int16x8_t v2068 = vqrdmulhq_n_s16(v2067, 29490); + int16x8_t v2069 = vaddq_s16(v2066, v2068); + int16x8_t v2070 = vsubq_s16(v1850, v1852); + int16x8_t v2071 = vqrdmulhq_n_s16(v2070, 18578); + int16x8_t v2072 = vsubq_s16(v1854, v1856); + int16x8_t v2073 = vqrdmulhq_n_s16(v2072, 16719); + int16x8_t v2074 = vaddq_s16(v2071, v2073); + int16x8_t v2075 = vaddq_s16(v2069, v2074); + int16x8_t v2076 = vqrdmulhq_n_s16(v2075, 16890); + int16x8_t v2077 = vaddq_s16(v2065, v2076); + int16x8_t v2078 = vsubq_s16(v1864, v1866); + int16x8_t v2079 = vsubq_s16(v1868, v1870); + int16x8_t v2080 = vqrdmulhq_n_s16(v2079, 29490); + int16x8_t v2081 = vaddq_s16(v2078, v2080); + int16x8_t v2082 = vsubq_s16(v1874, v1876); + int16x8_t v2083 = vsubq_s16(v1878, v1880); + int16x8_t v2084 = vqrdmulhq_n_s16(v2083, 29490); + int16x8_t v2085 = vaddq_s16(v2082, v2084); + int16x8_t v2086 = vqrdmulhq_n_s16(v2085, 18578); + int16x8_t v2087 = vaddq_s16(v2081, v2086); + int16x8_t v2088 = vsubq_s16(v1886, v1888); + int16x8_t v2089 = vsubq_s16(v1890, v1892); + int16x8_t v2090 = vqrdmulhq_n_s16(v2089, 29490); + int16x8_t v2091 = vaddq_s16(v2088, v2090); + int16x8_t v2092 = vsubq_s16(v1896, v1898); + int16x8_t v2093 = vsubq_s16(v1900, v1902); + int16x8_t v2094 = vqrdmulhq_n_s16(v2093, 29490); + int16x8_t v2095 = vaddq_s16(v2092, v2094); + int16x8_t v2096 = vqrdmulhq_n_s16(v2095, 18578); + int16x8_t v2097 = vaddq_s16(v2091, v2096); + int16x8_t v2098 = vqrdmulhq_n_s16(v2097, 16890); + int16x8_t v2099 = vaddq_s16(v2087, v2098); + int16x8_t v2100 = vqrdmulhq_n_s16(v2099, 16508); + int16x8_t v2101 = vaddq_s16(v2077, v2100); + int16x8_t v2102 = vqrdmulhq_n_s16(v2101, 16415); + int16x8_t v2103 = vaddq_s16(v2055, v2102); + int16x8_t v2104 = vqrdmulhq_n_s16(v2103, 16392); + int16x8_t v2105 = vaddq_s16(v2009, v2104); + int16x8_t v2106 = vsubq_s16(v2, v8); + int16x8_t v2107 = 
vsubq_s16(v15, v22); + int16x8_t v2108_tmp = vqrdmulhq_n_s16(v2107, 18446); + int16x8_t v2108 = vmlaq_n_s16(v2108_tmp, v2107, 2); + int16x8_t v2109 = vaddq_s16(v2106, v2108); + int16x8_t v2110 = vsubq_s16(v31, v41); + int16x8_t v2111 = vsubq_s16(v48, v56); + int16x8_t v2112_tmp = vqrdmulhq_n_s16(v2111, 18446); + int16x8_t v2112 = vmlaq_n_s16(v2112_tmp, v2111, 2); + int16x8_t v2113 = vaddq_s16(v2110, v2112); + int16x8_t v2114 = vqrdmulhq_n_s16(v2113, 21195); + int16x8_t v2115 = vaddq_s16(v2109, v2114); + int16x8_t v2116 = vsubq_s16(v67, v77); + int16x8_t v2117 = vsubq_s16(v90, v99); + int16x8_t v2118_tmp = vqrdmulhq_n_s16(v2117, 18446); + int16x8_t v2118 = vmlaq_n_s16(v2118_tmp, v2117, 2); + int16x8_t v2119 = vaddq_s16(v2116, v2118); + int16x8_t v2120 = vsubq_s16(v108, v118); + int16x8_t v2121 = vsubq_s16(v125, v134); + int16x8_t v2122_tmp = vqrdmulhq_n_s16(v2121, 18446); + int16x8_t v2122 = vmlaq_n_s16(v2122_tmp, v2121, 2); + int16x8_t v2123 = vaddq_s16(v2120, v2122); + int16x8_t v2124 = vqrdmulhq_n_s16(v2123, 21195); + int16x8_t v2125 = vaddq_s16(v2119, v2124); + int16x8_t v2126 = vqrdmulhq_n_s16(v2125, 17401); + int16x8_t v2127 = vaddq_s16(v2115, v2126); + int16x8_t v2128 = vsubq_s16(v147, v157); + int16x8_t v2129 = vsubq_s16(v170, v179); + int16x8_t v2130_tmp = vqrdmulhq_n_s16(v2129, 18446); + int16x8_t v2130 = vmlaq_n_s16(v2130_tmp, v2129, 2); + int16x8_t v2131 = vaddq_s16(v2128, v2130); + int16x8_t v2132 = vsubq_s16(v194, v212); + int16x8_t v2133 = vsubq_s16(v219, v229); + int16x8_t v2134_tmp = vqrdmulhq_n_s16(v2133, 18446); + int16x8_t v2134 = vmlaq_n_s16(v2134_tmp, v2133, 2); + int16x8_t v2135 = vaddq_s16(v2132, v2134); + int16x8_t v2136 = vqrdmulhq_n_s16(v2135, 21195); + int16x8_t v2137 = vaddq_s16(v2131, v2136); + int16x8_t v2138 = vsubq_s16(v240, v250); + int16x8_t v2139 = vsubq_s16(v263, v272); + int16x8_t v2140_tmp = vqrdmulhq_n_s16(v2139, 18446); + int16x8_t v2140 = vmlaq_n_s16(v2140_tmp, v2139, 2); + int16x8_t v2141 = vaddq_s16(v2138, v2140); + 
int16x8_t v2142 = vsubq_s16(v281, v291); + int16x8_t v2143 = vsubq_s16(v298, v308); + int16x8_t v2144_tmp = vqrdmulhq_n_s16(v2143, 18446); + int16x8_t v2144 = vmlaq_n_s16(v2144_tmp, v2143, 2); + int16x8_t v2145 = vaddq_s16(v2142, v2144); + int16x8_t v2146 = vqrdmulhq_n_s16(v2145, 21195); + int16x8_t v2147 = vaddq_s16(v2141, v2146); + int16x8_t v2148 = vqrdmulhq_n_s16(v2147, 17401); + int16x8_t v2149 = vaddq_s16(v2137, v2148); + int16x8_t v2150 = vqrdmulhq_n_s16(v2149, 16629); + int16x8_t v2151 = vaddq_s16(v2127, v2150); + int16x8_t v2152 = vsubq_s16(v323, v333); + int16x8_t v2153 = vsubq_s16(v346, v355); + int16x8_t v2154_tmp = vqrdmulhq_n_s16(v2153, 18446); + int16x8_t v2154 = vmlaq_n_s16(v2154_tmp, v2153, 2); + int16x8_t v2155 = vaddq_s16(v2152, v2154); + int16x8_t v2156 = vsubq_s16(v370, v388); + int16x8_t v2157 = vsubq_s16(v395, v405); + int16x8_t v2158_tmp = vqrdmulhq_n_s16(v2157, 18446); + int16x8_t v2158 = vmlaq_n_s16(v2158_tmp, v2157, 2); + int16x8_t v2159 = vaddq_s16(v2156, v2158); + int16x8_t v2160 = vqrdmulhq_n_s16(v2159, 21195); + int16x8_t v2161 = vaddq_s16(v2155, v2160); + int16x8_t v2162 = vsubq_s16(v422, v440); + int16x8_t v2163 = vsubq_s16(v465, v478); + int16x8_t v2164_tmp = vqrdmulhq_n_s16(v2163, 18446); + int16x8_t v2164 = vmlaq_n_s16(v2164_tmp, v2163, 2); + int16x8_t v2165 = vaddq_s16(v2162, v2164); + int16x8_t v2166 = vsubq_s16(v487, v497); + int16x8_t v2167 = vsubq_s16(v504, v515); + int16x8_t v2168_tmp = vqrdmulhq_n_s16(v2167, 18446); + int16x8_t v2168 = vmlaq_n_s16(v2168_tmp, v2167, 2); + int16x8_t v2169 = vaddq_s16(v2166, v2168); + int16x8_t v2170 = vqrdmulhq_n_s16(v2169, 21195); + int16x8_t v2171 = vaddq_s16(v2165, v2170); + int16x8_t v2172 = vqrdmulhq_n_s16(v2171, 17401); + int16x8_t v2173 = vaddq_s16(v2161, v2172); + int16x8_t v2174 = vsubq_s16(v528, v538); + int16x8_t v2175 = vsubq_s16(v551, v560); + int16x8_t v2176_tmp = vqrdmulhq_n_s16(v2175, 18446); + int16x8_t v2176 = vmlaq_n_s16(v2176_tmp, v2175, 2); + int16x8_t v2177 = 
vaddq_s16(v2174, v2176); + int16x8_t v2178 = vsubq_s16(v575, v593); + int16x8_t v2179 = vsubq_s16(v600, v610); + int16x8_t v2180_tmp = vqrdmulhq_n_s16(v2179, 18446); + int16x8_t v2180 = vmlaq_n_s16(v2180_tmp, v2179, 2); + int16x8_t v2181 = vaddq_s16(v2178, v2180); + int16x8_t v2182 = vqrdmulhq_n_s16(v2181, 21195); + int16x8_t v2183 = vaddq_s16(v2177, v2182); + int16x8_t v2184 = vsubq_s16(v621, v631); + int16x8_t v2185 = vsubq_s16(v644, v653); + int16x8_t v2186_tmp = vqrdmulhq_n_s16(v2185, 18446); + int16x8_t v2186 = vmlaq_n_s16(v2186_tmp, v2185, 2); + int16x8_t v2187 = vaddq_s16(v2184, v2186); + int16x8_t v2188 = vsubq_s16(v662, v672); + int16x8_t v2189 = vsubq_s16(v679, v690); + int16x8_t v2190_tmp = vqrdmulhq_n_s16(v2189, 18446); + int16x8_t v2190 = vmlaq_n_s16(v2190_tmp, v2189, 2); + int16x8_t v2191 = vaddq_s16(v2188, v2190); + int16x8_t v2192 = vqrdmulhq_n_s16(v2191, 21195); + int16x8_t v2193 = vaddq_s16(v2187, v2192); + int16x8_t v2194 = vqrdmulhq_n_s16(v2193, 17401); + int16x8_t v2195 = vaddq_s16(v2183, v2194); + int16x8_t v2196 = vqrdmulhq_n_s16(v2195, 16629); + int16x8_t v2197 = vaddq_s16(v2173, v2196); + int16x8_t v2198 = vqrdmulhq_n_s16(v2197, 16445); + int16x8_t v2199 = vaddq_s16(v2151, v2198); + int16x8_t v2200 = vsubq_s16(v707, v717); + int16x8_t v2201 = vsubq_s16(v730, v739); + int16x8_t v2202_tmp = vqrdmulhq_n_s16(v2201, 18446); + int16x8_t v2202 = vmlaq_n_s16(v2202_tmp, v2201, 2); + int16x8_t v2203 = vaddq_s16(v2200, v2202); + int16x8_t v2204 = vsubq_s16(v754, v772); + int16x8_t v2205 = vsubq_s16(v779, v789); + int16x8_t v2206_tmp = vqrdmulhq_n_s16(v2205, 18446); + int16x8_t v2206 = vmlaq_n_s16(v2206_tmp, v2205, 2); + int16x8_t v2207 = vaddq_s16(v2204, v2206); + int16x8_t v2208 = vqrdmulhq_n_s16(v2207, 21195); + int16x8_t v2209 = vaddq_s16(v2203, v2208); + int16x8_t v2210 = vsubq_s16(v806, v824); + int16x8_t v2211 = vsubq_s16(v849, v862); + int16x8_t v2212_tmp = vqrdmulhq_n_s16(v2211, 18446); + int16x8_t v2212 = vmlaq_n_s16(v2212_tmp, v2211, 2); + 
int16x8_t v2213 = vaddq_s16(v2210, v2212); + int16x8_t v2214 = vsubq_s16(v871, v881); + int16x8_t v2215 = vsubq_s16(v888, v899); + int16x8_t v2216_tmp = vqrdmulhq_n_s16(v2215, 18446); + int16x8_t v2216 = vmlaq_n_s16(v2216_tmp, v2215, 2); + int16x8_t v2217 = vaddq_s16(v2214, v2216); + int16x8_t v2218 = vqrdmulhq_n_s16(v2217, 21195); + int16x8_t v2219 = vaddq_s16(v2213, v2218); + int16x8_t v2220 = vqrdmulhq_n_s16(v2219, 17401); + int16x8_t v2221 = vaddq_s16(v2209, v2220); + int16x8_t v2222 = vsubq_s16(v918, v936); + int16x8_t v2223 = vsubq_s16(v961, v974); + int16x8_t v2224_tmp = vqrdmulhq_n_s16(v2223, 18446); + int16x8_t v2224 = vmlaq_n_s16(v2224_tmp, v2223, 2); + int16x8_t v2225 = vaddq_s16(v2222, v2224); + int16x8_t v2226 = vsubq_s16(v1001, v1035); + int16x8_t v2227 = vsubq_s16(v1042, v1056); + int16x8_t v2228_tmp = vqrdmulhq_n_s16(v2227, 18446); + int16x8_t v2228 = vmlaq_n_s16(v2228_tmp, v2227, 2); + int16x8_t v2229 = vaddq_s16(v2226, v2228); + int16x8_t v2230 = vqrdmulhq_n_s16(v2229, 21195); + int16x8_t v2231 = vaddq_s16(v2225, v2230); + int16x8_t v2232 = vsubq_s16(v1067, v1077); + int16x8_t v2233 = vsubq_s16(v1090, v1099); + int16x8_t v2234_tmp = vqrdmulhq_n_s16(v2233, 18446); + int16x8_t v2234 = vmlaq_n_s16(v2234_tmp, v2233, 2); + int16x8_t v2235 = vaddq_s16(v2232, v2234); + int16x8_t v2236 = vsubq_s16(v1108, v1118); + int16x8_t v2237 = vsubq_s16(v1125, v1137); + int16x8_t v2238_tmp = vqrdmulhq_n_s16(v2237, 18446); + int16x8_t v2238 = vmlaq_n_s16(v2238_tmp, v2237, 2); + int16x8_t v2239 = vaddq_s16(v2236, v2238); + int16x8_t v2240 = vqrdmulhq_n_s16(v2239, 21195); + int16x8_t v2241 = vaddq_s16(v2235, v2240); + int16x8_t v2242 = vqrdmulhq_n_s16(v2241, 17401); + int16x8_t v2243 = vaddq_s16(v2231, v2242); + int16x8_t v2244 = vqrdmulhq_n_s16(v2243, 16629); + int16x8_t v2245 = vaddq_s16(v2221, v2244); + int16x8_t v2246 = vsubq_s16(v1152, v1162); + int16x8_t v2247 = vsubq_s16(v1175, v1184); + int16x8_t v2248_tmp = vqrdmulhq_n_s16(v2247, 18446); + int16x8_t v2248 = 
vmlaq_n_s16(v2248_tmp, v2247, 2); + int16x8_t v2249 = vaddq_s16(v2246, v2248); + int16x8_t v2250 = vsubq_s16(v1199, v1217); + int16x8_t v2251 = vsubq_s16(v1224, v1234); + int16x8_t v2252_tmp = vqrdmulhq_n_s16(v2251, 18446); + int16x8_t v2252 = vmlaq_n_s16(v2252_tmp, v2251, 2); + int16x8_t v2253 = vaddq_s16(v2250, v2252); + int16x8_t v2254 = vqrdmulhq_n_s16(v2253, 21195); + int16x8_t v2255 = vaddq_s16(v2249, v2254); + int16x8_t v2256 = vsubq_s16(v1251, v1269); + int16x8_t v2257 = vsubq_s16(v1294, v1307); + int16x8_t v2258_tmp = vqrdmulhq_n_s16(v2257, 18446); + int16x8_t v2258 = vmlaq_n_s16(v2258_tmp, v2257, 2); + int16x8_t v2259 = vaddq_s16(v2256, v2258); + int16x8_t v2260 = vsubq_s16(v1316, v1326); + int16x8_t v2261 = vsubq_s16(v1333, v1344); + int16x8_t v2262_tmp = vqrdmulhq_n_s16(v2261, 18446); + int16x8_t v2262 = vmlaq_n_s16(v2262_tmp, v2261, 2); + int16x8_t v2263 = vaddq_s16(v2260, v2262); + int16x8_t v2264 = vqrdmulhq_n_s16(v2263, 21195); + int16x8_t v2265 = vaddq_s16(v2259, v2264); + int16x8_t v2266 = vqrdmulhq_n_s16(v2265, 17401); + int16x8_t v2267 = vaddq_s16(v2255, v2266); + int16x8_t v2268 = vsubq_s16(v1357, v1367); + int16x8_t v2269 = vsubq_s16(v1380, v1389); + int16x8_t v2270_tmp = vqrdmulhq_n_s16(v2269, 18446); + int16x8_t v2270 = vmlaq_n_s16(v2270_tmp, v2269, 2); + int16x8_t v2271 = vaddq_s16(v2268, v2270); + int16x8_t v2272 = vsubq_s16(v1404, v1422); + int16x8_t v2273 = vsubq_s16(v1429, v1439); + int16x8_t v2274_tmp = vqrdmulhq_n_s16(v2273, 18446); + int16x8_t v2274 = vmlaq_n_s16(v2274_tmp, v2273, 2); + int16x8_t v2275 = vaddq_s16(v2272, v2274); + int16x8_t v2276 = vqrdmulhq_n_s16(v2275, 21195); + int16x8_t v2277 = vaddq_s16(v2271, v2276); + int16x8_t v2278 = vsubq_s16(v1450, v1460); + int16x8_t v2279 = vsubq_s16(v1473, v1482); + int16x8_t v2280_tmp = vqrdmulhq_n_s16(v2279, 18446); + int16x8_t v2280 = vmlaq_n_s16(v2280_tmp, v2279, 2); + int16x8_t v2281 = vaddq_s16(v2278, v2280); + int16x8_t v2282 = vsubq_s16(v1491, v1501); + int16x8_t v2283 = 
vsubq_s16(v1508, v1520); + int16x8_t v2284_tmp = vqrdmulhq_n_s16(v2283, 18446); + int16x8_t v2284 = vmlaq_n_s16(v2284_tmp, v2283, 2); + int16x8_t v2285 = vaddq_s16(v2282, v2284); + int16x8_t v2286 = vqrdmulhq_n_s16(v2285, 21195); + int16x8_t v2287 = vaddq_s16(v2281, v2286); + int16x8_t v2288 = vqrdmulhq_n_s16(v2287, 17401); + int16x8_t v2289 = vaddq_s16(v2277, v2288); + int16x8_t v2290 = vqrdmulhq_n_s16(v2289, 16629); + int16x8_t v2291 = vaddq_s16(v2267, v2290); + int16x8_t v2292 = vqrdmulhq_n_s16(v2291, 16445); + int16x8_t v2293 = vaddq_s16(v2245, v2292); + int16x8_t v2294 = vqrdmulhq_n_s16(v2293, 16399); + int16x8_t v2295 = vaddq_s16(v2199, v2294); + int16x8_t v2296 = vsubq_s16(v2106, v2108); + int16x8_t v2297 = vsubq_s16(v2110, v2112); + int16x8_t v2298 = vqrdmulhq_n_s16(v2297, 25826); + int16x8_t v2299 = vaddq_s16(v2296, v2298); + int16x8_t v2300 = vsubq_s16(v2116, v2118); + int16x8_t v2301 = vsubq_s16(v2120, v2122); + int16x8_t v2302 = vqrdmulhq_n_s16(v2301, 25826); + int16x8_t v2303 = vaddq_s16(v2300, v2302); + int16x8_t v2304 = vqrdmulhq_n_s16(v2303, 18124); + int16x8_t v2305 = vaddq_s16(v2299, v2304); + int16x8_t v2306 = vsubq_s16(v2128, v2130); + int16x8_t v2307 = vsubq_s16(v2132, v2134); + int16x8_t v2308 = vqrdmulhq_n_s16(v2307, 25826); + int16x8_t v2309 = vaddq_s16(v2306, v2308); + int16x8_t v2310 = vsubq_s16(v2138, v2140); + int16x8_t v2311 = vsubq_s16(v2142, v2144); + int16x8_t v2312 = vqrdmulhq_n_s16(v2311, 25826); + int16x8_t v2313 = vaddq_s16(v2310, v2312); + int16x8_t v2314 = vqrdmulhq_n_s16(v2313, 18124); + int16x8_t v2315 = vaddq_s16(v2309, v2314); + int16x8_t v2316 = vqrdmulhq_n_s16(v2315, 16792); + int16x8_t v2317 = vaddq_s16(v2305, v2316); + int16x8_t v2318 = vsubq_s16(v2152, v2154); + int16x8_t v2319 = vsubq_s16(v2156, v2158); + int16x8_t v2320 = vqrdmulhq_n_s16(v2319, 25826); + int16x8_t v2321 = vaddq_s16(v2318, v2320); + int16x8_t v2322 = vsubq_s16(v2162, v2164); + int16x8_t v2323 = vsubq_s16(v2166, v2168); + int16x8_t v2324 = 
vqrdmulhq_n_s16(v2323, 25826); + int16x8_t v2325 = vaddq_s16(v2322, v2324); + int16x8_t v2326 = vqrdmulhq_n_s16(v2325, 18124); + int16x8_t v2327 = vaddq_s16(v2321, v2326); + int16x8_t v2328 = vsubq_s16(v2174, v2176); + int16x8_t v2329 = vsubq_s16(v2178, v2180); + int16x8_t v2330 = vqrdmulhq_n_s16(v2329, 25826); + int16x8_t v2331 = vaddq_s16(v2328, v2330); + int16x8_t v2332 = vsubq_s16(v2184, v2186); + int16x8_t v2333 = vsubq_s16(v2188, v2190); + int16x8_t v2334 = vqrdmulhq_n_s16(v2333, 25826); + int16x8_t v2335 = vaddq_s16(v2332, v2334); + int16x8_t v2336 = vqrdmulhq_n_s16(v2335, 18124); + int16x8_t v2337 = vaddq_s16(v2331, v2336); + int16x8_t v2338 = vqrdmulhq_n_s16(v2337, 16792); + int16x8_t v2339 = vaddq_s16(v2327, v2338); + int16x8_t v2340 = vqrdmulhq_n_s16(v2339, 16484); + int16x8_t v2341 = vaddq_s16(v2317, v2340); + int16x8_t v2342 = vsubq_s16(v2200, v2202); + int16x8_t v2343 = vsubq_s16(v2204, v2206); + int16x8_t v2344 = vqrdmulhq_n_s16(v2343, 25826); + int16x8_t v2345 = vaddq_s16(v2342, v2344); + int16x8_t v2346 = vsubq_s16(v2210, v2212); + int16x8_t v2347 = vsubq_s16(v2214, v2216); + int16x8_t v2348 = vqrdmulhq_n_s16(v2347, 25826); + int16x8_t v2349 = vaddq_s16(v2346, v2348); + int16x8_t v2350 = vqrdmulhq_n_s16(v2349, 18124); + int16x8_t v2351 = vaddq_s16(v2345, v2350); + int16x8_t v2352 = vsubq_s16(v2222, v2224); + int16x8_t v2353 = vsubq_s16(v2226, v2228); + int16x8_t v2354 = vqrdmulhq_n_s16(v2353, 25826); + int16x8_t v2355 = vaddq_s16(v2352, v2354); + int16x8_t v2356 = vsubq_s16(v2232, v2234); + int16x8_t v2357 = vsubq_s16(v2236, v2238); + int16x8_t v2358 = vqrdmulhq_n_s16(v2357, 25826); + int16x8_t v2359 = vaddq_s16(v2356, v2358); + int16x8_t v2360 = vqrdmulhq_n_s16(v2359, 18124); + int16x8_t v2361 = vaddq_s16(v2355, v2360); + int16x8_t v2362 = vqrdmulhq_n_s16(v2361, 16792); + int16x8_t v2363 = vaddq_s16(v2351, v2362); + int16x8_t v2364 = vsubq_s16(v2246, v2248); + int16x8_t v2365 = vsubq_s16(v2250, v2252); + int16x8_t v2366 = vqrdmulhq_n_s16(v2365, 
25826); + int16x8_t v2367 = vaddq_s16(v2364, v2366); + int16x8_t v2368 = vsubq_s16(v2256, v2258); + int16x8_t v2369 = vsubq_s16(v2260, v2262); + int16x8_t v2370 = vqrdmulhq_n_s16(v2369, 25826); + int16x8_t v2371 = vaddq_s16(v2368, v2370); + int16x8_t v2372 = vqrdmulhq_n_s16(v2371, 18124); + int16x8_t v2373 = vaddq_s16(v2367, v2372); + int16x8_t v2374 = vsubq_s16(v2268, v2270); + int16x8_t v2375 = vsubq_s16(v2272, v2274); + int16x8_t v2376 = vqrdmulhq_n_s16(v2375, 25826); + int16x8_t v2377 = vaddq_s16(v2374, v2376); + int16x8_t v2378 = vsubq_s16(v2278, v2280); + int16x8_t v2379 = vsubq_s16(v2282, v2284); + int16x8_t v2380 = vqrdmulhq_n_s16(v2379, 25826); + int16x8_t v2381 = vaddq_s16(v2378, v2380); + int16x8_t v2382 = vqrdmulhq_n_s16(v2381, 18124); + int16x8_t v2383 = vaddq_s16(v2377, v2382); + int16x8_t v2384 = vqrdmulhq_n_s16(v2383, 16792); + int16x8_t v2385 = vaddq_s16(v2373, v2384); + int16x8_t v2386 = vqrdmulhq_n_s16(v2385, 16484); + int16x8_t v2387 = vaddq_s16(v2363, v2386); + int16x8_t v2388 = vqrdmulhq_n_s16(v2387, 16409); + int16x8_t v2389 = vaddq_s16(v2341, v2388); + int16x8_t v2390 = vsubq_s16(v1916, v1918); + int16x8_t v2391 = vsubq_s16(v1920, v1922); + int16x8_t v2392_tmp = vqrdmulhq_n_s16(v2391, 1988); + int16x8_t v2392 = vaddq_s16(v2392_tmp, v2391); + int16x8_t v2393 = vaddq_s16(v2390, v2392); + int16x8_t v2394 = vsubq_s16(v1926, v1928); + int16x8_t v2395 = vsubq_s16(v1930, v1932); + int16x8_t v2396_tmp = vqrdmulhq_n_s16(v2395, 1988); + int16x8_t v2396 = vaddq_s16(v2396_tmp, v2395); + int16x8_t v2397 = vaddq_s16(v2394, v2396); + int16x8_t v2398 = vqrdmulhq_n_s16(v2397, 19102); + int16x8_t v2399 = vaddq_s16(v2393, v2398); + int16x8_t v2400 = vsubq_s16(v1938, v1940); + int16x8_t v2401 = vsubq_s16(v1942, v1944); + int16x8_t v2402_tmp = vqrdmulhq_n_s16(v2401, 1988); + int16x8_t v2402 = vaddq_s16(v2402_tmp, v2401); + int16x8_t v2403 = vaddq_s16(v2400, v2402); + int16x8_t v2404 = vsubq_s16(v1948, v1950); + int16x8_t v2405 = vsubq_s16(v1952, v1954); + 
int16x8_t v2406_tmp = vqrdmulhq_n_s16(v2405, 1988); + int16x8_t v2406 = vaddq_s16(v2406_tmp, v2405); + int16x8_t v2407 = vaddq_s16(v2404, v2406); + int16x8_t v2408 = vqrdmulhq_n_s16(v2407, 19102); + int16x8_t v2409 = vaddq_s16(v2403, v2408); + int16x8_t v2410 = vqrdmulhq_n_s16(v2409, 17000); + int16x8_t v2411 = vaddq_s16(v2399, v2410); + int16x8_t v2412 = vsubq_s16(v1962, v1964); + int16x8_t v2413 = vsubq_s16(v1966, v1968); + int16x8_t v2414_tmp = vqrdmulhq_n_s16(v2413, 1988); + int16x8_t v2414 = vaddq_s16(v2414_tmp, v2413); + int16x8_t v2415 = vaddq_s16(v2412, v2414); + int16x8_t v2416 = vsubq_s16(v1972, v1974); + int16x8_t v2417 = vsubq_s16(v1976, v1978); + int16x8_t v2418_tmp = vqrdmulhq_n_s16(v2417, 1988); + int16x8_t v2418 = vaddq_s16(v2418_tmp, v2417); + int16x8_t v2419 = vaddq_s16(v2416, v2418); + int16x8_t v2420 = vqrdmulhq_n_s16(v2419, 19102); + int16x8_t v2421 = vaddq_s16(v2415, v2420); + int16x8_t v2422 = vsubq_s16(v1984, v1986); + int16x8_t v2423 = vsubq_s16(v1988, v1990); + int16x8_t v2424_tmp = vqrdmulhq_n_s16(v2423, 1988); + int16x8_t v2424 = vaddq_s16(v2424_tmp, v2423); + int16x8_t v2425 = vaddq_s16(v2422, v2424); + int16x8_t v2426 = vsubq_s16(v1994, v1996); + int16x8_t v2427 = vsubq_s16(v1998, v2000); + int16x8_t v2428_tmp = vqrdmulhq_n_s16(v2427, 1988); + int16x8_t v2428 = vaddq_s16(v2428_tmp, v2427); + int16x8_t v2429 = vaddq_s16(v2426, v2428); + int16x8_t v2430 = vqrdmulhq_n_s16(v2429, 19102); + int16x8_t v2431 = vaddq_s16(v2425, v2430); + int16x8_t v2432 = vqrdmulhq_n_s16(v2431, 17000); + int16x8_t v2433 = vaddq_s16(v2421, v2432); + int16x8_t v2434 = vqrdmulhq_n_s16(v2433, 16534); + int16x8_t v2435 = vaddq_s16(v2411, v2434); + int16x8_t v2436 = vsubq_s16(v2010, v2012); + int16x8_t v2437 = vsubq_s16(v2014, v2016); + int16x8_t v2438_tmp = vqrdmulhq_n_s16(v2437, 1988); + int16x8_t v2438 = vaddq_s16(v2438_tmp, v2437); + int16x8_t v2439 = vaddq_s16(v2436, v2438); + int16x8_t v2440 = vsubq_s16(v2020, v2022); + int16x8_t v2441 = vsubq_s16(v2024, 
v2026); + int16x8_t v2442_tmp = vqrdmulhq_n_s16(v2441, 1988); + int16x8_t v2442 = vaddq_s16(v2442_tmp, v2441); + int16x8_t v2443 = vaddq_s16(v2440, v2442); + int16x8_t v2444 = vqrdmulhq_n_s16(v2443, 19102); + int16x8_t v2445 = vaddq_s16(v2439, v2444); + int16x8_t v2446 = vsubq_s16(v2032, v2034); + int16x8_t v2447 = vsubq_s16(v2036, v2038); + int16x8_t v2448_tmp = vqrdmulhq_n_s16(v2447, 1988); + int16x8_t v2448 = vaddq_s16(v2448_tmp, v2447); + int16x8_t v2449 = vaddq_s16(v2446, v2448); + int16x8_t v2450 = vsubq_s16(v2042, v2044); + int16x8_t v2451 = vsubq_s16(v2046, v2048); + int16x8_t v2452_tmp = vqrdmulhq_n_s16(v2451, 1988); + int16x8_t v2452 = vaddq_s16(v2452_tmp, v2451); + int16x8_t v2453 = vaddq_s16(v2450, v2452); + int16x8_t v2454 = vqrdmulhq_n_s16(v2453, 19102); + int16x8_t v2455 = vaddq_s16(v2449, v2454); + int16x8_t v2456 = vqrdmulhq_n_s16(v2455, 17000); + int16x8_t v2457 = vaddq_s16(v2445, v2456); + int16x8_t v2458 = vsubq_s16(v2056, v2058); + int16x8_t v2459 = vsubq_s16(v2060, v2062); + int16x8_t v2460_tmp = vqrdmulhq_n_s16(v2459, 1988); + int16x8_t v2460 = vaddq_s16(v2460_tmp, v2459); + int16x8_t v2461 = vaddq_s16(v2458, v2460); + int16x8_t v2462 = vsubq_s16(v2066, v2068); + int16x8_t v2463 = vqrdmulhq_n_s16(v2072, 29490); + int16x8_t v2464 = vsubq_s16(v2070, v2463); + int16x8_t v2465_tmp = vqrdmulhq_n_s16(v2464, 1988); + int16x8_t v2465 = vaddq_s16(v2465_tmp, v2464); + int16x8_t v2466 = vaddq_s16(v2462, v2465); + int16x8_t v2467 = vqrdmulhq_n_s16(v2466, 19102); + int16x8_t v2468 = vaddq_s16(v2461, v2467); + int16x8_t v2469 = vsubq_s16(v2078, v2080); + int16x8_t v2470 = vsubq_s16(v2082, v2084); + int16x8_t v2471_tmp = vqrdmulhq_n_s16(v2470, 1988); + int16x8_t v2471 = vaddq_s16(v2471_tmp, v2470); + int16x8_t v2472 = vaddq_s16(v2469, v2471); + int16x8_t v2473 = vsubq_s16(v2088, v2090); + int16x8_t v2474 = vsubq_s16(v2092, v2094); + int16x8_t v2475_tmp = vqrdmulhq_n_s16(v2474, 1988); + int16x8_t v2475 = vaddq_s16(v2475_tmp, v2474); + int16x8_t v2476 = 
vaddq_s16(v2473, v2475); + int16x8_t v2477 = vqrdmulhq_n_s16(v2476, 19102); + int16x8_t v2478 = vaddq_s16(v2472, v2477); + int16x8_t v2479 = vqrdmulhq_n_s16(v2478, 17000); + int16x8_t v2480 = vaddq_s16(v2468, v2479); + int16x8_t v2481 = vqrdmulhq_n_s16(v2480, 16534); + int16x8_t v2482 = vaddq_s16(v2457, v2481); + int16x8_t v2483 = vqrdmulhq_n_s16(v2482, 16421); + int16x8_t v2484 = vaddq_s16(v2435, v2483); + int16x8_t v2485 = vsubq_s16(v1537, v1542); + int16x8_t v2486 = vsubq_s16(v1547, v1552); + int16x8_t v2487_tmp = vqrdmulhq_n_s16(v2486, 23673); + int16x8_t v2487 = vaddq_s16(v2487_tmp, v2486); + int16x8_t v2488 = vaddq_s16(v2485, v2487); + int16x8_t v2489 = vsubq_s16(v1559, v1564); + int16x8_t v2490 = vsubq_s16(v1569, v1574); + int16x8_t v2491_tmp = vqrdmulhq_n_s16(v2490, 23673); + int16x8_t v2491 = vaddq_s16(v2491_tmp, v2490); + int16x8_t v2492 = vaddq_s16(v2489, v2491); + int16x8_t v2493 = vqrdmulhq_n_s16(v2492, 20398); + int16x8_t v2494 = vaddq_s16(v2488, v2493); + int16x8_t v2495 = vsubq_s16(v1583, v1588); + int16x8_t v2496 = vsubq_s16(v1593, v1598); + int16x8_t v2497_tmp = vqrdmulhq_n_s16(v2496, 23673); + int16x8_t v2497 = vaddq_s16(v2497_tmp, v2496); + int16x8_t v2498 = vaddq_s16(v2495, v2497); + int16x8_t v2499 = vsubq_s16(v1605, v1610); + int16x8_t v2500 = vsubq_s16(v1615, v1620); + int16x8_t v2501_tmp = vqrdmulhq_n_s16(v2500, 23673); + int16x8_t v2501 = vaddq_s16(v2501_tmp, v2500); + int16x8_t v2502 = vaddq_s16(v2499, v2501); + int16x8_t v2503 = vqrdmulhq_n_s16(v2502, 20398); + int16x8_t v2504 = vaddq_s16(v2498, v2503); + int16x8_t v2505 = vqrdmulhq_n_s16(v2504, 17255); + int16x8_t v2506 = vaddq_s16(v2494, v2505); + int16x8_t v2507 = vsubq_s16(v1631, v1636); + int16x8_t v2508 = vsubq_s16(v1641, v1646); + int16x8_t v2509_tmp = vqrdmulhq_n_s16(v2508, 23673); + int16x8_t v2509 = vaddq_s16(v2509_tmp, v2508); + int16x8_t v2510 = vaddq_s16(v2507, v2509); + int16x8_t v2511 = vsubq_s16(v1653, v1658); + int16x8_t v2512 = vsubq_s16(v1663, v1668); + int16x8_t 
v2513_tmp = vqrdmulhq_n_s16(v2512, 23673); + int16x8_t v2513 = vaddq_s16(v2513_tmp, v2512); + int16x8_t v2514 = vaddq_s16(v2511, v2513); + int16x8_t v2515 = vqrdmulhq_n_s16(v2514, 20398); + int16x8_t v2516 = vaddq_s16(v2510, v2515); + int16x8_t v2517 = vsubq_s16(v1677, v1682); + int16x8_t v2518 = vsubq_s16(v1687, v1692); + int16x8_t v2519_tmp = vqrdmulhq_n_s16(v2518, 23673); + int16x8_t v2519 = vaddq_s16(v2519_tmp, v2518); + int16x8_t v2520 = vaddq_s16(v2517, v2519); + int16x8_t v2521 = vsubq_s16(v1699, v1704); + int16x8_t v2522 = vsubq_s16(v1709, v1714); + int16x8_t v2523_tmp = vqrdmulhq_n_s16(v2522, 23673); + int16x8_t v2523 = vaddq_s16(v2523_tmp, v2522); + int16x8_t v2524 = vaddq_s16(v2521, v2523); + int16x8_t v2525 = vqrdmulhq_n_s16(v2524, 20398); + int16x8_t v2526 = vaddq_s16(v2520, v2525); + int16x8_t v2527 = vqrdmulhq_n_s16(v2526, 17255); + int16x8_t v2528 = vaddq_s16(v2516, v2527); + int16x8_t v2529 = vqrdmulhq_n_s16(v2528, 16595); + int16x8_t v2530 = vaddq_s16(v2506, v2529); + int16x8_t v2531 = vsubq_s16(v1727, v1732); + int16x8_t v2532 = vsubq_s16(v1737, v1742); + int16x8_t v2533_tmp = vqrdmulhq_n_s16(v2532, 23673); + int16x8_t v2533 = vaddq_s16(v2533_tmp, v2532); + int16x8_t v2534 = vaddq_s16(v2531, v2533); + int16x8_t v2535 = vsubq_s16(v1749, v1754); + int16x8_t v2536 = vsubq_s16(v1759, v1764); + int16x8_t v2537_tmp = vqrdmulhq_n_s16(v2536, 23673); + int16x8_t v2537 = vaddq_s16(v2537_tmp, v2536); + int16x8_t v2538 = vaddq_s16(v2535, v2537); + int16x8_t v2539 = vqrdmulhq_n_s16(v2538, 20398); + int16x8_t v2540 = vaddq_s16(v2534, v2539); + int16x8_t v2541 = vsubq_s16(v1773, v1778); + int16x8_t v2542 = vsubq_s16(v1783, v1788); + int16x8_t v2543_tmp = vqrdmulhq_n_s16(v2542, 23673); + int16x8_t v2543 = vaddq_s16(v2543_tmp, v2542); + int16x8_t v2544 = vaddq_s16(v2541, v2543); + int16x8_t v2545 = vsubq_s16(v1795, v1800); + int16x8_t v2546 = vsubq_s16(v1805, v1810); + int16x8_t v2547_tmp = vqrdmulhq_n_s16(v2546, 23673); + int16x8_t v2547 = vaddq_s16(v2547_tmp, 
v2546); + int16x8_t v2548 = vaddq_s16(v2545, v2547); + int16x8_t v2549 = vqrdmulhq_n_s16(v2548, 20398); + int16x8_t v2550 = vaddq_s16(v2544, v2549); + int16x8_t v2551 = vqrdmulhq_n_s16(v2550, 17255); + int16x8_t v2552 = vaddq_s16(v2540, v2551); + int16x8_t v2553 = vsubq_s16(v1821, v1826); + int16x8_t v2554 = vsubq_s16(v1831, v1836); + int16x8_t v2555_tmp = vqrdmulhq_n_s16(v2554, 23673); + int16x8_t v2555 = vaddq_s16(v2555_tmp, v2554); + int16x8_t v2556 = vaddq_s16(v2553, v2555); + int16x8_t v2557 = vsubq_s16(v1843, v1848); + int16x8_t v2558 = vsubq_s16(v1853, v1858); + int16x8_t v2559_tmp = vqrdmulhq_n_s16(v2558, 23673); + int16x8_t v2559 = vaddq_s16(v2559_tmp, v2558); + int16x8_t v2560 = vaddq_s16(v2557, v2559); + int16x8_t v2561 = vqrdmulhq_n_s16(v2560, 20398); + int16x8_t v2562 = vaddq_s16(v2556, v2561); + int16x8_t v2563 = vsubq_s16(v1867, v1872); + int16x8_t v2564 = vsubq_s16(v1877, v1882); + int16x8_t v2565_tmp = vqrdmulhq_n_s16(v2564, 23673); + int16x8_t v2565 = vaddq_s16(v2565_tmp, v2564); + int16x8_t v2566 = vaddq_s16(v2563, v2565); + int16x8_t v2567 = vsubq_s16(v1889, v1894); + int16x8_t v2568 = vsubq_s16(v1899, v1904); + int16x8_t v2569_tmp = vqrdmulhq_n_s16(v2568, 23673); + int16x8_t v2569 = vaddq_s16(v2569_tmp, v2568); + int16x8_t v2570 = vaddq_s16(v2567, v2569); + int16x8_t v2571 = vqrdmulhq_n_s16(v2570, 20398); + int16x8_t v2572 = vaddq_s16(v2566, v2571); + int16x8_t v2573 = vqrdmulhq_n_s16(v2572, 17255); + int16x8_t v2574 = vaddq_s16(v2562, v2573); + int16x8_t v2575 = vqrdmulhq_n_s16(v2574, 16595); + int16x8_t v2576 = vaddq_s16(v2552, v2575); + int16x8_t v2577 = vqrdmulhq_n_s16(v2576, 16436); + int16x8_t v2578 = vaddq_s16(v2530, v2577); + int16x8_t v2579 = vsubq_s16(v9, v24); + int16x8_t v2580 = vsubq_s16(v42, v58); + int16x8_t v2581_tmp = vqrdmulhq_n_s16(v2580, 3314); + int16x8_t v2581 = vmlaq_n_s16(v2581_tmp, v2580, 5); + int16x8_t v2582 = vaddq_s16(v2579, v2581); + int16x8_t v2583 = vsubq_s16(v78, v101); + int16x8_t v2584 = vsubq_s16(v119, v136); 
+ int16x8_t v2585_tmp = vqrdmulhq_n_s16(v2584, 3314); + int16x8_t v2585 = vmlaq_n_s16(v2585_tmp, v2584, 5); + int16x8_t v2586 = vaddq_s16(v2583, v2585); + int16x8_t v2587 = vqrdmulhq_n_s16(v2586, 22112); + int16x8_t v2588 = vaddq_s16(v2582, v2587); + int16x8_t v2589 = vsubq_s16(v158, v181); + int16x8_t v2590 = vsubq_s16(v213, v231); + int16x8_t v2591_tmp = vqrdmulhq_n_s16(v2590, 3314); + int16x8_t v2591 = vmlaq_n_s16(v2591_tmp, v2590, 5); + int16x8_t v2592 = vaddq_s16(v2589, v2591); + int16x8_t v2593 = vsubq_s16(v251, v274); + int16x8_t v2594 = vsubq_s16(v292, v310); + int16x8_t v2595_tmp = vqrdmulhq_n_s16(v2594, 3314); + int16x8_t v2595 = vmlaq_n_s16(v2595_tmp, v2594, 5); + int16x8_t v2596 = vaddq_s16(v2593, v2595); + int16x8_t v2597 = vqrdmulhq_n_s16(v2596, 22112); + int16x8_t v2598 = vaddq_s16(v2592, v2597); + int16x8_t v2599 = vqrdmulhq_n_s16(v2598, 17561); + int16x8_t v2600 = vaddq_s16(v2588, v2599); + int16x8_t v2601 = vsubq_s16(v334, v357); + int16x8_t v2602 = vsubq_s16(v389, v407); + int16x8_t v2603_tmp = vqrdmulhq_n_s16(v2602, 3314); + int16x8_t v2603 = vmlaq_n_s16(v2603_tmp, v2602, 5); + int16x8_t v2604 = vaddq_s16(v2601, v2603); + int16x8_t v2605 = vsubq_s16(v441, v480); + int16x8_t v2606 = vsubq_s16(v498, v517); + int16x8_t v2607_tmp = vqrdmulhq_n_s16(v2606, 3314); + int16x8_t v2607 = vmlaq_n_s16(v2607_tmp, v2606, 5); + int16x8_t v2608 = vaddq_s16(v2605, v2607); + int16x8_t v2609 = vqrdmulhq_n_s16(v2608, 22112); + int16x8_t v2610 = vaddq_s16(v2604, v2609); + int16x8_t v2611 = vsubq_s16(v539, v562); + int16x8_t v2612 = vsubq_s16(v594, v612); + int16x8_t v2613_tmp = vqrdmulhq_n_s16(v2612, 3314); + int16x8_t v2613 = vmlaq_n_s16(v2613_tmp, v2612, 5); + int16x8_t v2614 = vaddq_s16(v2611, v2613); + int16x8_t v2615 = vsubq_s16(v632, v655); + int16x8_t v2616 = vsubq_s16(v673, v692); + int16x8_t v2617_tmp = vqrdmulhq_n_s16(v2616, 3314); + int16x8_t v2617 = vmlaq_n_s16(v2617_tmp, v2616, 5); + int16x8_t v2618 = vaddq_s16(v2615, v2617); + int16x8_t v2619 = 
vqrdmulhq_n_s16(v2618, 22112); + int16x8_t v2620 = vaddq_s16(v2614, v2619); + int16x8_t v2621 = vqrdmulhq_n_s16(v2620, 17561); + int16x8_t v2622 = vaddq_s16(v2610, v2621); + int16x8_t v2623 = vqrdmulhq_n_s16(v2622, 16666); + int16x8_t v2624 = vaddq_s16(v2600, v2623); + int16x8_t v2625 = vsubq_s16(v718, v741); + int16x8_t v2626 = vsubq_s16(v773, v791); + int16x8_t v2627_tmp = vqrdmulhq_n_s16(v2626, 3314); + int16x8_t v2627 = vmlaq_n_s16(v2627_tmp, v2626, 5); + int16x8_t v2628 = vaddq_s16(v2625, v2627); + int16x8_t v2629 = vsubq_s16(v825, v864); + int16x8_t v2630 = vsubq_s16(v882, v901); + int16x8_t v2631_tmp = vqrdmulhq_n_s16(v2630, 3314); + int16x8_t v2631 = vmlaq_n_s16(v2631_tmp, v2630, 5); + int16x8_t v2632 = vaddq_s16(v2629, v2631); + int16x8_t v2633 = vqrdmulhq_n_s16(v2632, 22112); + int16x8_t v2634 = vaddq_s16(v2628, v2633); + int16x8_t v2635 = vsubq_s16(v937, v976); + int16x8_t v2636 = vsubq_s16(v1036, v1058); + int16x8_t v2637_tmp = vqrdmulhq_n_s16(v2636, 3314); + int16x8_t v2637 = vmlaq_n_s16(v2637_tmp, v2636, 5); + int16x8_t v2638 = vaddq_s16(v2635, v2637); + int16x8_t v2639 = vsubq_s16(v1078, v1101); + int16x8_t v2640 = vsubq_s16(v1119, v1139); + int16x8_t v2641_tmp = vqrdmulhq_n_s16(v2640, 3314); + int16x8_t v2641 = vmlaq_n_s16(v2641_tmp, v2640, 5); + int16x8_t v2642 = vaddq_s16(v2639, v2641); + int16x8_t v2643 = vqrdmulhq_n_s16(v2642, 22112); + int16x8_t v2644 = vaddq_s16(v2638, v2643); + int16x8_t v2645 = vqrdmulhq_n_s16(v2644, 17561); + int16x8_t v2646 = vaddq_s16(v2634, v2645); + int16x8_t v2647 = vsubq_s16(v1163, v1186); + int16x8_t v2648 = vsubq_s16(v1218, v1236); + int16x8_t v2649_tmp = vqrdmulhq_n_s16(v2648, 3314); + int16x8_t v2649 = vmlaq_n_s16(v2649_tmp, v2648, 5); + int16x8_t v2650 = vaddq_s16(v2647, v2649); + int16x8_t v2651 = vsubq_s16(v1270, v1309); + int16x8_t v2652 = vsubq_s16(v1327, v1346); + int16x8_t v2653_tmp = vqrdmulhq_n_s16(v2652, 3314); + int16x8_t v2653 = vmlaq_n_s16(v2653_tmp, v2652, 5); + int16x8_t v2654 = vaddq_s16(v2651, 
v2653); + int16x8_t v2655 = vqrdmulhq_n_s16(v2654, 22112); + int16x8_t v2656 = vaddq_s16(v2650, v2655); + int16x8_t v2657 = vsubq_s16(v1368, v1391); + int16x8_t v2658 = vsubq_s16(v1423, v1441); + int16x8_t v2659_tmp = vqrdmulhq_n_s16(v2658, 3314); + int16x8_t v2659 = vmlaq_n_s16(v2659_tmp, v2658, 5); + int16x8_t v2660 = vaddq_s16(v2657, v2659); + int16x8_t v2661 = vsubq_s16(v1461, v1484); + int16x8_t v2662 = vsubq_s16(v1502, v1522); + int16x8_t v2663_tmp = vqrdmulhq_n_s16(v2662, 3314); + int16x8_t v2663 = vmlaq_n_s16(v2663_tmp, v2662, 5); + int16x8_t v2664 = vaddq_s16(v2661, v2663); + int16x8_t v2665 = vqrdmulhq_n_s16(v2664, 22112); + int16x8_t v2666 = vaddq_s16(v2660, v2665); + int16x8_t v2667 = vqrdmulhq_n_s16(v2666, 17561); + int16x8_t v2668 = vaddq_s16(v2656, v2667); + int16x8_t v2669 = vqrdmulhq_n_s16(v2668, 16666); + int16x8_t v2670 = vaddq_s16(v2646, v2669); + int16x8_t v2671 = vqrdmulhq_n_s16(v2670, 16454); + int16x8_t v2672 = vaddq_s16(v2624, v2671); + int16x8_t v2673 = vsubq_s16(v2579, v2581); + int16x8_t v2674 = vsubq_s16(v2583, v2585); + int16x8_t v2675 = vqrdmulhq_n_s16(v2674, 24397); + int16x8_t v2676 = vaddq_s16(v2673, v2675); + int16x8_t v2677 = vsubq_s16(v2589, v2591); + int16x8_t v2678 = vsubq_s16(v2593, v2595); + int16x8_t v2679 = vqrdmulhq_n_s16(v2678, 24397); + int16x8_t v2680 = vaddq_s16(v2677, v2679); + int16x8_t v2681 = vqrdmulhq_n_s16(v2680, 17921); + int16x8_t v2682 = vaddq_s16(v2676, v2681); + int16x8_t v2683 = vsubq_s16(v2601, v2603); + int16x8_t v2684 = vsubq_s16(v2605, v2607); + int16x8_t v2685 = vqrdmulhq_n_s16(v2684, 24397); + int16x8_t v2686 = vaddq_s16(v2683, v2685); + int16x8_t v2687 = vsubq_s16(v2611, v2613); + int16x8_t v2688 = vsubq_s16(v2615, v2617); + int16x8_t v2689 = vqrdmulhq_n_s16(v2688, 24397); + int16x8_t v2690 = vaddq_s16(v2687, v2689); + int16x8_t v2691 = vqrdmulhq_n_s16(v2690, 17921); + int16x8_t v2692 = vaddq_s16(v2686, v2691); + int16x8_t v2693 = vqrdmulhq_n_s16(v2692, 16747); + int16x8_t v2694 = vaddq_s16(v2682, 
v2693); + int16x8_t v2695 = vsubq_s16(v2625, v2627); + int16x8_t v2696 = vsubq_s16(v2629, v2631); + int16x8_t v2697 = vqrdmulhq_n_s16(v2696, 24397); + int16x8_t v2698 = vaddq_s16(v2695, v2697); + int16x8_t v2699 = vsubq_s16(v2635, v2637); + int16x8_t v2700 = vsubq_s16(v2639, v2641); + int16x8_t v2701 = vqrdmulhq_n_s16(v2700, 24397); + int16x8_t v2702 = vaddq_s16(v2699, v2701); + int16x8_t v2703 = vqrdmulhq_n_s16(v2702, 17921); + int16x8_t v2704 = vaddq_s16(v2698, v2703); + int16x8_t v2705 = vsubq_s16(v2647, v2649); + int16x8_t v2706 = vsubq_s16(v2651, v2653); + int16x8_t v2707 = vqrdmulhq_n_s16(v2706, 24397); + int16x8_t v2708 = vaddq_s16(v2705, v2707); + int16x8_t v2709 = vsubq_s16(v2657, v2659); + int16x8_t v2710 = vsubq_s16(v2661, v2663); + int16x8_t v2711 = vqrdmulhq_n_s16(v2710, 24397); + int16x8_t v2712 = vaddq_s16(v2709, v2711); + int16x8_t v2713 = vqrdmulhq_n_s16(v2712, 17921); + int16x8_t v2714 = vaddq_s16(v2708, v2713); + int16x8_t v2715 = vqrdmulhq_n_s16(v2714, 16747); + int16x8_t v2716 = vaddq_s16(v2704, v2715); + int16x8_t v2717 = vqrdmulhq_n_s16(v2716, 16474); + int16x8_t v2718 = vaddq_s16(v2694, v2717); + int16x8_t v2719 = vsubq_s16(v2485, v2487); + int16x8_t v2720 = vsubq_s16(v2489, v2491); + int16x8_t v2721 = vqrdmulhq_n_s16(v2720, 27504); + int16x8_t v2722 = vaddq_s16(v2719, v2721); + int16x8_t v2723 = vsubq_s16(v2495, v2497); + int16x8_t v2724 = vsubq_s16(v2499, v2501); + int16x8_t v2725 = vqrdmulhq_n_s16(v2724, 27504); + int16x8_t v2726 = vaddq_s16(v2723, v2725); + int16x8_t v2727 = vqrdmulhq_n_s16(v2726, 18343); + int16x8_t v2728 = vaddq_s16(v2722, v2727); + int16x8_t v2729 = vsubq_s16(v2507, v2509); + int16x8_t v2730 = vsubq_s16(v2511, v2513); + int16x8_t v2731 = vqrdmulhq_n_s16(v2730, 27504); + int16x8_t v2732 = vaddq_s16(v2729, v2731); + int16x8_t v2733 = vsubq_s16(v2517, v2519); + int16x8_t v2734 = vsubq_s16(v2521, v2523); + int16x8_t v2735 = vqrdmulhq_n_s16(v2734, 27504); + int16x8_t v2736 = vaddq_s16(v2733, v2735); + int16x8_t v2737 = 
vqrdmulhq_n_s16(v2736, 18343); + int16x8_t v2738 = vaddq_s16(v2732, v2737); + int16x8_t v2739 = vqrdmulhq_n_s16(v2738, 16840); + int16x8_t v2740 = vaddq_s16(v2728, v2739); + int16x8_t v2741 = vsubq_s16(v2531, v2533); + int16x8_t v2742 = vsubq_s16(v2535, v2537); + int16x8_t v2743 = vqrdmulhq_n_s16(v2742, 27504); + int16x8_t v2744 = vaddq_s16(v2741, v2743); + int16x8_t v2745 = vsubq_s16(v2541, v2543); + int16x8_t v2746 = vsubq_s16(v2545, v2547); + int16x8_t v2747 = vqrdmulhq_n_s16(v2746, 27504); + int16x8_t v2748 = vaddq_s16(v2745, v2747); + int16x8_t v2749 = vqrdmulhq_n_s16(v2748, 18343); + int16x8_t v2750 = vaddq_s16(v2744, v2749); + int16x8_t v2751 = vsubq_s16(v2553, v2555); + int16x8_t v2752 = vsubq_s16(v2557, v2559); + int16x8_t v2753 = vqrdmulhq_n_s16(v2752, 27504); + int16x8_t v2754 = vaddq_s16(v2751, v2753); + int16x8_t v2755 = vsubq_s16(v2563, v2565); + int16x8_t v2756 = vsubq_s16(v2567, v2569); + int16x8_t v2757 = vqrdmulhq_n_s16(v2756, 27504); + int16x8_t v2758 = vaddq_s16(v2755, v2757); + int16x8_t v2759 = vqrdmulhq_n_s16(v2758, 18343); + int16x8_t v2760 = vaddq_s16(v2754, v2759); + int16x8_t v2761 = vqrdmulhq_n_s16(v2760, 16840); + int16x8_t v2762 = vaddq_s16(v2750, v2761); + int16x8_t v2763 = vqrdmulhq_n_s16(v2762, 16496); + int16x8_t v2764 = vaddq_s16(v2740, v2763); + int16x8_t v2765 = vsubq_s16(v2390, v2392); + int16x8_t v2766 = vsubq_s16(v2394, v2396); + int16x8_t v2767 = vqrdmulhq_n_s16(v2766, 31869); + int16x8_t v2768 = vaddq_s16(v2765, v2767); + int16x8_t v2769 = vsubq_s16(v2400, v2402); + int16x8_t v2770 = vsubq_s16(v2404, v2406); + int16x8_t v2771 = vqrdmulhq_n_s16(v2770, 31869); + int16x8_t v2772 = vaddq_s16(v2769, v2771); + int16x8_t v2773 = vqrdmulhq_n_s16(v2772, 18830); + int16x8_t v2774 = vaddq_s16(v2768, v2773); + int16x8_t v2775 = vsubq_s16(v2412, v2414); + int16x8_t v2776 = vsubq_s16(v2416, v2418); + int16x8_t v2777 = vqrdmulhq_n_s16(v2776, 31869); + int16x8_t v2778 = vaddq_s16(v2775, v2777); + int16x8_t v2779 = vsubq_s16(v2422, v2424); 
+ int16x8_t v2780 = vsubq_s16(v2426, v2428); + int16x8_t v2781 = vqrdmulhq_n_s16(v2780, 31869); + int16x8_t v2782 = vaddq_s16(v2779, v2781); + int16x8_t v2783 = vqrdmulhq_n_s16(v2782, 18830); + int16x8_t v2784 = vaddq_s16(v2778, v2783); + int16x8_t v2785 = vqrdmulhq_n_s16(v2784, 16944); + int16x8_t v2786 = vaddq_s16(v2774, v2785); + int16x8_t v2787 = vsubq_s16(v2436, v2438); + int16x8_t v2788 = vsubq_s16(v2440, v2442); + int16x8_t v2789 = vqrdmulhq_n_s16(v2788, 31869); + int16x8_t v2790 = vaddq_s16(v2787, v2789); + int16x8_t v2791 = vsubq_s16(v2446, v2448); + int16x8_t v2792 = vsubq_s16(v2450, v2452); + int16x8_t v2793 = vqrdmulhq_n_s16(v2792, 31869); + int16x8_t v2794 = vaddq_s16(v2791, v2793); + int16x8_t v2795 = vqrdmulhq_n_s16(v2794, 18830); + int16x8_t v2796 = vaddq_s16(v2790, v2795); + int16x8_t v2797 = vsubq_s16(v2458, v2460); + int16x8_t v2798 = vsubq_s16(v2462, v2465); + int16x8_t v2799 = vqrdmulhq_n_s16(v2798, 31869); + int16x8_t v2800 = vaddq_s16(v2797, v2799); + int16x8_t v2801 = vsubq_s16(v2469, v2471); + int16x8_t v2802 = vsubq_s16(v2473, v2475); + int16x8_t v2803 = vqrdmulhq_n_s16(v2802, 31869); + int16x8_t v2804 = vaddq_s16(v2801, v2803); + int16x8_t v2805 = vqrdmulhq_n_s16(v2804, 18830); + int16x8_t v2806 = vaddq_s16(v2800, v2805); + int16x8_t v2807 = vqrdmulhq_n_s16(v2806, 16944); + int16x8_t v2808 = vaddq_s16(v2796, v2807); + int16x8_t v2809 = vqrdmulhq_n_s16(v2808, 16521); + int16x8_t v2810 = vaddq_s16(v2786, v2809); + int16x8_t v2811 = vsubq_s16(v2296, v2298); + int16x8_t v2812 = vsubq_s16(v2300, v2302); + int16x8_t v2813_tmp = vqrdmulhq_n_s16(v2812, 5552); + int16x8_t v2813 = vaddq_s16(v2813_tmp, v2812); + int16x8_t v2814 = vaddq_s16(v2811, v2813); + int16x8_t v2815 = vsubq_s16(v2306, v2308); + int16x8_t v2816 = vsubq_s16(v2310, v2312); + int16x8_t v2817_tmp = vqrdmulhq_n_s16(v2816, 5552); + int16x8_t v2817 = vaddq_s16(v2817_tmp, v2816); + int16x8_t v2818 = vaddq_s16(v2815, v2817); + int16x8_t v2819 = vqrdmulhq_n_s16(v2818, 19393); + int16x8_t 
v2820 = vaddq_s16(v2814, v2819); + int16x8_t v2821 = vsubq_s16(v2318, v2320); + int16x8_t v2822 = vsubq_s16(v2322, v2324); + int16x8_t v2823_tmp = vqrdmulhq_n_s16(v2822, 5552); + int16x8_t v2823 = vaddq_s16(v2823_tmp, v2822); + int16x8_t v2824 = vaddq_s16(v2821, v2823); + int16x8_t v2825 = vsubq_s16(v2328, v2330); + int16x8_t v2826 = vsubq_s16(v2332, v2334); + int16x8_t v2827_tmp = vqrdmulhq_n_s16(v2826, 5552); + int16x8_t v2827 = vaddq_s16(v2827_tmp, v2826); + int16x8_t v2828 = vaddq_s16(v2825, v2827); + int16x8_t v2829 = vqrdmulhq_n_s16(v2828, 19393); + int16x8_t v2830 = vaddq_s16(v2824, v2829); + int16x8_t v2831 = vqrdmulhq_n_s16(v2830, 17059); + int16x8_t v2832 = vaddq_s16(v2820, v2831); + int16x8_t v2833 = vsubq_s16(v2342, v2344); + int16x8_t v2834 = vsubq_s16(v2346, v2348); + int16x8_t v2835_tmp = vqrdmulhq_n_s16(v2834, 5552); + int16x8_t v2835 = vaddq_s16(v2835_tmp, v2834); + int16x8_t v2836 = vaddq_s16(v2833, v2835); + int16x8_t v2837 = vsubq_s16(v2352, v2354); + int16x8_t v2838 = vsubq_s16(v2356, v2358); + int16x8_t v2839_tmp = vqrdmulhq_n_s16(v2838, 5552); + int16x8_t v2839 = vaddq_s16(v2839_tmp, v2838); + int16x8_t v2840 = vaddq_s16(v2837, v2839); + int16x8_t v2841 = vqrdmulhq_n_s16(v2840, 19393); + int16x8_t v2842 = vaddq_s16(v2836, v2841); + int16x8_t v2843 = vsubq_s16(v2364, v2366); + int16x8_t v2844 = vsubq_s16(v2368, v2370); + int16x8_t v2845_tmp = vqrdmulhq_n_s16(v2844, 5552); + int16x8_t v2845 = vaddq_s16(v2845_tmp, v2844); + int16x8_t v2846 = vaddq_s16(v2843, v2845); + int16x8_t v2847 = vsubq_s16(v2374, v2376); + int16x8_t v2848 = vsubq_s16(v2378, v2380); + int16x8_t v2849_tmp = vqrdmulhq_n_s16(v2848, 5552); + int16x8_t v2849 = vaddq_s16(v2849_tmp, v2848); + int16x8_t v2850 = vaddq_s16(v2847, v2849); + int16x8_t v2851 = vqrdmulhq_n_s16(v2850, 19393); + int16x8_t v2852 = vaddq_s16(v2846, v2851); + int16x8_t v2853 = vqrdmulhq_n_s16(v2852, 17059); + int16x8_t v2854 = vaddq_s16(v2842, v2853); + int16x8_t v2855 = vqrdmulhq_n_s16(v2854, 16549); + 
int16x8_t v2856 = vaddq_s16(v2832, v2855); + int16x8_t v2857 = vsubq_s16(v2109, v2114); + int16x8_t v2858 = vsubq_s16(v2119, v2124); + int16x8_t v2859_tmp = vqrdmulhq_n_s16(v2858, 15865); + int16x8_t v2859 = vaddq_s16(v2859_tmp, v2858); + int16x8_t v2860 = vaddq_s16(v2857, v2859); + int16x8_t v2861 = vsubq_s16(v2131, v2136); + int16x8_t v2862 = vsubq_s16(v2141, v2146); + int16x8_t v2863_tmp = vqrdmulhq_n_s16(v2862, 15865); + int16x8_t v2863 = vaddq_s16(v2863_tmp, v2862); + int16x8_t v2864 = vaddq_s16(v2861, v2863); + int16x8_t v2865 = vqrdmulhq_n_s16(v2864, 20040); + int16x8_t v2866 = vaddq_s16(v2860, v2865); + int16x8_t v2867 = vsubq_s16(v2155, v2160); + int16x8_t v2868 = vsubq_s16(v2165, v2170); + int16x8_t v2869_tmp = vqrdmulhq_n_s16(v2868, 15865); + int16x8_t v2869 = vaddq_s16(v2869_tmp, v2868); + int16x8_t v2870 = vaddq_s16(v2867, v2869); + int16x8_t v2871 = vsubq_s16(v2177, v2182); + int16x8_t v2872 = vsubq_s16(v2187, v2192); + int16x8_t v2873_tmp = vqrdmulhq_n_s16(v2872, 15865); + int16x8_t v2873 = vaddq_s16(v2873_tmp, v2872); + int16x8_t v2874 = vaddq_s16(v2871, v2873); + int16x8_t v2875 = vqrdmulhq_n_s16(v2874, 20040); + int16x8_t v2876 = vaddq_s16(v2870, v2875); + int16x8_t v2877 = vqrdmulhq_n_s16(v2876, 17187); + int16x8_t v2878 = vaddq_s16(v2866, v2877); + int16x8_t v2879 = vsubq_s16(v2203, v2208); + int16x8_t v2880 = vsubq_s16(v2213, v2218); + int16x8_t v2881_tmp = vqrdmulhq_n_s16(v2880, 15865); + int16x8_t v2881 = vaddq_s16(v2881_tmp, v2880); + int16x8_t v2882 = vaddq_s16(v2879, v2881); + int16x8_t v2883 = vsubq_s16(v2225, v2230); + int16x8_t v2884 = vsubq_s16(v2235, v2240); + int16x8_t v2885_tmp = vqrdmulhq_n_s16(v2884, 15865); + int16x8_t v2885 = vaddq_s16(v2885_tmp, v2884); + int16x8_t v2886 = vaddq_s16(v2883, v2885); + int16x8_t v2887 = vqrdmulhq_n_s16(v2886, 20040); + int16x8_t v2888 = vaddq_s16(v2882, v2887); + int16x8_t v2889 = vsubq_s16(v2249, v2254); + int16x8_t v2890 = vsubq_s16(v2259, v2264); + int16x8_t v2891_tmp = vqrdmulhq_n_s16(v2890, 
15865); + int16x8_t v2891 = vaddq_s16(v2891_tmp, v2890); + int16x8_t v2892 = vaddq_s16(v2889, v2891); + int16x8_t v2893 = vsubq_s16(v2271, v2276); + int16x8_t v2894 = vsubq_s16(v2281, v2286); + int16x8_t v2895_tmp = vqrdmulhq_n_s16(v2894, 15865); + int16x8_t v2895 = vaddq_s16(v2895_tmp, v2894); + int16x8_t v2896 = vaddq_s16(v2893, v2895); + int16x8_t v2897 = vqrdmulhq_n_s16(v2896, 20040); + int16x8_t v2898 = vaddq_s16(v2892, v2897); + int16x8_t v2899 = vqrdmulhq_n_s16(v2898, 17187); + int16x8_t v2900 = vaddq_s16(v2888, v2899); + int16x8_t v2901 = vqrdmulhq_n_s16(v2900, 16579); + int16x8_t v2902 = vaddq_s16(v2878, v2901); + int16x8_t v2903 = vsubq_s16(v1919, v1924); + int16x8_t v2904 = vsubq_s16(v1929, v1934); + int16x8_t v2905_tmp = vqrdmulhq_n_s16(v2904, 1893); + int16x8_t v2905 = vmlaq_n_s16(v2905_tmp, v2904, 2); + int16x8_t v2906 = vaddq_s16(v2903, v2905); + int16x8_t v2907 = vsubq_s16(v1941, v1946); + int16x8_t v2908 = vsubq_s16(v1951, v1956); + int16x8_t v2909_tmp = vqrdmulhq_n_s16(v2908, 1893); + int16x8_t v2909 = vmlaq_n_s16(v2909_tmp, v2908, 2); + int16x8_t v2910 = vaddq_s16(v2907, v2909); + int16x8_t v2911 = vqrdmulhq_n_s16(v2910, 20783); + int16x8_t v2912 = vaddq_s16(v2906, v2911); + int16x8_t v2913 = vsubq_s16(v1965, v1970); + int16x8_t v2914 = vsubq_s16(v1975, v1980); + int16x8_t v2915_tmp = vqrdmulhq_n_s16(v2914, 1893); + int16x8_t v2915 = vmlaq_n_s16(v2915_tmp, v2914, 2); + int16x8_t v2916 = vaddq_s16(v2913, v2915); + int16x8_t v2917 = vsubq_s16(v1987, v1992); + int16x8_t v2918 = vsubq_s16(v1997, v2002); + int16x8_t v2919_tmp = vqrdmulhq_n_s16(v2918, 1893); + int16x8_t v2919 = vmlaq_n_s16(v2919_tmp, v2918, 2); + int16x8_t v2920 = vaddq_s16(v2917, v2919); + int16x8_t v2921 = vqrdmulhq_n_s16(v2920, 20783); + int16x8_t v2922 = vaddq_s16(v2916, v2921); + int16x8_t v2923 = vqrdmulhq_n_s16(v2922, 17326); + int16x8_t v2924 = vaddq_s16(v2912, v2923); + int16x8_t v2925 = vsubq_s16(v2013, v2018); + int16x8_t v2926 = vsubq_s16(v2023, v2028); + int16x8_t 
v2927_tmp = vqrdmulhq_n_s16(v2926, 1893); + int16x8_t v2927 = vmlaq_n_s16(v2927_tmp, v2926, 2); + int16x8_t v2928 = vaddq_s16(v2925, v2927); + int16x8_t v2929 = vsubq_s16(v2035, v2040); + int16x8_t v2930 = vsubq_s16(v2045, v2050); + int16x8_t v2931_tmp = vqrdmulhq_n_s16(v2930, 1893); + int16x8_t v2931 = vmlaq_n_s16(v2931_tmp, v2930, 2); + int16x8_t v2932 = vaddq_s16(v2929, v2931); + int16x8_t v2933 = vqrdmulhq_n_s16(v2932, 20783); + int16x8_t v2934 = vaddq_s16(v2928, v2933); + int16x8_t v2935 = vsubq_s16(v2059, v2064); + int16x8_t v2936 = vsubq_s16(v2069, v2074); + int16x8_t v2937_tmp = vqrdmulhq_n_s16(v2936, 1893); + int16x8_t v2937 = vmlaq_n_s16(v2937_tmp, v2936, 2); + int16x8_t v2938 = vaddq_s16(v2935, v2937); + int16x8_t v2939 = vsubq_s16(v2081, v2086); + int16x8_t v2940 = vsubq_s16(v2091, v2096); + int16x8_t v2941_tmp = vqrdmulhq_n_s16(v2940, 1893); + int16x8_t v2941 = vmlaq_n_s16(v2941_tmp, v2940, 2); + int16x8_t v2942 = vaddq_s16(v2939, v2941); + int16x8_t v2943 = vqrdmulhq_n_s16(v2942, 20783); + int16x8_t v2944 = vaddq_s16(v2938, v2943); + int16x8_t v2945 = vqrdmulhq_n_s16(v2944, 17326); + int16x8_t v2946 = vaddq_s16(v2934, v2945); + int16x8_t v2947 = vqrdmulhq_n_s16(v2946, 16611); + int16x8_t v2948 = vaddq_s16(v2924, v2947); + int16x8_t v2949 = vsubq_s16(v1543, v1554); + int16x8_t v2950 = vsubq_s16(v1565, v1576); + int16x8_t v2951_tmp = vqrdmulhq_n_s16(v2950, 13357); + int16x8_t v2951 = vmlaq_n_s16(v2951_tmp, v2950, 3); + int16x8_t v2952 = vaddq_s16(v2949, v2951); + int16x8_t v2953 = vsubq_s16(v1589, v1600); + int16x8_t v2954 = vsubq_s16(v1611, v1622); + int16x8_t v2955_tmp = vqrdmulhq_n_s16(v2954, 13357); + int16x8_t v2955 = vmlaq_n_s16(v2955_tmp, v2954, 3); + int16x8_t v2956 = vaddq_s16(v2953, v2955); + int16x8_t v2957 = vqrdmulhq_n_s16(v2956, 21637); + int16x8_t v2958 = vaddq_s16(v2952, v2957); + int16x8_t v2959 = vsubq_s16(v1637, v1648); + int16x8_t v2960 = vsubq_s16(v1659, v1670); + int16x8_t v2961_tmp = vqrdmulhq_n_s16(v2960, 13357); + int16x8_t 
v2961 = vmlaq_n_s16(v2961_tmp, v2960, 3); + int16x8_t v2962 = vaddq_s16(v2959, v2961); + int16x8_t v2963 = vsubq_s16(v1683, v1694); + int16x8_t v2964 = vsubq_s16(v1705, v1716); + int16x8_t v2965_tmp = vqrdmulhq_n_s16(v2964, 13357); + int16x8_t v2965 = vmlaq_n_s16(v2965_tmp, v2964, 3); + int16x8_t v2966 = vaddq_s16(v2963, v2965); + int16x8_t v2967 = vqrdmulhq_n_s16(v2966, 21637); + int16x8_t v2968 = vaddq_s16(v2962, v2967); + int16x8_t v2969 = vqrdmulhq_n_s16(v2968, 17479); + int16x8_t v2970 = vaddq_s16(v2958, v2969); + int16x8_t v2971 = vsubq_s16(v1733, v1744); + int16x8_t v2972 = vsubq_s16(v1755, v1766); + int16x8_t v2973_tmp = vqrdmulhq_n_s16(v2972, 13357); + int16x8_t v2973 = vmlaq_n_s16(v2973_tmp, v2972, 3); + int16x8_t v2974 = vaddq_s16(v2971, v2973); + int16x8_t v2975 = vsubq_s16(v1779, v1790); + int16x8_t v2976 = vsubq_s16(v1801, v1812); + int16x8_t v2977_tmp = vqrdmulhq_n_s16(v2976, 13357); + int16x8_t v2977 = vmlaq_n_s16(v2977_tmp, v2976, 3); + int16x8_t v2978 = vaddq_s16(v2975, v2977); + int16x8_t v2979 = vqrdmulhq_n_s16(v2978, 21637); + int16x8_t v2980 = vaddq_s16(v2974, v2979); + int16x8_t v2981 = vsubq_s16(v1827, v1838); + int16x8_t v2982 = vsubq_s16(v1849, v1860); + int16x8_t v2983_tmp = vqrdmulhq_n_s16(v2982, 13357); + int16x8_t v2983 = vmlaq_n_s16(v2983_tmp, v2982, 3); + int16x8_t v2984 = vaddq_s16(v2981, v2983); + int16x8_t v2985 = vsubq_s16(v1873, v1884); + int16x8_t v2986 = vsubq_s16(v1895, v1906); + int16x8_t v2987_tmp = vqrdmulhq_n_s16(v2986, 13357); + int16x8_t v2987 = vmlaq_n_s16(v2987_tmp, v2986, 3); + int16x8_t v2988 = vaddq_s16(v2985, v2987); + int16x8_t v2989 = vqrdmulhq_n_s16(v2988, 21637); + int16x8_t v2990 = vaddq_s16(v2984, v2989); + int16x8_t v2991 = vqrdmulhq_n_s16(v2990, 17479); + int16x8_t v2992 = vaddq_s16(v2980, v2991); + int16x8_t v2993 = vqrdmulhq_n_s16(v2992, 16647); + int16x8_t v2994 = vaddq_s16(v2970, v2993); + int16x8_t v2995 = vsubq_s16(v25, v60); + int16x8_t v2996 = vsubq_s16(v102, v138); + int16x8_t v2997_tmp = 
vqrdmulhq_n_s16(v2996, 6226); + int16x8_t v2997 = vmlaq_n_s16(v2997_tmp, v2996, 10); + int16x8_t v2998 = vaddq_s16(v2995, v2997); + int16x8_t v2999 = vsubq_s16(v182, v233); + int16x8_t v3000 = vsubq_s16(v275, v312); + int16x8_t v3001_tmp = vqrdmulhq_n_s16(v3000, 6226); + int16x8_t v3001 = vmlaq_n_s16(v3001_tmp, v3000, 10); + int16x8_t v3002 = vaddq_s16(v2999, v3001); + int16x8_t v3003 = vqrdmulhq_n_s16(v3002, 22622); + int16x8_t v3004 = vaddq_s16(v2998, v3003); + int16x8_t v3005 = vsubq_s16(v358, v409); + int16x8_t v3006 = vsubq_s16(v481, v519); + int16x8_t v3007_tmp = vqrdmulhq_n_s16(v3006, 6226); + int16x8_t v3007 = vmlaq_n_s16(v3007_tmp, v3006, 10); + int16x8_t v3008 = vaddq_s16(v3005, v3007); + int16x8_t v3009 = vsubq_s16(v563, v614); + int16x8_t v3010 = vsubq_s16(v656, v694); + int16x8_t v3011_tmp = vqrdmulhq_n_s16(v3010, 6226); + int16x8_t v3011 = vmlaq_n_s16(v3011_tmp, v3010, 10); + int16x8_t v3012 = vaddq_s16(v3009, v3011); + int16x8_t v3013 = vqrdmulhq_n_s16(v3012, 22622); + int16x8_t v3014 = vaddq_s16(v3008, v3013); + int16x8_t v3015 = vqrdmulhq_n_s16(v3014, 17646); + int16x8_t v3016 = vaddq_s16(v3004, v3015); + int16x8_t v3017 = vsubq_s16(v742, v793); + int16x8_t v3018 = vsubq_s16(v865, v903); + int16x8_t v3019_tmp = vqrdmulhq_n_s16(v3018, 6226); + int16x8_t v3019 = vmlaq_n_s16(v3019_tmp, v3018, 10); + int16x8_t v3020 = vaddq_s16(v3017, v3019); + int16x8_t v3021 = vsubq_s16(v977, v1060); + int16x8_t v3022 = vsubq_s16(v1102, v1141); + int16x8_t v3023_tmp = vqrdmulhq_n_s16(v3022, 6226); + int16x8_t v3023 = vmlaq_n_s16(v3023_tmp, v3022, 10); + int16x8_t v3024 = vaddq_s16(v3021, v3023); + int16x8_t v3025 = vqrdmulhq_n_s16(v3024, 22622); + int16x8_t v3026 = vaddq_s16(v3020, v3025); + int16x8_t v3027 = vsubq_s16(v1187, v1238); + int16x8_t v3028 = vsubq_s16(v1310, v1348); + int16x8_t v3029_tmp = vqrdmulhq_n_s16(v3028, 6226); + int16x8_t v3029 = vmlaq_n_s16(v3029_tmp, v3028, 10); + int16x8_t v3030 = vaddq_s16(v3027, v3029); + int16x8_t v3031 = vsubq_s16(v1392, 
v1443); + int16x8_t v3032 = vsubq_s16(v1485, v1524); + int16x8_t v3033_tmp = vqrdmulhq_n_s16(v3032, 6226); + int16x8_t v3033 = vmlaq_n_s16(v3033_tmp, v3032, 10); + int16x8_t v3034 = vaddq_s16(v3031, v3033); + int16x8_t v3035 = vqrdmulhq_n_s16(v3034, 22622); + int16x8_t v3036 = vaddq_s16(v3030, v3035); + int16x8_t v3037 = vqrdmulhq_n_s16(v3036, 17646); + int16x8_t v3038 = vaddq_s16(v3026, v3037); + int16x8_t v3039 = vqrdmulhq_n_s16(v3038, 16685); + int16x8_t v3040 = vaddq_s16(v3016, v3039); + int16x8_t v3041 = vsubq_s16(v2995, v2997); + int16x8_t v3042 = vsubq_s16(v2999, v3001); + int16x8_t v3043 = vqrdmulhq_n_s16(v3042, 23761); + int16x8_t v3044 = vaddq_s16(v3041, v3043); + int16x8_t v3045 = vsubq_s16(v3005, v3007); + int16x8_t v3046 = vsubq_s16(v3009, v3011); + int16x8_t v3047 = vqrdmulhq_n_s16(v3046, 23761); + int16x8_t v3048 = vaddq_s16(v3045, v3047); + int16x8_t v3049 = vqrdmulhq_n_s16(v3048, 17826); + int16x8_t v3050 = vaddq_s16(v3044, v3049); + int16x8_t v3051 = vsubq_s16(v3017, v3019); + int16x8_t v3052 = vsubq_s16(v3021, v3023); + int16x8_t v3053 = vqrdmulhq_n_s16(v3052, 23761); + int16x8_t v3054 = vaddq_s16(v3051, v3053); + int16x8_t v3055 = vsubq_s16(v3027, v3029); + int16x8_t v3056 = vsubq_s16(v3031, v3033); + int16x8_t v3057 = vqrdmulhq_n_s16(v3056, 23761); + int16x8_t v3058 = vaddq_s16(v3055, v3057); + int16x8_t v3059 = vqrdmulhq_n_s16(v3058, 17826); + int16x8_t v3060 = vaddq_s16(v3054, v3059); + int16x8_t v3061 = vqrdmulhq_n_s16(v3060, 16726); + int16x8_t v3062 = vaddq_s16(v3050, v3061); + int16x8_t v3063 = vsubq_s16(v2949, v2951); + int16x8_t v3064 = vsubq_s16(v2953, v2955); + int16x8_t v3065 = vqrdmulhq_n_s16(v3064, 25084); + int16x8_t v3066 = vaddq_s16(v3063, v3065); + int16x8_t v3067 = vsubq_s16(v2959, v2961); + int16x8_t v3068 = vsubq_s16(v2963, v2965); + int16x8_t v3069 = vqrdmulhq_n_s16(v3068, 25084); + int16x8_t v3070 = vaddq_s16(v3067, v3069); + int16x8_t v3071 = vqrdmulhq_n_s16(v3070, 18021); + int16x8_t v3072 = vaddq_s16(v3066, v3071); + 
int16x8_t v3073 = vsubq_s16(v2971, v2973); + int16x8_t v3074 = vsubq_s16(v2975, v2977); + int16x8_t v3075 = vqrdmulhq_n_s16(v3074, 25084); + int16x8_t v3076 = vaddq_s16(v3073, v3075); + int16x8_t v3077 = vsubq_s16(v2981, v2983); + int16x8_t v3078 = vsubq_s16(v2985, v2987); + int16x8_t v3079 = vqrdmulhq_n_s16(v3078, 25084); + int16x8_t v3080 = vaddq_s16(v3077, v3079); + int16x8_t v3081 = vqrdmulhq_n_s16(v3080, 18021); + int16x8_t v3082 = vaddq_s16(v3076, v3081); + int16x8_t v3083 = vqrdmulhq_n_s16(v3082, 16769); + int16x8_t v3084 = vaddq_s16(v3072, v3083); + int16x8_t v3085 = vsubq_s16(v2903, v2905); + int16x8_t v3086 = vsubq_s16(v2907, v2909); + int16x8_t v3087 = vqrdmulhq_n_s16(v3086, 26631); + int16x8_t v3088 = vaddq_s16(v3085, v3087); + int16x8_t v3089 = vsubq_s16(v2913, v2915); + int16x8_t v3090 = vsubq_s16(v2917, v2919); + int16x8_t v3091 = vqrdmulhq_n_s16(v3090, 26631); + int16x8_t v3092 = vaddq_s16(v3089, v3091); + int16x8_t v3093 = vqrdmulhq_n_s16(v3092, 18231); + int16x8_t v3094 = vaddq_s16(v3088, v3093); + int16x8_t v3095 = vsubq_s16(v2925, v2927); + int16x8_t v3096 = vsubq_s16(v2929, v2931); + int16x8_t v3097 = vqrdmulhq_n_s16(v3096, 26631); + int16x8_t v3098 = vaddq_s16(v3095, v3097); + int16x8_t v3099 = vsubq_s16(v2935, v2937); + int16x8_t v3100 = vsubq_s16(v2939, v2941); + int16x8_t v3101 = vqrdmulhq_n_s16(v3100, 26631); + int16x8_t v3102 = vaddq_s16(v3099, v3101); + int16x8_t v3103 = vqrdmulhq_n_s16(v3102, 18231); + int16x8_t v3104 = vaddq_s16(v3098, v3103); + int16x8_t v3105 = vqrdmulhq_n_s16(v3104, 16815); + int16x8_t v3106 = vaddq_s16(v3094, v3105); + int16x8_t v3107 = vsubq_s16(v2857, v2859); + int16x8_t v3108 = vsubq_s16(v2861, v2863); + int16x8_t v3109 = vqrdmulhq_n_s16(v3108, 28454); + int16x8_t v3110 = vaddq_s16(v3107, v3109); + int16x8_t v3111 = vsubq_s16(v2867, v2869); + int16x8_t v3112 = vsubq_s16(v2871, v2873); + int16x8_t v3113 = vqrdmulhq_n_s16(v3112, 28454); + int16x8_t v3114 = vaddq_s16(v3111, v3113); + int16x8_t v3115 = 
vqrdmulhq_n_s16(v3114, 18458); + int16x8_t v3116 = vaddq_s16(v3110, v3115); + int16x8_t v3117 = vsubq_s16(v2879, v2881); + int16x8_t v3118 = vsubq_s16(v2883, v2885); + int16x8_t v3119 = vqrdmulhq_n_s16(v3118, 28454); + int16x8_t v3120 = vaddq_s16(v3117, v3119); + int16x8_t v3121 = vsubq_s16(v2889, v2891); + int16x8_t v3122 = vsubq_s16(v2893, v2895); + int16x8_t v3123 = vqrdmulhq_n_s16(v3122, 28454); + int16x8_t v3124 = vaddq_s16(v3121, v3123); + int16x8_t v3125 = vqrdmulhq_n_s16(v3124, 18458); + int16x8_t v3126 = vaddq_s16(v3120, v3125); + int16x8_t v3127 = vqrdmulhq_n_s16(v3126, 16865); + int16x8_t v3128 = vaddq_s16(v3116, v3127); + int16x8_t v3129 = vsubq_s16(v2811, v2813); + int16x8_t v3130 = vsubq_s16(v2815, v2817); + int16x8_t v3131 = vqrdmulhq_n_s16(v3130, 30624); + int16x8_t v3132 = vaddq_s16(v3129, v3131); + int16x8_t v3133 = vsubq_s16(v2821, v2823); + int16x8_t v3134 = vsubq_s16(v2825, v2827); + int16x8_t v3135 = vqrdmulhq_n_s16(v3134, 30624); + int16x8_t v3136 = vaddq_s16(v3133, v3135); + int16x8_t v3137 = vqrdmulhq_n_s16(v3136, 18702); + int16x8_t v3138 = vaddq_s16(v3132, v3137); + int16x8_t v3139 = vsubq_s16(v2833, v2835); + int16x8_t v3140 = vsubq_s16(v2837, v2839); + int16x8_t v3141 = vqrdmulhq_n_s16(v3140, 30624); + int16x8_t v3142 = vaddq_s16(v3139, v3141); + int16x8_t v3143 = vsubq_s16(v2843, v2845); + int16x8_t v3144 = vsubq_s16(v2847, v2849); + int16x8_t v3145 = vqrdmulhq_n_s16(v3144, 30624); + int16x8_t v3146 = vaddq_s16(v3143, v3145); + int16x8_t v3147 = vqrdmulhq_n_s16(v3146, 18702); + int16x8_t v3148 = vaddq_s16(v3142, v3147); + int16x8_t v3149 = vqrdmulhq_n_s16(v3148, 16916); + int16x8_t v3150 = vaddq_s16(v3138, v3149); + int16x8_t v3151 = vsubq_s16(v2765, v2767); + int16x8_t v3152 = vsubq_s16(v2769, v2771); + int16x8_t v3153_tmp = vqrdmulhq_n_s16(v3152, 472); + int16x8_t v3153 = vaddq_s16(v3153_tmp, v3152); + int16x8_t v3154 = vaddq_s16(v3151, v3153); + int16x8_t v3155 = vsubq_s16(v2775, v2777); + int16x8_t v3156 = vsubq_s16(v2779, v2781); 
+ int16x8_t v3157_tmp = vqrdmulhq_n_s16(v3156, 472); + int16x8_t v3157 = vaddq_s16(v3157_tmp, v3156); + int16x8_t v3158 = vaddq_s16(v3155, v3157); + int16x8_t v3159 = vqrdmulhq_n_s16(v3158, 18964); + int16x8_t v3160 = vaddq_s16(v3154, v3159); + int16x8_t v3161 = vsubq_s16(v2787, v2789); + int16x8_t v3162 = vsubq_s16(v2791, v2793); + int16x8_t v3163_tmp = vqrdmulhq_n_s16(v3162, 472); + int16x8_t v3163 = vaddq_s16(v3163_tmp, v3162); + int16x8_t v3164 = vaddq_s16(v3161, v3163); + int16x8_t v3165 = vsubq_s16(v2797, v2799); + int16x8_t v3166 = vsubq_s16(v2801, v2803); + int16x8_t v3167_tmp = vqrdmulhq_n_s16(v3166, 472); + int16x8_t v3167 = vaddq_s16(v3167_tmp, v3166); + int16x8_t v3168 = vaddq_s16(v3165, v3167); + int16x8_t v3169 = vqrdmulhq_n_s16(v3168, 18964); + int16x8_t v3170 = vaddq_s16(v3164, v3169); + int16x8_t v3171 = vqrdmulhq_n_s16(v3170, 16971); + int16x8_t v3172 = vaddq_s16(v3160, v3171); + int16x8_t v3173 = vsubq_s16(v2719, v2721); + int16x8_t v3174 = vsubq_s16(v2723, v2725); + int16x8_t v3175_tmp = vqrdmulhq_n_s16(v3174, 3672); + int16x8_t v3175 = vaddq_s16(v3175_tmp, v3174); + int16x8_t v3176 = vaddq_s16(v3173, v3175); + int16x8_t v3177 = vsubq_s16(v2729, v2731); + int16x8_t v3178 = vsubq_s16(v2733, v2735); + int16x8_t v3179_tmp = vqrdmulhq_n_s16(v3178, 3672); + int16x8_t v3179 = vaddq_s16(v3179_tmp, v3178); + int16x8_t v3180 = vaddq_s16(v3177, v3179); + int16x8_t v3181 = vqrdmulhq_n_s16(v3180, 19245); + int16x8_t v3182 = vaddq_s16(v3176, v3181); + int16x8_t v3183 = vsubq_s16(v2741, v2743); + int16x8_t v3184 = vsubq_s16(v2745, v2747); + int16x8_t v3185_tmp = vqrdmulhq_n_s16(v3184, 3672); + int16x8_t v3185 = vaddq_s16(v3185_tmp, v3184); + int16x8_t v3186 = vaddq_s16(v3183, v3185); + int16x8_t v3187 = vsubq_s16(v2751, v2753); + int16x8_t v3188 = vsubq_s16(v2755, v2757); + int16x8_t v3189_tmp = vqrdmulhq_n_s16(v3188, 3672); + int16x8_t v3189 = vaddq_s16(v3189_tmp, v3188); + int16x8_t v3190 = vaddq_s16(v3187, v3189); + int16x8_t v3191 = vqrdmulhq_n_s16(v3190, 
19245); + int16x8_t v3192 = vaddq_s16(v3186, v3191); + int16x8_t v3193 = vqrdmulhq_n_s16(v3192, 17029); + int16x8_t v3194 = vaddq_s16(v3182, v3193); + int16x8_t v3195 = vsubq_s16(v2673, v2675); + int16x8_t v3196 = vsubq_s16(v2677, v2679); + int16x8_t v3197_tmp = vqrdmulhq_n_s16(v3196, 7662); + int16x8_t v3197 = vaddq_s16(v3197_tmp, v3196); + int16x8_t v3198 = vaddq_s16(v3195, v3197); + int16x8_t v3199 = vsubq_s16(v2683, v2685); + int16x8_t v3200 = vsubq_s16(v2687, v2689); + int16x8_t v3201_tmp = vqrdmulhq_n_s16(v3200, 7662); + int16x8_t v3201 = vaddq_s16(v3201_tmp, v3200); + int16x8_t v3202 = vaddq_s16(v3199, v3201); + int16x8_t v3203 = vqrdmulhq_n_s16(v3202, 19546); + int16x8_t v3204 = vaddq_s16(v3198, v3203); + int16x8_t v3205 = vsubq_s16(v2695, v2697); + int16x8_t v3206 = vsubq_s16(v2699, v2701); + int16x8_t v3207_tmp = vqrdmulhq_n_s16(v3206, 7662); + int16x8_t v3207 = vaddq_s16(v3207_tmp, v3206); + int16x8_t v3208 = vaddq_s16(v3205, v3207); + int16x8_t v3209 = vsubq_s16(v2705, v2707); + int16x8_t v3210 = vsubq_s16(v2709, v2711); + int16x8_t v3211_tmp = vqrdmulhq_n_s16(v3210, 7662); + int16x8_t v3211 = vaddq_s16(v3211_tmp, v3210); + int16x8_t v3212 = vaddq_s16(v3209, v3211); + int16x8_t v3213 = vqrdmulhq_n_s16(v3212, 19546); + int16x8_t v3214 = vaddq_s16(v3208, v3213); + int16x8_t v3215 = vqrdmulhq_n_s16(v3214, 17090); + int16x8_t v3216 = vaddq_s16(v3204, v3215); + int16x8_t v3217 = vsubq_s16(v2582, v2587); + int16x8_t v3218 = vsubq_s16(v2592, v2597); + int16x8_t v3219_tmp = vqrdmulhq_n_s16(v3218, 12756); + int16x8_t v3219 = vaddq_s16(v3219_tmp, v3218); + int16x8_t v3220 = vaddq_s16(v3217, v3219); + int16x8_t v3221 = vsubq_s16(v2604, v2609); + int16x8_t v3222 = vsubq_s16(v2614, v2619); + int16x8_t v3223_tmp = vqrdmulhq_n_s16(v3222, 12756); + int16x8_t v3223 = vaddq_s16(v3223_tmp, v3222); + int16x8_t v3224 = vaddq_s16(v3221, v3223); + int16x8_t v3225 = vqrdmulhq_n_s16(v3224, 19869); + int16x8_t v3226 = vaddq_s16(v3220, v3225); + int16x8_t v3227 = vsubq_s16(v2628, 
v2633); + int16x8_t v3228 = vsubq_s16(v2638, v2643); + int16x8_t v3229_tmp = vqrdmulhq_n_s16(v3228, 12756); + int16x8_t v3229 = vaddq_s16(v3229_tmp, v3228); + int16x8_t v3230 = vaddq_s16(v3227, v3229); + int16x8_t v3231 = vsubq_s16(v2650, v2655); + int16x8_t v3232 = vsubq_s16(v2660, v2665); + int16x8_t v3233_tmp = vqrdmulhq_n_s16(v3232, 12756); + int16x8_t v3233 = vaddq_s16(v3233_tmp, v3232); + int16x8_t v3234 = vaddq_s16(v3231, v3233); + int16x8_t v3235 = vqrdmulhq_n_s16(v3234, 19869); + int16x8_t v3236 = vaddq_s16(v3230, v3235); + int16x8_t v3237 = vqrdmulhq_n_s16(v3236, 17153); + int16x8_t v3238 = vaddq_s16(v3226, v3237); + int16x8_t v3239 = vsubq_s16(v2488, v2493); + int16x8_t v3240 = vsubq_s16(v2498, v2503); + int16x8_t v3241_tmp = vqrdmulhq_n_s16(v3240, 19463); + int16x8_t v3241 = vaddq_s16(v3241_tmp, v3240); + int16x8_t v3242 = vaddq_s16(v3239, v3241); + int16x8_t v3243 = vsubq_s16(v2510, v2515); + int16x8_t v3244 = vsubq_s16(v2520, v2525); + int16x8_t v3245_tmp = vqrdmulhq_n_s16(v3244, 19463); + int16x8_t v3245 = vaddq_s16(v3245_tmp, v3244); + int16x8_t v3246 = vaddq_s16(v3243, v3245); + int16x8_t v3247 = vqrdmulhq_n_s16(v3246, 20216); + int16x8_t v3248 = vaddq_s16(v3242, v3247); + int16x8_t v3249 = vsubq_s16(v2534, v2539); + int16x8_t v3250 = vsubq_s16(v2544, v2549); + int16x8_t v3251_tmp = vqrdmulhq_n_s16(v3250, 19463); + int16x8_t v3251 = vaddq_s16(v3251_tmp, v3250); + int16x8_t v3252 = vaddq_s16(v3249, v3251); + int16x8_t v3253 = vsubq_s16(v2556, v2561); + int16x8_t v3254 = vsubq_s16(v2566, v2571); + int16x8_t v3255_tmp = vqrdmulhq_n_s16(v3254, 19463); + int16x8_t v3255 = vaddq_s16(v3255_tmp, v3254); + int16x8_t v3256 = vaddq_s16(v3253, v3255); + int16x8_t v3257 = vqrdmulhq_n_s16(v3256, 20216); + int16x8_t v3258 = vaddq_s16(v3252, v3257); + int16x8_t v3259 = vqrdmulhq_n_s16(v3258, 17220); + int16x8_t v3260 = vaddq_s16(v3248, v3259); + int16x8_t v3261 = vsubq_s16(v2393, v2398); + int16x8_t v3262 = vsubq_s16(v2403, v2408); + int16x8_t v3263_tmp = 
vqrdmulhq_n_s16(v3262, 28661); + int16x8_t v3263 = vaddq_s16(v3263_tmp, v3262); + int16x8_t v3264 = vaddq_s16(v3261, v3263); + int16x8_t v3265 = vsubq_s16(v2415, v2420); + int16x8_t v3266 = vsubq_s16(v2425, v2430); + int16x8_t v3267_tmp = vqrdmulhq_n_s16(v3266, 28661); + int16x8_t v3267 = vaddq_s16(v3267_tmp, v3266); + int16x8_t v3268 = vaddq_s16(v3265, v3267); + int16x8_t v3269 = vqrdmulhq_n_s16(v3268, 20587); + int16x8_t v3270 = vaddq_s16(v3264, v3269); + int16x8_t v3271 = vsubq_s16(v2439, v2444); + int16x8_t v3272 = vsubq_s16(v2449, v2454); + int16x8_t v3273_tmp = vqrdmulhq_n_s16(v3272, 28661); + int16x8_t v3273 = vaddq_s16(v3273_tmp, v3272); + int16x8_t v3274 = vaddq_s16(v3271, v3273); + int16x8_t v3275 = vsubq_s16(v2461, v2467); + int16x8_t v3276 = vsubq_s16(v2472, v2477); + int16x8_t v3277_tmp = vqrdmulhq_n_s16(v3276, 28661); + int16x8_t v3277 = vaddq_s16(v3277_tmp, v3276); + int16x8_t v3278 = vaddq_s16(v3275, v3277); + int16x8_t v3279 = vqrdmulhq_n_s16(v3278, 20587); + int16x8_t v3280 = vaddq_s16(v3274, v3279); + int16x8_t v3281 = vqrdmulhq_n_s16(v3280, 17290); + int16x8_t v3282 = vaddq_s16(v3270, v3281); + int16x8_t v3283 = vsubq_s16(v2299, v2304); + int16x8_t v3284 = vsubq_s16(v2309, v2314); + int16x8_t v3285_tmp = vqrdmulhq_n_s16(v3284, 9242); + int16x8_t v3285 = vmlaq_n_s16(v3285_tmp, v3284, 2); + int16x8_t v3286 = vaddq_s16(v3283, v3285); + int16x8_t v3287 = vsubq_s16(v2321, v2326); + int16x8_t v3288 = vsubq_s16(v2331, v2336); + int16x8_t v3289_tmp = vqrdmulhq_n_s16(v3288, 9242); + int16x8_t v3289 = vmlaq_n_s16(v3289_tmp, v3288, 2); + int16x8_t v3290 = vaddq_s16(v3287, v3289); + int16x8_t v3291 = vqrdmulhq_n_s16(v3290, 20985); + int16x8_t v3292 = vaddq_s16(v3286, v3291); + int16x8_t v3293 = vsubq_s16(v2345, v2350); + int16x8_t v3294 = vsubq_s16(v2355, v2360); + int16x8_t v3295_tmp = vqrdmulhq_n_s16(v3294, 9242); + int16x8_t v3295 = vmlaq_n_s16(v3295_tmp, v3294, 2); + int16x8_t v3296 = vaddq_s16(v3293, v3295); + int16x8_t v3297 = vsubq_s16(v2367, v2372); 
+ int16x8_t v3298 = vsubq_s16(v2377, v2382); + int16x8_t v3299_tmp = vqrdmulhq_n_s16(v3298, 9242); + int16x8_t v3299 = vmlaq_n_s16(v3299_tmp, v3298, 2); + int16x8_t v3300 = vaddq_s16(v3297, v3299); + int16x8_t v3301 = vqrdmulhq_n_s16(v3300, 20985); + int16x8_t v3302 = vaddq_s16(v3296, v3301); + int16x8_t v3303 = vqrdmulhq_n_s16(v3302, 17363); + int16x8_t v3304 = vaddq_s16(v3292, v3303); + int16x8_t v3305 = vsubq_s16(v2115, v2126); + int16x8_t v3306 = vsubq_s16(v2137, v2148); + int16x8_t v3307_tmp = vqrdmulhq_n_s16(v3306, 30298); + int16x8_t v3307 = vmlaq_n_s16(v3307_tmp, v3306, 2); + int16x8_t v3308 = vaddq_s16(v3305, v3307); + int16x8_t v3309 = vsubq_s16(v2161, v2172); + int16x8_t v3310 = vsubq_s16(v2183, v2194); + int16x8_t v3311_tmp = vqrdmulhq_n_s16(v3310, 30298); + int16x8_t v3311 = vmlaq_n_s16(v3311_tmp, v3310, 2); + int16x8_t v3312 = vaddq_s16(v3309, v3311); + int16x8_t v3313 = vqrdmulhq_n_s16(v3312, 21412); + int16x8_t v3314 = vaddq_s16(v3308, v3313); + int16x8_t v3315 = vsubq_s16(v2209, v2220); + int16x8_t v3316 = vsubq_s16(v2231, v2242); + int16x8_t v3317_tmp = vqrdmulhq_n_s16(v3316, 30298); + int16x8_t v3317 = vmlaq_n_s16(v3317_tmp, v3316, 2); + int16x8_t v3318 = vaddq_s16(v3315, v3317); + int16x8_t v3319 = vsubq_s16(v2255, v2266); + int16x8_t v3320 = vsubq_s16(v2277, v2288); + int16x8_t v3321_tmp = vqrdmulhq_n_s16(v3320, 30298); + int16x8_t v3321 = vmlaq_n_s16(v3321_tmp, v3320, 2); + int16x8_t v3322 = vaddq_s16(v3319, v3321); + int16x8_t v3323 = vqrdmulhq_n_s16(v3322, 21412); + int16x8_t v3324 = vaddq_s16(v3318, v3323); + int16x8_t v3325 = vqrdmulhq_n_s16(v3324, 17440); + int16x8_t v3326 = vaddq_s16(v3314, v3325); + int16x8_t v3327 = vsubq_s16(v1925, v1936); + int16x8_t v3328 = vsubq_s16(v1947, v1958); + int16x8_t v3329_tmp = vqrdmulhq_n_s16(v3328, 2773); + int16x8_t v3329 = vmlaq_n_s16(v3329_tmp, v3328, 4); + int16x8_t v3330 = vaddq_s16(v3327, v3329); + int16x8_t v3331 = vsubq_s16(v1971, v1982); + int16x8_t v3332 = vsubq_s16(v1993, v2004); + int16x8_t 
v3333_tmp = vqrdmulhq_n_s16(v3332, 2773); + int16x8_t v3333 = vmlaq_n_s16(v3333_tmp, v3332, 4); + int16x8_t v3334 = vaddq_s16(v3331, v3333); + int16x8_t v3335 = vqrdmulhq_n_s16(v3334, 21871); + int16x8_t v3336 = vaddq_s16(v3330, v3335); + int16x8_t v3337 = vsubq_s16(v2019, v2030); + int16x8_t v3338 = vsubq_s16(v2041, v2052); + int16x8_t v3339_tmp = vqrdmulhq_n_s16(v3338, 2773); + int16x8_t v3339 = vmlaq_n_s16(v3339_tmp, v3338, 4); + int16x8_t v3340 = vaddq_s16(v3337, v3339); + int16x8_t v3341 = vsubq_s16(v2065, v2076); + int16x8_t v3342 = vsubq_s16(v2087, v2098); + int16x8_t v3343_tmp = vqrdmulhq_n_s16(v3342, 2773); + int16x8_t v3343 = vmlaq_n_s16(v3343_tmp, v3342, 4); + int16x8_t v3344 = vaddq_s16(v3341, v3343); + int16x8_t v3345 = vqrdmulhq_n_s16(v3344, 21871); + int16x8_t v3346 = vaddq_s16(v3340, v3345); + int16x8_t v3347 = vqrdmulhq_n_s16(v3346, 17520); + int16x8_t v3348 = vaddq_s16(v3336, v3347); + int16x8_t v3349 = vsubq_s16(v1555, v1578); + int16x8_t v3350 = vsubq_s16(v1601, v1624); + int16x8_t v3351_tmp = vqrdmulhq_n_s16(v3350, 26108); + int16x8_t v3351 = vmlaq_n_s16(v3351_tmp, v3350, 6); + int16x8_t v3352 = vaddq_s16(v3349, v3351); + int16x8_t v3353 = vsubq_s16(v1649, v1672); + int16x8_t v3354 = vsubq_s16(v1695, v1718); + int16x8_t v3355_tmp = vqrdmulhq_n_s16(v3354, 26108); + int16x8_t v3355 = vmlaq_n_s16(v3355_tmp, v3354, 6); + int16x8_t v3356 = vaddq_s16(v3353, v3355); + int16x8_t v3357 = vqrdmulhq_n_s16(v3356, 22363); + int16x8_t v3358 = vaddq_s16(v3352, v3357); + int16x8_t v3359 = vsubq_s16(v1745, v1768); + int16x8_t v3360 = vsubq_s16(v1791, v1814); + int16x8_t v3361_tmp = vqrdmulhq_n_s16(v3360, 26108); + int16x8_t v3361 = vmlaq_n_s16(v3361_tmp, v3360, 6); + int16x8_t v3362 = vaddq_s16(v3359, v3361); + int16x8_t v3363 = vsubq_s16(v1839, v1862); + int16x8_t v3364 = vsubq_s16(v1885, v1908); + int16x8_t v3365_tmp = vqrdmulhq_n_s16(v3364, 26108); + int16x8_t v3365 = vmlaq_n_s16(v3365_tmp, v3364, 6); + int16x8_t v3366 = vaddq_s16(v3363, v3365); + int16x8_t 
v3367 = vqrdmulhq_n_s16(v3366, 22363); + int16x8_t v3368 = vaddq_s16(v3362, v3367); + int16x8_t v3369 = vqrdmulhq_n_s16(v3368, 17603); + int16x8_t v3370 = vaddq_s16(v3358, v3369); + int16x8_t v3371 = vsubq_s16(v61, v140); + int16x8_t v3372 = vsubq_s16(v234, v314); + int16x8_t v3373_tmp = vqrdmulhq_n_s16(v3372, 12251); + int16x8_t v3373 = vmlaq_n_s16(v3373_tmp, v3372, 20); + int16x8_t v3374 = vaddq_s16(v3371, v3373); + int16x8_t v3375 = vsubq_s16(v410, v521); + int16x8_t v3376 = vsubq_s16(v615, v696); + int16x8_t v3377_tmp = vqrdmulhq_n_s16(v3376, 12251); + int16x8_t v3377 = vmlaq_n_s16(v3377_tmp, v3376, 20); + int16x8_t v3378 = vaddq_s16(v3375, v3377); + int16x8_t v3379 = vqrdmulhq_n_s16(v3378, 22891); + int16x8_t v3380 = vaddq_s16(v3374, v3379); + int16x8_t v3381 = vsubq_s16(v794, v905); + int16x8_t v3382 = vsubq_s16(v1061, v1143); + int16x8_t v3383_tmp = vqrdmulhq_n_s16(v3382, 12251); + int16x8_t v3383 = vmlaq_n_s16(v3383_tmp, v3382, 20); + int16x8_t v3384 = vaddq_s16(v3381, v3383); + int16x8_t v3385 = vsubq_s16(v1239, v1350); + int16x8_t v3386 = vsubq_s16(v1444, v1526); + int16x8_t v3387_tmp = vqrdmulhq_n_s16(v3386, 12251); + int16x8_t v3387 = vmlaq_n_s16(v3387_tmp, v3386, 20); + int16x8_t v3388 = vaddq_s16(v3385, v3387); + int16x8_t v3389 = vqrdmulhq_n_s16(v3388, 22891); + int16x8_t v3390 = vaddq_s16(v3384, v3389); + int16x8_t v3391 = vqrdmulhq_n_s16(v3390, 17689); + int16x8_t v3392 = vaddq_s16(v3380, v3391); + int16x8_t v3393 = vsubq_s16(v3371, v3373); + int16x8_t v3394 = vsubq_s16(v3375, v3377); + int16x8_t v3395 = vqrdmulhq_n_s16(v3394, 23460); + int16x8_t v3396 = vaddq_s16(v3393, v3395); + int16x8_t v3397 = vsubq_s16(v3381, v3383); + int16x8_t v3398 = vsubq_s16(v3385, v3387); + int16x8_t v3399 = vqrdmulhq_n_s16(v3398, 23460); + int16x8_t v3400 = vaddq_s16(v3397, v3399); + int16x8_t v3401 = vqrdmulhq_n_s16(v3400, 17779); + int16x8_t v3402 = vaddq_s16(v3396, v3401); + int16x8_t v3403 = vsubq_s16(v3349, v3351); + int16x8_t v3404 = vsubq_s16(v3353, v3355); + 
int16x8_t v3405 = vqrdmulhq_n_s16(v3404, 24073); + int16x8_t v3406 = vaddq_s16(v3403, v3405); + int16x8_t v3407 = vsubq_s16(v3359, v3361); + int16x8_t v3408 = vsubq_s16(v3363, v3365); + int16x8_t v3409 = vqrdmulhq_n_s16(v3408, 24073); + int16x8_t v3410 = vaddq_s16(v3407, v3409); + int16x8_t v3411 = vqrdmulhq_n_s16(v3410, 17873); + int16x8_t v3412 = vaddq_s16(v3406, v3411); + int16x8_t v3413 = vsubq_s16(v3327, v3329); + int16x8_t v3414 = vsubq_s16(v3331, v3333); + int16x8_t v3415 = vqrdmulhq_n_s16(v3414, 24734); + int16x8_t v3416 = vaddq_s16(v3413, v3415); + int16x8_t v3417 = vsubq_s16(v3337, v3339); + int16x8_t v3418 = vsubq_s16(v3341, v3343); + int16x8_t v3419 = vqrdmulhq_n_s16(v3418, 24734); + int16x8_t v3420 = vaddq_s16(v3417, v3419); + int16x8_t v3421 = vqrdmulhq_n_s16(v3420, 17971); + int16x8_t v3422 = vaddq_s16(v3416, v3421); + int16x8_t v3423 = vsubq_s16(v3305, v3307); + int16x8_t v3424 = vsubq_s16(v3309, v3311); + int16x8_t v3425 = vqrdmulhq_n_s16(v3424, 25448); + int16x8_t v3426 = vaddq_s16(v3423, v3425); + int16x8_t v3427 = vsubq_s16(v3315, v3317); + int16x8_t v3428 = vsubq_s16(v3319, v3321); + int16x8_t v3429 = vqrdmulhq_n_s16(v3428, 25448); + int16x8_t v3430 = vaddq_s16(v3427, v3429); + int16x8_t v3431 = vqrdmulhq_n_s16(v3430, 18072); + int16x8_t v3432 = vaddq_s16(v3426, v3431); + int16x8_t v3433 = vsubq_s16(v3283, v3285); + int16x8_t v3434 = vsubq_s16(v3287, v3289); + int16x8_t v3435 = vqrdmulhq_n_s16(v3434, 26220); + int16x8_t v3436 = vaddq_s16(v3433, v3435); + int16x8_t v3437 = vsubq_s16(v3293, v3295); + int16x8_t v3438 = vsubq_s16(v3297, v3299); + int16x8_t v3439 = vqrdmulhq_n_s16(v3438, 26220); + int16x8_t v3440 = vaddq_s16(v3437, v3439); + int16x8_t v3441 = vqrdmulhq_n_s16(v3440, 18177); + int16x8_t v3442 = vaddq_s16(v3436, v3441); + int16x8_t v3443 = vsubq_s16(v3261, v3263); + int16x8_t v3444 = vsubq_s16(v3265, v3267); + int16x8_t v3445 = vqrdmulhq_n_s16(v3444, 27058); + int16x8_t v3446 = vaddq_s16(v3443, v3445); + int16x8_t v3447 = 
vsubq_s16(v3271, v3273); + int16x8_t v3448 = vsubq_s16(v3275, v3277); + int16x8_t v3449 = vqrdmulhq_n_s16(v3448, 27058); + int16x8_t v3450 = vaddq_s16(v3447, v3449); + int16x8_t v3451 = vqrdmulhq_n_s16(v3450, 18286); + int16x8_t v3452 = vaddq_s16(v3446, v3451); + int16x8_t v3453 = vsubq_s16(v3239, v3241); + int16x8_t v3454 = vsubq_s16(v3243, v3245); + int16x8_t v3455 = vqrdmulhq_n_s16(v3454, 27969); + int16x8_t v3456 = vaddq_s16(v3453, v3455); + int16x8_t v3457 = vsubq_s16(v3249, v3251); + int16x8_t v3458 = vsubq_s16(v3253, v3255); + int16x8_t v3459 = vqrdmulhq_n_s16(v3458, 27969); + int16x8_t v3460 = vaddq_s16(v3457, v3459); + int16x8_t v3461 = vqrdmulhq_n_s16(v3460, 18400); + int16x8_t v3462 = vaddq_s16(v3456, v3461); + int16x8_t v3463 = vsubq_s16(v3217, v3219); + int16x8_t v3464 = vsubq_s16(v3221, v3223); + int16x8_t v3465 = vqrdmulhq_n_s16(v3464, 28961); + int16x8_t v3466 = vaddq_s16(v3463, v3465); + int16x8_t v3467 = vsubq_s16(v3227, v3229); + int16x8_t v3468 = vsubq_s16(v3231, v3233); + int16x8_t v3469 = vqrdmulhq_n_s16(v3468, 28961); + int16x8_t v3470 = vaddq_s16(v3467, v3469); + int16x8_t v3471 = vqrdmulhq_n_s16(v3470, 18517); + int16x8_t v3472 = vaddq_s16(v3466, v3471); + int16x8_t v3473 = vsubq_s16(v3195, v3197); + int16x8_t v3474 = vsubq_s16(v3199, v3201); + int16x8_t v3475 = vqrdmulhq_n_s16(v3474, 30044); + int16x8_t v3476 = vaddq_s16(v3473, v3475); + int16x8_t v3477 = vsubq_s16(v3205, v3207); + int16x8_t v3478 = vsubq_s16(v3209, v3211); + int16x8_t v3479 = vqrdmulhq_n_s16(v3478, 30044); + int16x8_t v3480 = vaddq_s16(v3477, v3479); + int16x8_t v3481 = vqrdmulhq_n_s16(v3480, 18639); + int16x8_t v3482 = vaddq_s16(v3476, v3481); + int16x8_t v3483 = vsubq_s16(v3173, v3175); + int16x8_t v3484 = vsubq_s16(v3177, v3179); + int16x8_t v3485 = vqrdmulhq_n_s16(v3484, 31232); + int16x8_t v3486 = vaddq_s16(v3483, v3485); + int16x8_t v3487 = vsubq_s16(v3183, v3185); + int16x8_t v3488 = vsubq_s16(v3187, v3189); + int16x8_t v3489 = vqrdmulhq_n_s16(v3488, 31232); + 
int16x8_t v3490 = vaddq_s16(v3487, v3489); + int16x8_t v3491 = vqrdmulhq_n_s16(v3490, 18765); + int16x8_t v3492 = vaddq_s16(v3486, v3491); + int16x8_t v3493 = vsubq_s16(v3151, v3153); + int16x8_t v3494 = vsubq_s16(v3155, v3157); + int16x8_t v3495 = vqrdmulhq_n_s16(v3494, 32538); + int16x8_t v3496 = vaddq_s16(v3493, v3495); + int16x8_t v3497 = vsubq_s16(v3161, v3163); + int16x8_t v3498 = vsubq_s16(v3165, v3167); + int16x8_t v3499 = vqrdmulhq_n_s16(v3498, 32538); + int16x8_t v3500 = vaddq_s16(v3497, v3499); + int16x8_t v3501 = vqrdmulhq_n_s16(v3500, 18896); + int16x8_t v3502 = vaddq_s16(v3496, v3501); + int16x8_t v3503 = vsubq_s16(v3129, v3131); + int16x8_t v3504 = vsubq_s16(v3133, v3135); + int16x8_t v3505_tmp = vqrdmulhq_n_s16(v3504, 1211); + int16x8_t v3505 = vaddq_s16(v3505_tmp, v3504); + int16x8_t v3506 = vaddq_s16(v3503, v3505); + int16x8_t v3507 = vsubq_s16(v3139, v3141); + int16x8_t v3508 = vsubq_s16(v3143, v3145); + int16x8_t v3509_tmp = vqrdmulhq_n_s16(v3508, 1211); + int16x8_t v3509 = vaddq_s16(v3509_tmp, v3508); + int16x8_t v3510 = vaddq_s16(v3507, v3509); + int16x8_t v3511 = vqrdmulhq_n_s16(v3510, 19032); + int16x8_t v3512 = vaddq_s16(v3506, v3511); + int16x8_t v3513 = vsubq_s16(v3107, v3109); + int16x8_t v3514 = vsubq_s16(v3111, v3113); + int16x8_t v3515_tmp = vqrdmulhq_n_s16(v3514, 2808); + int16x8_t v3515 = vaddq_s16(v3515_tmp, v3514); + int16x8_t v3516 = vaddq_s16(v3513, v3515); + int16x8_t v3517 = vsubq_s16(v3117, v3119); + int16x8_t v3518 = vsubq_s16(v3121, v3123); + int16x8_t v3519_tmp = vqrdmulhq_n_s16(v3518, 2808); + int16x8_t v3519 = vaddq_s16(v3519_tmp, v3518); + int16x8_t v3520 = vaddq_s16(v3517, v3519); + int16x8_t v3521 = vqrdmulhq_n_s16(v3520, 19172); + int16x8_t v3522 = vaddq_s16(v3516, v3521); + int16x8_t v3523 = vsubq_s16(v3085, v3087); + int16x8_t v3524 = vsubq_s16(v3089, v3091); + int16x8_t v3525_tmp = vqrdmulhq_n_s16(v3524, 4586); + int16x8_t v3525 = vaddq_s16(v3525_tmp, v3524); + int16x8_t v3526 = vaddq_s16(v3523, v3525); + 
int16x8_t v3527 = vsubq_s16(v3095, v3097); + int16x8_t v3528 = vsubq_s16(v3099, v3101); + int16x8_t v3529_tmp = vqrdmulhq_n_s16(v3528, 4586); + int16x8_t v3529 = vaddq_s16(v3529_tmp, v3528); + int16x8_t v3530 = vaddq_s16(v3527, v3529); + int16x8_t v3531 = vqrdmulhq_n_s16(v3530, 19318); + int16x8_t v3532 = vaddq_s16(v3526, v3531); + int16x8_t v3533 = vsubq_s16(v3063, v3065); + int16x8_t v3534 = vsubq_s16(v3067, v3069); + int16x8_t v3535_tmp = vqrdmulhq_n_s16(v3534, 6576); + int16x8_t v3535 = vaddq_s16(v3535_tmp, v3534); + int16x8_t v3536 = vaddq_s16(v3533, v3535); + int16x8_t v3537 = vsubq_s16(v3073, v3075); + int16x8_t v3538 = vsubq_s16(v3077, v3079); + int16x8_t v3539_tmp = vqrdmulhq_n_s16(v3538, 6576); + int16x8_t v3539 = vaddq_s16(v3539_tmp, v3538); + int16x8_t v3540 = vaddq_s16(v3537, v3539); + int16x8_t v3541 = vqrdmulhq_n_s16(v3540, 19469); + int16x8_t v3542 = vaddq_s16(v3536, v3541); + int16x8_t v3543 = vsubq_s16(v3041, v3043); + int16x8_t v3544 = vsubq_s16(v3045, v3047); + int16x8_t v3545_tmp = vqrdmulhq_n_s16(v3544, 8817); + int16x8_t v3545 = vaddq_s16(v3545_tmp, v3544); + int16x8_t v3546 = vaddq_s16(v3543, v3545); + int16x8_t v3547 = vsubq_s16(v3051, v3053); + int16x8_t v3548 = vsubq_s16(v3055, v3057); + int16x8_t v3549_tmp = vqrdmulhq_n_s16(v3548, 8817); + int16x8_t v3549 = vaddq_s16(v3549_tmp, v3548); + int16x8_t v3550 = vaddq_s16(v3547, v3549); + int16x8_t v3551 = vqrdmulhq_n_s16(v3550, 19625); + int16x8_t v3552 = vaddq_s16(v3546, v3551); + int16x8_t v3553 = vsubq_s16(v2998, v3003); + int16x8_t v3554 = vsubq_s16(v3008, v3013); + int16x8_t v3555_tmp = vqrdmulhq_n_s16(v3554, 11356); + int16x8_t v3555 = vaddq_s16(v3555_tmp, v3554); + int16x8_t v3556 = vaddq_s16(v3553, v3555); + int16x8_t v3557 = vsubq_s16(v3020, v3025); + int16x8_t v3558 = vsubq_s16(v3030, v3035); + int16x8_t v3559_tmp = vqrdmulhq_n_s16(v3558, 11356); + int16x8_t v3559 = vaddq_s16(v3559_tmp, v3558); + int16x8_t v3560 = vaddq_s16(v3557, v3559); + int16x8_t v3561 = vqrdmulhq_n_s16(v3560, 
19786); + int16x8_t v3562 = vaddq_s16(v3556, v3561); + int16x8_t v3563 = vsubq_s16(v2952, v2957); + int16x8_t v3564 = vsubq_s16(v2962, v2967); + int16x8_t v3565_tmp = vqrdmulhq_n_s16(v3564, 14256); + int16x8_t v3565 = vaddq_s16(v3565_tmp, v3564); + int16x8_t v3566 = vaddq_s16(v3563, v3565); + int16x8_t v3567 = vsubq_s16(v2974, v2979); + int16x8_t v3568 = vsubq_s16(v2984, v2989); + int16x8_t v3569_tmp = vqrdmulhq_n_s16(v3568, 14256); + int16x8_t v3569 = vaddq_s16(v3569_tmp, v3568); + int16x8_t v3570 = vaddq_s16(v3567, v3569); + int16x8_t v3571 = vqrdmulhq_n_s16(v3570, 19954); + int16x8_t v3572 = vaddq_s16(v3566, v3571); + int16x8_t v3573 = vsubq_s16(v2906, v2911); + int16x8_t v3574 = vsubq_s16(v2916, v2921); + int16x8_t v3575_tmp = vqrdmulhq_n_s16(v3574, 17596); + int16x8_t v3575 = vaddq_s16(v3575_tmp, v3574); + int16x8_t v3576 = vaddq_s16(v3573, v3575); + int16x8_t v3577 = vsubq_s16(v2928, v2933); + int16x8_t v3578 = vsubq_s16(v2938, v2943); + int16x8_t v3579_tmp = vqrdmulhq_n_s16(v3578, 17596); + int16x8_t v3579 = vaddq_s16(v3579_tmp, v3578); + int16x8_t v3580 = vaddq_s16(v3577, v3579); + int16x8_t v3581 = vqrdmulhq_n_s16(v3580, 20127); + int16x8_t v3582 = vaddq_s16(v3576, v3581); + int16x8_t v3583 = vsubq_s16(v2860, v2865); + int16x8_t v3584 = vsubq_s16(v2870, v2875); + int16x8_t v3585_tmp = vqrdmulhq_n_s16(v3584, 21483); + int16x8_t v3585 = vaddq_s16(v3585_tmp, v3584); + int16x8_t v3586 = vaddq_s16(v3583, v3585); + int16x8_t v3587 = vsubq_s16(v2882, v2887); + int16x8_t v3588 = vsubq_s16(v2892, v2897); + int16x8_t v3589_tmp = vqrdmulhq_n_s16(v3588, 21483); + int16x8_t v3589 = vaddq_s16(v3589_tmp, v3588); + int16x8_t v3590 = vaddq_s16(v3587, v3589); + int16x8_t v3591 = vqrdmulhq_n_s16(v3590, 20306); + int16x8_t v3592 = vaddq_s16(v3586, v3591); + int16x8_t v3593 = vsubq_s16(v2814, v2819); + int16x8_t v3594 = vsubq_s16(v2824, v2829); + int16x8_t v3595_tmp = vqrdmulhq_n_s16(v3594, 26057); + int16x8_t v3595 = vaddq_s16(v3595_tmp, v3594); + int16x8_t v3596 = 
vaddq_s16(v3593, v3595); + int16x8_t v3597 = vsubq_s16(v2836, v2841); + int16x8_t v3598 = vsubq_s16(v2846, v2851); + int16x8_t v3599_tmp = vqrdmulhq_n_s16(v3598, 26057); + int16x8_t v3599 = vaddq_s16(v3599_tmp, v3598); + int16x8_t v3600 = vaddq_s16(v3597, v3599); + int16x8_t v3601 = vqrdmulhq_n_s16(v3600, 20492); + int16x8_t v3602 = vaddq_s16(v3596, v3601); + int16x8_t v3603 = vsubq_s16(v2768, v2773); + int16x8_t v3604 = vsubq_s16(v2778, v2783); + int16x8_t v3605_tmp = vqrdmulhq_n_s16(v3604, 31517); + int16x8_t v3605 = vaddq_s16(v3605_tmp, v3604); + int16x8_t v3606 = vaddq_s16(v3603, v3605); + int16x8_t v3607 = vsubq_s16(v2790, v2795); + int16x8_t v3608 = vsubq_s16(v2800, v2805); + int16x8_t v3609_tmp = vqrdmulhq_n_s16(v3608, 31517); + int16x8_t v3609 = vaddq_s16(v3609_tmp, v3608); + int16x8_t v3610 = vaddq_s16(v3607, v3609); + int16x8_t v3611 = vqrdmulhq_n_s16(v3610, 20684); + int16x8_t v3612 = vaddq_s16(v3606, v3611); + int16x8_t v3613 = vsubq_s16(v2722, v2727); + int16x8_t v3614 = vsubq_s16(v2732, v2737); + int16x8_t v3615_tmp = vqrdmulhq_n_s16(v3614, 5373); + int16x8_t v3615 = vmlaq_n_s16(v3615_tmp, v3614, 2); + int16x8_t v3616 = vaddq_s16(v3613, v3615); + int16x8_t v3617 = vsubq_s16(v2744, v2749); + int16x8_t v3618 = vsubq_s16(v2754, v2759); + int16x8_t v3619_tmp = vqrdmulhq_n_s16(v3618, 5373); + int16x8_t v3619 = vmlaq_n_s16(v3619_tmp, v3618, 2); + int16x8_t v3620 = vaddq_s16(v3617, v3619); + int16x8_t v3621 = vqrdmulhq_n_s16(v3620, 20883); + int16x8_t v3622 = vaddq_s16(v3616, v3621); + int16x8_t v3623 = vsubq_s16(v2676, v2681); + int16x8_t v3624 = vsubq_s16(v2686, v2691); + int16x8_t v3625_tmp = vqrdmulhq_n_s16(v3624, 13571); + int16x8_t v3625 = vmlaq_n_s16(v3625_tmp, v3624, 2); + int16x8_t v3626 = vaddq_s16(v3623, v3625); + int16x8_t v3627 = vsubq_s16(v2698, v2703); + int16x8_t v3628 = vsubq_s16(v2708, v2713); + int16x8_t v3629_tmp = vqrdmulhq_n_s16(v3628, 13571); + int16x8_t v3629 = vmlaq_n_s16(v3629_tmp, v3628, 2); + int16x8_t v3630 = vaddq_s16(v3627, 
v3629); + int16x8_t v3631 = vqrdmulhq_n_s16(v3630, 21089); + int16x8_t v3632 = vaddq_s16(v3626, v3631); + int16x8_t v3633 = vsubq_s16(v2588, v2599); + int16x8_t v3634 = vsubq_s16(v2610, v2621); + int16x8_t v3635_tmp = vqrdmulhq_n_s16(v3634, 23975); + int16x8_t v3635 = vmlaq_n_s16(v3635_tmp, v3634, 2); + int16x8_t v3636 = vaddq_s16(v3633, v3635); + int16x8_t v3637 = vsubq_s16(v2634, v2645); + int16x8_t v3638 = vsubq_s16(v2656, v2667); + int16x8_t v3639_tmp = vqrdmulhq_n_s16(v3638, 23975); + int16x8_t v3639 = vmlaq_n_s16(v3639_tmp, v3638, 2); + int16x8_t v3640 = vaddq_s16(v3637, v3639); + int16x8_t v3641 = vqrdmulhq_n_s16(v3640, 21303); + int16x8_t v3642 = vaddq_s16(v3636, v3641); + int16x8_t v3643 = vsubq_s16(v2494, v2505); + int16x8_t v3644 = vsubq_s16(v2516, v2527); + int16x8_t v3645_tmp = vqrdmulhq_n_s16(v3644, 4832); + int16x8_t v3645 = vmlaq_n_s16(v3645_tmp, v3644, 3); + int16x8_t v3646 = vaddq_s16(v3643, v3645); + int16x8_t v3647 = vsubq_s16(v2540, v2551); + int16x8_t v3648 = vsubq_s16(v2562, v2573); + int16x8_t v3649_tmp = vqrdmulhq_n_s16(v3648, 4832); + int16x8_t v3649 = vmlaq_n_s16(v3649_tmp, v3648, 3); + int16x8_t v3650 = vaddq_s16(v3647, v3649); + int16x8_t v3651 = vqrdmulhq_n_s16(v3650, 21524); + int16x8_t v3652 = vaddq_s16(v3646, v3651); + int16x8_t v3653 = vsubq_s16(v2399, v2410); + int16x8_t v3654 = vsubq_s16(v2421, v2432); + int16x8_t v3655_tmp = vqrdmulhq_n_s16(v3654, 23437); + int16x8_t v3655 = vmlaq_n_s16(v3655_tmp, v3654, 3); + int16x8_t v3656 = vaddq_s16(v3653, v3655); + int16x8_t v3657 = vsubq_s16(v2445, v2456); + int16x8_t v3658 = vsubq_s16(v2468, v2479); + int16x8_t v3659_tmp = vqrdmulhq_n_s16(v3658, 23437); + int16x8_t v3659 = vmlaq_n_s16(v3659_tmp, v3658, 3); + int16x8_t v3660 = vaddq_s16(v3657, v3659); + int16x8_t v3661 = vqrdmulhq_n_s16(v3660, 21753); + int16x8_t v3662 = vaddq_s16(v3656, v3661); + int16x8_t v3663 = vsubq_s16(v2305, v2316); + int16x8_t v3664 = vsubq_s16(v2327, v2338); + int16x8_t v3665_tmp = vqrdmulhq_n_s16(v3664, 17573); 
+ int16x8_t v3665 = vmlaq_n_s16(v3665_tmp, v3664, 4); + int16x8_t v3666 = vaddq_s16(v3663, v3665); + int16x8_t v3667 = vsubq_s16(v2351, v2362); + int16x8_t v3668 = vsubq_s16(v2373, v2384); + int16x8_t v3669_tmp = vqrdmulhq_n_s16(v3668, 17573); + int16x8_t v3669 = vmlaq_n_s16(v3669_tmp, v3668, 4); + int16x8_t v3670 = vaddq_s16(v3667, v3669); + int16x8_t v3671 = vqrdmulhq_n_s16(v3670, 21990); + int16x8_t v3672 = vaddq_s16(v3666, v3671); + int16x8_t v3673 = vsubq_s16(v2127, v2150); + int16x8_t v3674 = vsubq_s16(v2173, v2196); + int16x8_t v3675_tmp = vqrdmulhq_n_s16(v3674, 27122); + int16x8_t v3675 = vmlaq_n_s16(v3675_tmp, v3674, 5); + int16x8_t v3676 = vaddq_s16(v3673, v3675); + int16x8_t v3677 = vsubq_s16(v2221, v2244); + int16x8_t v3678 = vsubq_s16(v2267, v2290); + int16x8_t v3679_tmp = vqrdmulhq_n_s16(v3678, 27122); + int16x8_t v3679 = vmlaq_n_s16(v3679_tmp, v3678, 5); + int16x8_t v3680 = vaddq_s16(v3677, v3679); + int16x8_t v3681 = vqrdmulhq_n_s16(v3680, 22236); + int16x8_t v3682 = vaddq_s16(v3676, v3681); + int16x8_t v3683 = vsubq_s16(v1937, v1960); + int16x8_t v3684 = vsubq_s16(v1983, v2006); + int16x8_t v3685_tmp = vqrdmulhq_n_s16(v3684, 5041); + int16x8_t v3685 = vmlaq_n_s16(v3685_tmp, v3684, 8); + int16x8_t v3686 = vaddq_s16(v3683, v3685); + int16x8_t v3687 = vsubq_s16(v2031, v2054); + int16x8_t v3688 = vsubq_s16(v2077, v2100); + int16x8_t v3689_tmp = vqrdmulhq_n_s16(v3688, 5041); + int16x8_t v3689 = vmlaq_n_s16(v3689_tmp, v3688, 8); + int16x8_t v3690 = vaddq_s16(v3687, v3689); + int16x8_t v3691 = vqrdmulhq_n_s16(v3690, 22491); + int16x8_t v3692 = vaddq_s16(v3686, v3691); + int16x8_t v3693 = vsubq_s16(v1579, v1626); + int16x8_t v3694 = vsubq_s16(v1673, v1720); + int16x8_t v3695_tmp = vqrdmulhq_n_s16(v3694, 19146); + int16x8_t v3695 = vmlaq_n_s16(v3695_tmp, v3694, 13); + int16x8_t v3696 = vaddq_s16(v3693, v3695); + int16x8_t v3697 = vsubq_s16(v1769, v1816); + int16x8_t v3698 = vsubq_s16(v1863, v1910); + int16x8_t v3699_tmp = vqrdmulhq_n_s16(v3698, 19146); + 
int16x8_t v3699 = vmlaq_n_s16(v3699_tmp, v3698, 13); + int16x8_t v3700 = vaddq_s16(v3697, v3699); + int16x8_t v3701 = vqrdmulhq_n_s16(v3700, 22755); + int16x8_t v3702 = vaddq_s16(v3696, v3701); + int16x8_t v3703 = vsubq_s16(v141, v316); + int16x8_t v3704 = vsubq_s16(v522, v698); + int16x8_t v3705_tmp = vqrdmulhq_n_s16(v3704, 24402); + int16x8_t v3705 = vmlaq_n_s16(v3705_tmp, v3704, 40); + int16x8_t v3706 = vaddq_s16(v3703, v3705); + int16x8_t v3707 = vsubq_s16(v906, v1145); + int16x8_t v3708 = vsubq_s16(v1351, v1528); + int16x8_t v3709_tmp = vqrdmulhq_n_s16(v3708, 24402); + int16x8_t v3709 = vmlaq_n_s16(v3709_tmp, v3708, 40); + int16x8_t v3710 = vaddq_s16(v3707, v3709); + int16x8_t v3711 = vqrdmulhq_n_s16(v3710, 23030); + int16x8_t v3712 = vaddq_s16(v3706, v3711); + int16x8_t v3713 = vsubq_s16(v3703, v3705); + int16x8_t v3714 = vsubq_s16(v3707, v3709); + int16x8_t v3715 = vqrdmulhq_n_s16(v3714, 23314); + int16x8_t v3716 = vaddq_s16(v3713, v3715); + int16x8_t v3717 = vsubq_s16(v3693, v3695); + int16x8_t v3718 = vsubq_s16(v3697, v3699); + int16x8_t v3719 = vqrdmulhq_n_s16(v3718, 23609); + int16x8_t v3720 = vaddq_s16(v3717, v3719); + int16x8_t v3721 = vsubq_s16(v3683, v3685); + int16x8_t v3722 = vsubq_s16(v3687, v3689); + int16x8_t v3723 = vqrdmulhq_n_s16(v3722, 23915); + int16x8_t v3724 = vaddq_s16(v3721, v3723); + int16x8_t v3725 = vsubq_s16(v3673, v3675); + int16x8_t v3726 = vsubq_s16(v3677, v3679); + int16x8_t v3727 = vqrdmulhq_n_s16(v3726, 24233); + int16x8_t v3728 = vaddq_s16(v3725, v3727); + int16x8_t v3729 = vsubq_s16(v3663, v3665); + int16x8_t v3730 = vsubq_s16(v3667, v3669); + int16x8_t v3731 = vqrdmulhq_n_s16(v3730, 24564); + int16x8_t v3732 = vaddq_s16(v3729, v3731); + int16x8_t v3733 = vsubq_s16(v3653, v3655); + int16x8_t v3734 = vsubq_s16(v3657, v3659); + int16x8_t v3735 = vqrdmulhq_n_s16(v3734, 24907); + int16x8_t v3736 = vaddq_s16(v3733, v3735); + int16x8_t v3737 = vsubq_s16(v3643, v3645); + int16x8_t v3738 = vsubq_s16(v3647, v3649); + int16x8_t v3739 
= vqrdmulhq_n_s16(v3738, 25264); + int16x8_t v3740 = vaddq_s16(v3737, v3739); + int16x8_t v3741 = vsubq_s16(v3633, v3635); + int16x8_t v3742 = vsubq_s16(v3637, v3639); + int16x8_t v3743 = vqrdmulhq_n_s16(v3742, 25635); + int16x8_t v3744 = vaddq_s16(v3741, v3743); + int16x8_t v3745 = vsubq_s16(v3623, v3625); + int16x8_t v3746 = vsubq_s16(v3627, v3629); + int16x8_t v3747 = vqrdmulhq_n_s16(v3746, 26021); + int16x8_t v3748 = vaddq_s16(v3745, v3747); + int16x8_t v3749 = vsubq_s16(v3613, v3615); + int16x8_t v3750 = vsubq_s16(v3617, v3619); + int16x8_t v3751 = vqrdmulhq_n_s16(v3750, 26423); + int16x8_t v3752 = vaddq_s16(v3749, v3751); + int16x8_t v3753 = vsubq_s16(v3603, v3605); + int16x8_t v3754 = vsubq_s16(v3607, v3609); + int16x8_t v3755 = vqrdmulhq_n_s16(v3754, 26842); + int16x8_t v3756 = vaddq_s16(v3753, v3755); + int16x8_t v3757 = vsubq_s16(v3593, v3595); + int16x8_t v3758 = vsubq_s16(v3597, v3599); + int16x8_t v3759 = vqrdmulhq_n_s16(v3758, 27279); + int16x8_t v3760 = vaddq_s16(v3757, v3759); + int16x8_t v3761 = vsubq_s16(v3583, v3585); + int16x8_t v3762 = vsubq_s16(v3587, v3589); + int16x8_t v3763 = vqrdmulhq_n_s16(v3762, 27734); + int16x8_t v3764 = vaddq_s16(v3761, v3763); + int16x8_t v3765 = vsubq_s16(v3573, v3575); + int16x8_t v3766 = vsubq_s16(v3577, v3579); + int16x8_t v3767 = vqrdmulhq_n_s16(v3766, 28209); + int16x8_t v3768 = vaddq_s16(v3765, v3767); + int16x8_t v3769 = vsubq_s16(v3563, v3565); + int16x8_t v3770 = vsubq_s16(v3567, v3569); + int16x8_t v3771 = vqrdmulhq_n_s16(v3770, 28705); + int16x8_t v3772 = vaddq_s16(v3769, v3771); + int16x8_t v3773 = vsubq_s16(v3553, v3555); + int16x8_t v3774 = vsubq_s16(v3557, v3559); + int16x8_t v3775 = vqrdmulhq_n_s16(v3774, 29223); + int16x8_t v3776 = vaddq_s16(v3773, v3775); + int16x8_t v3777 = vsubq_s16(v3543, v3545); + int16x8_t v3778 = vsubq_s16(v3547, v3549); + int16x8_t v3779 = vqrdmulhq_n_s16(v3778, 29764); + int16x8_t v3780 = vaddq_s16(v3777, v3779); + int16x8_t v3781 = vsubq_s16(v3533, v3535); + int16x8_t 
v3782 = vsubq_s16(v3537, v3539); + int16x8_t v3783 = vqrdmulhq_n_s16(v3782, 30331); + int16x8_t v3784 = vaddq_s16(v3781, v3783); + int16x8_t v3785 = vsubq_s16(v3523, v3525); + int16x8_t v3786 = vsubq_s16(v3527, v3529); + int16x8_t v3787 = vqrdmulhq_n_s16(v3786, 30925); + int16x8_t v3788 = vaddq_s16(v3785, v3787); + int16x8_t v3789 = vsubq_s16(v3513, v3515); + int16x8_t v3790 = vsubq_s16(v3517, v3519); + int16x8_t v3791 = vqrdmulhq_n_s16(v3790, 31547); + int16x8_t v3792 = vaddq_s16(v3789, v3791); + int16x8_t v3793 = vsubq_s16(v3503, v3505); + int16x8_t v3794 = vsubq_s16(v3507, v3509); + int16x8_t v3795 = vqrdmulhq_n_s16(v3794, 32199); + int16x8_t v3796 = vaddq_s16(v3793, v3795); + int16x8_t v3797 = vsubq_s16(v3493, v3495); + int16x8_t v3798 = vsubq_s16(v3497, v3499); + int16x8_t v3799_tmp = vqrdmulhq_n_s16(v3798, 117); + int16x8_t v3799 = vaddq_s16(v3799_tmp, v3798); + int16x8_t v3800 = vaddq_s16(v3797, v3799); + int16x8_t v3801 = vsubq_s16(v3483, v3485); + int16x8_t v3802 = vsubq_s16(v3487, v3489); + int16x8_t v3803_tmp = vqrdmulhq_n_s16(v3802, 837); + int16x8_t v3803 = vaddq_s16(v3803_tmp, v3802); + int16x8_t v3804 = vaddq_s16(v3801, v3803); + int16x8_t v3805 = vsubq_s16(v3473, v3475); + int16x8_t v3806 = vsubq_s16(v3477, v3479); + int16x8_t v3807_tmp = vqrdmulhq_n_s16(v3806, 1594); + int16x8_t v3807 = vaddq_s16(v3807_tmp, v3806); + int16x8_t v3808 = vaddq_s16(v3805, v3807); + int16x8_t v3809 = vsubq_s16(v3463, v3465); + int16x8_t v3810 = vsubq_s16(v3467, v3469); + int16x8_t v3811_tmp = vqrdmulhq_n_s16(v3810, 2393); + int16x8_t v3811 = vaddq_s16(v3811_tmp, v3810); + int16x8_t v3812 = vaddq_s16(v3809, v3811); + int16x8_t v3813 = vsubq_s16(v3453, v3455); + int16x8_t v3814 = vsubq_s16(v3457, v3459); + int16x8_t v3815_tmp = vqrdmulhq_n_s16(v3814, 3234); + int16x8_t v3815 = vaddq_s16(v3815_tmp, v3814); + int16x8_t v3816 = vaddq_s16(v3813, v3815); + int16x8_t v3817 = vsubq_s16(v3443, v3445); + int16x8_t v3818 = vsubq_s16(v3447, v3449); + int16x8_t v3819_tmp = 
vqrdmulhq_n_s16(v3818, 4123); + int16x8_t v3819 = vaddq_s16(v3819_tmp, v3818); + int16x8_t v3820 = vaddq_s16(v3817, v3819); + int16x8_t v3821 = vsubq_s16(v3433, v3435); + int16x8_t v3822 = vsubq_s16(v3437, v3439); + int16x8_t v3823_tmp = vqrdmulhq_n_s16(v3822, 5062); + int16x8_t v3823 = vaddq_s16(v3823_tmp, v3822); + int16x8_t v3824 = vaddq_s16(v3821, v3823); + int16x8_t v3825 = vsubq_s16(v3423, v3425); + int16x8_t v3826 = vsubq_s16(v3427, v3429); + int16x8_t v3827_tmp = vqrdmulhq_n_s16(v3826, 6057); + int16x8_t v3827 = vaddq_s16(v3827_tmp, v3826); + int16x8_t v3828 = vaddq_s16(v3825, v3827); + int16x8_t v3829 = vsubq_s16(v3413, v3415); + int16x8_t v3830 = vsubq_s16(v3417, v3419); + int16x8_t v3831_tmp = vqrdmulhq_n_s16(v3830, 7111); + int16x8_t v3831 = vaddq_s16(v3831_tmp, v3830); + int16x8_t v3832 = vaddq_s16(v3829, v3831); + int16x8_t v3833 = vsubq_s16(v3403, v3405); + int16x8_t v3834 = vsubq_s16(v3407, v3409); + int16x8_t v3835_tmp = vqrdmulhq_n_s16(v3834, 8231); + int16x8_t v3835 = vaddq_s16(v3835_tmp, v3834); + int16x8_t v3836 = vaddq_s16(v3833, v3835); + int16x8_t v3837 = vsubq_s16(v3393, v3395); + int16x8_t v3838 = vsubq_s16(v3397, v3399); + int16x8_t v3839_tmp = vqrdmulhq_n_s16(v3838, 9421); + int16x8_t v3839 = vaddq_s16(v3839_tmp, v3838); + int16x8_t v3840 = vaddq_s16(v3837, v3839); + int16x8_t v3841 = vsubq_s16(v3374, v3379); + int16x8_t v3842 = vsubq_s16(v3384, v3389); + int16x8_t v3843_tmp = vqrdmulhq_n_s16(v3842, 10690); + int16x8_t v3843 = vaddq_s16(v3843_tmp, v3842); + int16x8_t v3844 = vaddq_s16(v3841, v3843); + int16x8_t v3845 = vsubq_s16(v3352, v3357); + int16x8_t v3846 = vsubq_s16(v3362, v3367); + int16x8_t v3847_tmp = vqrdmulhq_n_s16(v3846, 12044); + int16x8_t v3847 = vaddq_s16(v3847_tmp, v3846); + int16x8_t v3848 = vaddq_s16(v3845, v3847); + int16x8_t v3849 = vsubq_s16(v3330, v3335); + int16x8_t v3850 = vsubq_s16(v3340, v3345); + int16x8_t v3851_tmp = vqrdmulhq_n_s16(v3850, 13493); + int16x8_t v3851 = vaddq_s16(v3851_tmp, v3850); + int16x8_t 
v3852 = vaddq_s16(v3849, v3851); + int16x8_t v3853 = vsubq_s16(v3308, v3313); + int16x8_t v3854 = vsubq_s16(v3318, v3323); + int16x8_t v3855_tmp = vqrdmulhq_n_s16(v3854, 15046); + int16x8_t v3855 = vaddq_s16(v3855_tmp, v3854); + int16x8_t v3856 = vaddq_s16(v3853, v3855); + int16x8_t v3857 = vsubq_s16(v3286, v3291); + int16x8_t v3858 = vsubq_s16(v3296, v3301); + int16x8_t v3859_tmp = vqrdmulhq_n_s16(v3858, 16715); + int16x8_t v3859 = vaddq_s16(v3859_tmp, v3858); + int16x8_t v3860 = vaddq_s16(v3857, v3859); + int16x8_t v3861 = vsubq_s16(v3264, v3269); + int16x8_t v3862 = vsubq_s16(v3274, v3279); + int16x8_t v3863_tmp = vqrdmulhq_n_s16(v3862, 18512); + int16x8_t v3863 = vaddq_s16(v3863_tmp, v3862); + int16x8_t v3864 = vaddq_s16(v3861, v3863); + int16x8_t v3865 = vsubq_s16(v3242, v3247); + int16x8_t v3866 = vsubq_s16(v3252, v3257); + int16x8_t v3867_tmp = vqrdmulhq_n_s16(v3866, 20453); + int16x8_t v3867 = vaddq_s16(v3867_tmp, v3866); + int16x8_t v3868 = vaddq_s16(v3865, v3867); + int16x8_t v3869 = vsubq_s16(v3220, v3225); + int16x8_t v3870 = vsubq_s16(v3230, v3235); + int16x8_t v3871_tmp = vqrdmulhq_n_s16(v3870, 22555); + int16x8_t v3871 = vaddq_s16(v3871_tmp, v3870); + int16x8_t v3872 = vaddq_s16(v3869, v3871); + int16x8_t v3873 = vsubq_s16(v3198, v3203); + int16x8_t v3874 = vsubq_s16(v3208, v3213); + int16x8_t v3875_tmp = vqrdmulhq_n_s16(v3874, 24839); + int16x8_t v3875 = vaddq_s16(v3875_tmp, v3874); + int16x8_t v3876 = vaddq_s16(v3873, v3875); + int16x8_t v3877 = vsubq_s16(v3176, v3181); + int16x8_t v3878 = vsubq_s16(v3186, v3191); + int16x8_t v3879_tmp = vqrdmulhq_n_s16(v3878, 27330); + int16x8_t v3879 = vaddq_s16(v3879_tmp, v3878); + int16x8_t v3880 = vaddq_s16(v3877, v3879); + int16x8_t v3881 = vsubq_s16(v3154, v3159); + int16x8_t v3882 = vsubq_s16(v3164, v3169); + int16x8_t v3883_tmp = vqrdmulhq_n_s16(v3882, 30056); + int16x8_t v3883 = vaddq_s16(v3883_tmp, v3882); + int16x8_t v3884 = vaddq_s16(v3881, v3883); + int16x8_t v3885 = vsubq_s16(v3132, v3137); + 
int16x8_t v3886 = vsubq_s16(v3142, v3147); + int16x8_t v3887_tmp = vqrdmulhq_n_s16(v3886, 282); + int16x8_t v3887 = vmlaq_n_s16(v3887_tmp, v3886, 2); + int16x8_t v3888 = vaddq_s16(v3885, v3887); + int16x8_t v3889 = vsubq_s16(v3110, v3115); + int16x8_t v3890 = vsubq_s16(v3120, v3125); + int16x8_t v3891_tmp = vqrdmulhq_n_s16(v3890, 3588); + int16x8_t v3891 = vmlaq_n_s16(v3891_tmp, v3890, 2); + int16x8_t v3892 = vaddq_s16(v3889, v3891); + int16x8_t v3893 = vsubq_s16(v3088, v3093); + int16x8_t v3894 = vsubq_s16(v3098, v3103); + int16x8_t v3895_tmp = vqrdmulhq_n_s16(v3894, 7255); + int16x8_t v3895 = vmlaq_n_s16(v3895_tmp, v3894, 2); + int16x8_t v3896 = vaddq_s16(v3893, v3895); + int16x8_t v3897 = vsubq_s16(v3066, v3071); + int16x8_t v3898 = vsubq_s16(v3076, v3081); + int16x8_t v3899_tmp = vqrdmulhq_n_s16(v3898, 11344); + int16x8_t v3899 = vmlaq_n_s16(v3899_tmp, v3898, 2); + int16x8_t v3900 = vaddq_s16(v3897, v3899); + int16x8_t v3901 = vsubq_s16(v3044, v3049); + int16x8_t v3902 = vsubq_s16(v3054, v3059); + int16x8_t v3903_tmp = vqrdmulhq_n_s16(v3902, 15934); + int16x8_t v3903 = vmlaq_n_s16(v3903_tmp, v3902, 2); + int16x8_t v3904 = vaddq_s16(v3901, v3903); + int16x8_t v3905 = vsubq_s16(v3004, v3015); + int16x8_t v3906 = vsubq_s16(v3026, v3037); + int16x8_t v3907_tmp = vqrdmulhq_n_s16(v3906, 21120); + int16x8_t v3907 = vmlaq_n_s16(v3907_tmp, v3906, 2); + int16x8_t v3908 = vaddq_s16(v3905, v3907); + int16x8_t v3909 = vsubq_s16(v2958, v2969); + int16x8_t v3910 = vsubq_s16(v2980, v2991); + int16x8_t v3911_tmp = vqrdmulhq_n_s16(v3910, 27027); + int16x8_t v3911 = vmlaq_n_s16(v3911_tmp, v3910, 2); + int16x8_t v3912 = vaddq_s16(v3909, v3911); + int16x8_t v3913 = vsubq_s16(v2912, v2923); + int16x8_t v3914 = vsubq_s16(v2934, v2945); + int16x8_t v3915_tmp = vqrdmulhq_n_s16(v3914, 1045); + int16x8_t v3915 = vmlaq_n_s16(v3915_tmp, v3914, 3); + int16x8_t v3916 = vaddq_s16(v3913, v3915); + int16x8_t v3917 = vsubq_s16(v2866, v2877); + int16x8_t v3918 = vsubq_s16(v2888, v2899); + 
int16x8_t v3919_tmp = vqrdmulhq_n_s16(v3918, 8923); + int16x8_t v3919 = vmlaq_n_s16(v3919_tmp, v3918, 3); + int16x8_t v3920 = vaddq_s16(v3917, v3919); + int16x8_t v3921 = vsubq_s16(v2820, v2831); + int16x8_t v3922 = vsubq_s16(v2842, v2853); + int16x8_t v3923_tmp = vqrdmulhq_n_s16(v3922, 18177); + int16x8_t v3923 = vmlaq_n_s16(v3923_tmp, v3922, 3); + int16x8_t v3924 = vaddq_s16(v3921, v3923); + int16x8_t v3925 = vsubq_s16(v2774, v2785); + int16x8_t v3926 = vsubq_s16(v2796, v2807); + int16x8_t v3927_tmp = vqrdmulhq_n_s16(v3926, 29200); + int16x8_t v3927 = vmlaq_n_s16(v3927_tmp, v3926, 3); + int16x8_t v3928 = vaddq_s16(v3925, v3927); + int16x8_t v3929 = vsubq_s16(v2728, v2739); + int16x8_t v3930 = vsubq_s16(v2750, v2761); + int16x8_t v3931_tmp = vqrdmulhq_n_s16(v3930, 9782); + int16x8_t v3931 = vmlaq_n_s16(v3931_tmp, v3930, 4); + int16x8_t v3932 = vaddq_s16(v3929, v3931); + int16x8_t v3933 = vsubq_s16(v2682, v2693); + int16x8_t v3934 = vsubq_s16(v2704, v2715); + int16x8_t v3935_tmp = vqrdmulhq_n_s16(v3934, 26282); + int16x8_t v3935 = vmlaq_n_s16(v3935_tmp, v3934, 4); + int16x8_t v3936 = vaddq_s16(v3933, v3935); + int16x8_t v3937 = vsubq_s16(v2600, v2623); + int16x8_t v3938 = vsubq_s16(v2646, v2669); + int16x8_t v3939_tmp = vqrdmulhq_n_s16(v3938, 14423); + int16x8_t v3939 = vmlaq_n_s16(v3939_tmp, v3938, 5); + int16x8_t v3940 = vaddq_s16(v3937, v3939); + int16x8_t v3941 = vsubq_s16(v2506, v2529); + int16x8_t v3942 = vsubq_s16(v2552, v2575); + int16x8_t v3943_tmp = vqrdmulhq_n_s16(v3942, 9008); + int16x8_t v3943 = vmlaq_n_s16(v3943_tmp, v3942, 6); + int16x8_t v3944 = vaddq_s16(v3941, v3943); + int16x8_t v3945 = vsubq_s16(v2411, v2434); + int16x8_t v3946 = vsubq_s16(v2457, v2481); + int16x8_t v3947_tmp = vqrdmulhq_n_s16(v3946, 13552); + int16x8_t v3947 = vmlaq_n_s16(v3947_tmp, v3946, 7); + int16x8_t v3948 = vaddq_s16(v3945, v3947); + int16x8_t v3949 = vsubq_s16(v2317, v2340); + int16x8_t v3950 = vsubq_s16(v2363, v2386); + int16x8_t v3951_tmp = vqrdmulhq_n_s16(v3950, 
1925); + int16x8_t v3951 = vmlaq_n_s16(v3951_tmp, v3950, 9); + int16x8_t v3952 = vaddq_s16(v3949, v3951); + int16x8_t v3953 = vsubq_s16(v2151, v2198); + int16x8_t v3954 = vsubq_s16(v2245, v2292); + int16x8_t v3955_tmp = vqrdmulhq_n_s16(v3954, 21123); + int16x8_t v3955 = vmlaq_n_s16(v3955_tmp, v3954, 11); + int16x8_t v3956 = vaddq_s16(v3953, v3955); + int16x8_t v3957 = vsubq_s16(v1961, v2008); + int16x8_t v3958 = vsubq_s16(v2055, v2102); + int16x8_t v3959_tmp = vqrdmulhq_n_s16(v3958, 9831); + int16x8_t v3959 = vmlaq_n_s16(v3959_tmp, v3958, 16); + int16x8_t v3960 = vaddq_s16(v3957, v3959); + int16x8_t v3961 = vsubq_s16(v1627, v1722); + int16x8_t v3962 = vsubq_s16(v1817, v1912); + int16x8_t v3963_tmp = vqrdmulhq_n_s16(v3962, 5373); + int16x8_t v3963 = vmlaq_n_s16(v3963_tmp, v3962, 27); + int16x8_t v3964 = vaddq_s16(v3961, v3963); + int16x8_t v3965 = vsubq_s16(v317, v700); + int16x8_t v3966 = vsubq_s16(v1146, v1530); + int16x8_t v3967_tmp = vqrdmulhq_n_s16(v3966, 15986); + int16x8_t v3967 = vmlaq_n_s16(v3967_tmp, v3966, 81); + int16x8_t v3968 = vaddq_s16(v3965, v3967); + int16x8_t v3969 = vsubq_s16(v3965, v3967); + int16x8_t v3970 = vsubq_s16(v3961, v3963); + int16x8_t v3971 = vsubq_s16(v3957, v3959); + int16x8_t v3972 = vsubq_s16(v3953, v3955); + int16x8_t v3973 = vsubq_s16(v3949, v3951); + int16x8_t v3974 = vsubq_s16(v3945, v3947); + int16x8_t v3975 = vsubq_s16(v3941, v3943); + int16x8_t v3976 = vsubq_s16(v3937, v3939); + int16x8_t v3977 = vsubq_s16(v3933, v3935); + int16x8_t v3978 = vsubq_s16(v3929, v3931); + int16x8_t v3979 = vsubq_s16(v3925, v3927); + int16x8_t v3980 = vsubq_s16(v3921, v3923); + int16x8_t v3981 = vsubq_s16(v3917, v3919); + int16x8_t v3982 = vsubq_s16(v3913, v3915); + int16x8_t v3983 = vsubq_s16(v3909, v3911); + int16x8_t v3984 = vsubq_s16(v3905, v3907); + int16x8_t v3985 = vsubq_s16(v3901, v3903); + int16x8_t v3986 = vsubq_s16(v3897, v3899); + int16x8_t v3987 = vsubq_s16(v3893, v3895); + int16x8_t v3988 = vsubq_s16(v3889, v3891); + int16x8_t v3989 
= vsubq_s16(v3885, v3887); + int16x8_t v3990 = vsubq_s16(v3881, v3883); + int16x8_t v3991 = vsubq_s16(v3877, v3879); + int16x8_t v3992 = vsubq_s16(v3873, v3875); + int16x8_t v3993 = vsubq_s16(v3869, v3871); + int16x8_t v3994 = vsubq_s16(v3865, v3867); + int16x8_t v3995 = vsubq_s16(v3861, v3863); + int16x8_t v3996 = vsubq_s16(v3857, v3859); + int16x8_t v3997 = vsubq_s16(v3853, v3855); + int16x8_t v3998 = vsubq_s16(v3849, v3851); + int16x8_t v3999 = vsubq_s16(v3845, v3847); + int16x8_t v4000 = vsubq_s16(v3841, v3843); + int16x8_t v4001 = vsubq_s16(v3837, v3839); + int16x8_t v4002 = vsubq_s16(v3833, v3835); + int16x8_t v4003 = vsubq_s16(v3829, v3831); + int16x8_t v4004 = vsubq_s16(v3825, v3827); + int16x8_t v4005 = vsubq_s16(v3821, v3823); + int16x8_t v4006 = vsubq_s16(v3817, v3819); + int16x8_t v4007 = vsubq_s16(v3813, v3815); + int16x8_t v4008 = vsubq_s16(v3809, v3811); + int16x8_t v4009 = vsubq_s16(v3805, v3807); + int16x8_t v4010 = vsubq_s16(v3801, v3803); + int16x8_t v4011 = vsubq_s16(v3797, v3799); + int16x8_t v4012 = vsubq_s16(v3793, v3795); + int16x8_t v4013 = vsubq_s16(v3789, v3791); + int16x8_t v4014 = vsubq_s16(v3785, v3787); + int16x8_t v4015 = vsubq_s16(v3781, v3783); + int16x8_t v4016 = vsubq_s16(v3777, v3779); + int16x8_t v4017 = vsubq_s16(v3773, v3775); + int16x8_t v4018 = vsubq_s16(v3769, v3771); + int16x8_t v4019 = vsubq_s16(v3765, v3767); + int16x8_t v4020 = vsubq_s16(v3761, v3763); + int16x8_t v4021 = vsubq_s16(v3757, v3759); + int16x8_t v4022 = vsubq_s16(v3753, v3755); + int16x8_t v4023 = vsubq_s16(v3749, v3751); + int16x8_t v4024 = vsubq_s16(v3745, v3747); + int16x8_t v4025 = vsubq_s16(v3741, v3743); + int16x8_t v4026 = vsubq_s16(v3737, v3739); + int16x8_t v4027 = vsubq_s16(v3733, v3735); + int16x8_t v4028 = vsubq_s16(v3729, v3731); + int16x8_t v4029 = vsubq_s16(v3725, v3727); + int16x8_t v4030 = vsubq_s16(v3721, v3723); + int16x8_t v4031 = vsubq_s16(v3717, v3719); + int16x8_t v4032 = vsubq_s16(v3713, v3715); + int16x8_t v4033 = vsubq_s16(v3706, 
v3711); + int16x8_t v4034 = vsubq_s16(v3696, v3701); + int16x8_t v4035 = vsubq_s16(v3686, v3691); + int16x8_t v4036 = vsubq_s16(v3676, v3681); + int16x8_t v4037 = vsubq_s16(v3666, v3671); + int16x8_t v4038 = vsubq_s16(v3656, v3661); + int16x8_t v4039 = vsubq_s16(v3646, v3651); + int16x8_t v4040 = vsubq_s16(v3636, v3641); + int16x8_t v4041 = vsubq_s16(v3626, v3631); + int16x8_t v4042 = vsubq_s16(v3616, v3621); + int16x8_t v4043 = vsubq_s16(v3606, v3611); + int16x8_t v4044 = vsubq_s16(v3596, v3601); + int16x8_t v4045 = vsubq_s16(v3586, v3591); + int16x8_t v4046 = vsubq_s16(v3576, v3581); + int16x8_t v4047 = vsubq_s16(v3566, v3571); + int16x8_t v4048 = vsubq_s16(v3556, v3561); + int16x8_t v4049 = vsubq_s16(v3546, v3551); + int16x8_t v4050 = vsubq_s16(v3536, v3541); + int16x8_t v4051 = vsubq_s16(v3526, v3531); + int16x8_t v4052 = vsubq_s16(v3516, v3521); + int16x8_t v4053 = vsubq_s16(v3506, v3511); + int16x8_t v4054 = vsubq_s16(v3496, v3501); + int16x8_t v4055 = vsubq_s16(v3486, v3491); + int16x8_t v4056 = vsubq_s16(v3476, v3481); + int16x8_t v4057 = vsubq_s16(v3466, v3471); + int16x8_t v4058 = vsubq_s16(v3456, v3461); + int16x8_t v4059 = vsubq_s16(v3446, v3451); + int16x8_t v4060 = vsubq_s16(v3436, v3441); + int16x8_t v4061 = vsubq_s16(v3426, v3431); + int16x8_t v4062 = vsubq_s16(v3416, v3421); + int16x8_t v4063 = vsubq_s16(v3406, v3411); + int16x8_t v4064 = vsubq_s16(v3396, v3401); + int16x8_t v4065 = vsubq_s16(v3380, v3391); + int16x8_t v4066 = vsubq_s16(v3358, v3369); + int16x8_t v4067 = vsubq_s16(v3336, v3347); + int16x8_t v4068 = vsubq_s16(v3314, v3325); + int16x8_t v4069 = vsubq_s16(v3292, v3303); + int16x8_t v4070 = vsubq_s16(v3270, v3281); + int16x8_t v4071 = vsubq_s16(v3248, v3259); + int16x8_t v4072 = vsubq_s16(v3226, v3237); + int16x8_t v4073 = vsubq_s16(v3204, v3215); + int16x8_t v4074 = vsubq_s16(v3182, v3193); + int16x8_t v4075 = vsubq_s16(v3160, v3171); + int16x8_t v4076 = vsubq_s16(v3138, v3149); + int16x8_t v4077 = vsubq_s16(v3116, v3127); + int16x8_t 
v4078 = vsubq_s16(v3094, v3105); + int16x8_t v4079 = vsubq_s16(v3072, v3083); + int16x8_t v4080 = vsubq_s16(v3050, v3061); + int16x8_t v4081 = vsubq_s16(v3016, v3039); + int16x8_t v4082 = vsubq_s16(v2970, v2993); + int16x8_t v4083 = vsubq_s16(v2924, v2947); + int16x8_t v4084 = vsubq_s16(v2878, v2901); + int16x8_t v4085 = vsubq_s16(v2832, v2855); + int16x8_t v4086 = vsubq_s16(v2786, v2809); + int16x8_t v4087 = vsubq_s16(v2740, v2763); + int16x8_t v4088 = vsubq_s16(v2694, v2717); + int16x8_t v4089 = vsubq_s16(v2624, v2671); + int16x8_t v4090 = vsubq_s16(v2530, v2577); + int16x8_t v4091 = vsubq_s16(v2435, v2483); + int16x8_t v4092 = vsubq_s16(v2341, v2388); + int16x8_t v4093 = vsubq_s16(v2199, v2294); + int16x8_t v4094 = vsubq_s16(v2009, v2104); + int16x8_t v4095 = vsubq_s16(v1723, v1914); + int16x8_t v4096 = vsubq_s16(v701, v1532); + vst1q_s16(out + out_stride * 0 + i, v1533); + vst1q_s16(out + out_stride * 1 + i, v1915); + vst1q_s16(out + out_stride * 2 + i, v2105); + vst1q_s16(out + out_stride * 3 + i, v2295); + vst1q_s16(out + out_stride * 4 + i, v2389); + vst1q_s16(out + out_stride * 5 + i, v2484); + vst1q_s16(out + out_stride * 6 + i, v2578); + vst1q_s16(out + out_stride * 7 + i, v2672); + vst1q_s16(out + out_stride * 8 + i, v2718); + vst1q_s16(out + out_stride * 9 + i, v2764); + vst1q_s16(out + out_stride * 10 + i, v2810); + vst1q_s16(out + out_stride * 11 + i, v2856); + vst1q_s16(out + out_stride * 12 + i, v2902); + vst1q_s16(out + out_stride * 13 + i, v2948); + vst1q_s16(out + out_stride * 14 + i, v2994); + vst1q_s16(out + out_stride * 15 + i, v3040); + vst1q_s16(out + out_stride * 16 + i, v3062); + vst1q_s16(out + out_stride * 17 + i, v3084); + vst1q_s16(out + out_stride * 18 + i, v3106); + vst1q_s16(out + out_stride * 19 + i, v3128); + vst1q_s16(out + out_stride * 20 + i, v3150); + vst1q_s16(out + out_stride * 21 + i, v3172); + vst1q_s16(out + out_stride * 22 + i, v3194); + vst1q_s16(out + out_stride * 23 + i, v3216); + vst1q_s16(out + out_stride * 24 + i, 
v3238); + vst1q_s16(out + out_stride * 25 + i, v3260); + vst1q_s16(out + out_stride * 26 + i, v3282); + vst1q_s16(out + out_stride * 27 + i, v3304); + vst1q_s16(out + out_stride * 28 + i, v3326); + vst1q_s16(out + out_stride * 29 + i, v3348); + vst1q_s16(out + out_stride * 30 + i, v3370); + vst1q_s16(out + out_stride * 31 + i, v3392); + vst1q_s16(out + out_stride * 32 + i, v3402); + vst1q_s16(out + out_stride * 33 + i, v3412); + vst1q_s16(out + out_stride * 34 + i, v3422); + vst1q_s16(out + out_stride * 35 + i, v3432); + vst1q_s16(out + out_stride * 36 + i, v3442); + vst1q_s16(out + out_stride * 37 + i, v3452); + vst1q_s16(out + out_stride * 38 + i, v3462); + vst1q_s16(out + out_stride * 39 + i, v3472); + vst1q_s16(out + out_stride * 40 + i, v3482); + vst1q_s16(out + out_stride * 41 + i, v3492); + vst1q_s16(out + out_stride * 42 + i, v3502); + vst1q_s16(out + out_stride * 43 + i, v3512); + vst1q_s16(out + out_stride * 44 + i, v3522); + vst1q_s16(out + out_stride * 45 + i, v3532); + vst1q_s16(out + out_stride * 46 + i, v3542); + vst1q_s16(out + out_stride * 47 + i, v3552); + vst1q_s16(out + out_stride * 48 + i, v3562); + vst1q_s16(out + out_stride * 49 + i, v3572); + vst1q_s16(out + out_stride * 50 + i, v3582); + vst1q_s16(out + out_stride * 51 + i, v3592); + vst1q_s16(out + out_stride * 52 + i, v3602); + vst1q_s16(out + out_stride * 53 + i, v3612); + vst1q_s16(out + out_stride * 54 + i, v3622); + vst1q_s16(out + out_stride * 55 + i, v3632); + vst1q_s16(out + out_stride * 56 + i, v3642); + vst1q_s16(out + out_stride * 57 + i, v3652); + vst1q_s16(out + out_stride * 58 + i, v3662); + vst1q_s16(out + out_stride * 59 + i, v3672); + vst1q_s16(out + out_stride * 60 + i, v3682); + vst1q_s16(out + out_stride * 61 + i, v3692); + vst1q_s16(out + out_stride * 62 + i, v3702); + vst1q_s16(out + out_stride * 63 + i, v3712); + vst1q_s16(out + out_stride * 64 + i, v3716); + vst1q_s16(out + out_stride * 65 + i, v3720); + vst1q_s16(out + out_stride * 66 + i, v3724); + vst1q_s16(out + 
out_stride * 67 + i, v3728); + vst1q_s16(out + out_stride * 68 + i, v3732); + vst1q_s16(out + out_stride * 69 + i, v3736); + vst1q_s16(out + out_stride * 70 + i, v3740); + vst1q_s16(out + out_stride * 71 + i, v3744); + vst1q_s16(out + out_stride * 72 + i, v3748); + vst1q_s16(out + out_stride * 73 + i, v3752); + vst1q_s16(out + out_stride * 74 + i, v3756); + vst1q_s16(out + out_stride * 75 + i, v3760); + vst1q_s16(out + out_stride * 76 + i, v3764); + vst1q_s16(out + out_stride * 77 + i, v3768); + vst1q_s16(out + out_stride * 78 + i, v3772); + vst1q_s16(out + out_stride * 79 + i, v3776); + vst1q_s16(out + out_stride * 80 + i, v3780); + vst1q_s16(out + out_stride * 81 + i, v3784); + vst1q_s16(out + out_stride * 82 + i, v3788); + vst1q_s16(out + out_stride * 83 + i, v3792); + vst1q_s16(out + out_stride * 84 + i, v3796); + vst1q_s16(out + out_stride * 85 + i, v3800); + vst1q_s16(out + out_stride * 86 + i, v3804); + vst1q_s16(out + out_stride * 87 + i, v3808); + vst1q_s16(out + out_stride * 88 + i, v3812); + vst1q_s16(out + out_stride * 89 + i, v3816); + vst1q_s16(out + out_stride * 90 + i, v3820); + vst1q_s16(out + out_stride * 91 + i, v3824); + vst1q_s16(out + out_stride * 92 + i, v3828); + vst1q_s16(out + out_stride * 93 + i, v3832); + vst1q_s16(out + out_stride * 94 + i, v3836); + vst1q_s16(out + out_stride * 95 + i, v3840); + vst1q_s16(out + out_stride * 96 + i, v3844); + vst1q_s16(out + out_stride * 97 + i, v3848); + vst1q_s16(out + out_stride * 98 + i, v3852); + vst1q_s16(out + out_stride * 99 + i, v3856); + vst1q_s16(out + out_stride * 100 + i, v3860); + vst1q_s16(out + out_stride * 101 + i, v3864); + vst1q_s16(out + out_stride * 102 + i, v3868); + vst1q_s16(out + out_stride * 103 + i, v3872); + vst1q_s16(out + out_stride * 104 + i, v3876); + vst1q_s16(out + out_stride * 105 + i, v3880); + vst1q_s16(out + out_stride * 106 + i, v3884); + vst1q_s16(out + out_stride * 107 + i, v3888); + vst1q_s16(out + out_stride * 108 + i, v3892); + vst1q_s16(out + out_stride * 109 
+ i, v3896); + vst1q_s16(out + out_stride * 110 + i, v3900); + vst1q_s16(out + out_stride * 111 + i, v3904); + vst1q_s16(out + out_stride * 112 + i, v3908); + vst1q_s16(out + out_stride * 113 + i, v3912); + vst1q_s16(out + out_stride * 114 + i, v3916); + vst1q_s16(out + out_stride * 115 + i, v3920); + vst1q_s16(out + out_stride * 116 + i, v3924); + vst1q_s16(out + out_stride * 117 + i, v3928); + vst1q_s16(out + out_stride * 118 + i, v3932); + vst1q_s16(out + out_stride * 119 + i, v3936); + vst1q_s16(out + out_stride * 120 + i, v3940); + vst1q_s16(out + out_stride * 121 + i, v3944); + vst1q_s16(out + out_stride * 122 + i, v3948); + vst1q_s16(out + out_stride * 123 + i, v3952); + vst1q_s16(out + out_stride * 124 + i, v3956); + vst1q_s16(out + out_stride * 125 + i, v3960); + vst1q_s16(out + out_stride * 126 + i, v3964); + vst1q_s16(out + out_stride * 127 + i, v3968); + vst1q_s16(out + out_stride * 128 + i, v3969); + vst1q_s16(out + out_stride * 129 + i, v3970); + vst1q_s16(out + out_stride * 130 + i, v3971); + vst1q_s16(out + out_stride * 131 + i, v3972); + vst1q_s16(out + out_stride * 132 + i, v3973); + vst1q_s16(out + out_stride * 133 + i, v3974); + vst1q_s16(out + out_stride * 134 + i, v3975); + vst1q_s16(out + out_stride * 135 + i, v3976); + vst1q_s16(out + out_stride * 136 + i, v3977); + vst1q_s16(out + out_stride * 137 + i, v3978); + vst1q_s16(out + out_stride * 138 + i, v3979); + vst1q_s16(out + out_stride * 139 + i, v3980); + vst1q_s16(out + out_stride * 140 + i, v3981); + vst1q_s16(out + out_stride * 141 + i, v3982); + vst1q_s16(out + out_stride * 142 + i, v3983); + vst1q_s16(out + out_stride * 143 + i, v3984); + vst1q_s16(out + out_stride * 144 + i, v3985); + vst1q_s16(out + out_stride * 145 + i, v3986); + vst1q_s16(out + out_stride * 146 + i, v3987); + vst1q_s16(out + out_stride * 147 + i, v3988); + vst1q_s16(out + out_stride * 148 + i, v3989); + vst1q_s16(out + out_stride * 149 + i, v3990); + vst1q_s16(out + out_stride * 150 + i, v3991); + vst1q_s16(out + 
out_stride * 151 + i, v3992); + vst1q_s16(out + out_stride * 152 + i, v3993); + vst1q_s16(out + out_stride * 153 + i, v3994); + vst1q_s16(out + out_stride * 154 + i, v3995); + vst1q_s16(out + out_stride * 155 + i, v3996); + vst1q_s16(out + out_stride * 156 + i, v3997); + vst1q_s16(out + out_stride * 157 + i, v3998); + vst1q_s16(out + out_stride * 158 + i, v3999); + vst1q_s16(out + out_stride * 159 + i, v4000); + vst1q_s16(out + out_stride * 160 + i, v4001); + vst1q_s16(out + out_stride * 161 + i, v4002); + vst1q_s16(out + out_stride * 162 + i, v4003); + vst1q_s16(out + out_stride * 163 + i, v4004); + vst1q_s16(out + out_stride * 164 + i, v4005); + vst1q_s16(out + out_stride * 165 + i, v4006); + vst1q_s16(out + out_stride * 166 + i, v4007); + vst1q_s16(out + out_stride * 167 + i, v4008); + vst1q_s16(out + out_stride * 168 + i, v4009); + vst1q_s16(out + out_stride * 169 + i, v4010); + vst1q_s16(out + out_stride * 170 + i, v4011); + vst1q_s16(out + out_stride * 171 + i, v4012); + vst1q_s16(out + out_stride * 172 + i, v4013); + vst1q_s16(out + out_stride * 173 + i, v4014); + vst1q_s16(out + out_stride * 174 + i, v4015); + vst1q_s16(out + out_stride * 175 + i, v4016); + vst1q_s16(out + out_stride * 176 + i, v4017); + vst1q_s16(out + out_stride * 177 + i, v4018); + vst1q_s16(out + out_stride * 178 + i, v4019); + vst1q_s16(out + out_stride * 179 + i, v4020); + vst1q_s16(out + out_stride * 180 + i, v4021); + vst1q_s16(out + out_stride * 181 + i, v4022); + vst1q_s16(out + out_stride * 182 + i, v4023); + vst1q_s16(out + out_stride * 183 + i, v4024); + vst1q_s16(out + out_stride * 184 + i, v4025); + vst1q_s16(out + out_stride * 185 + i, v4026); + vst1q_s16(out + out_stride * 186 + i, v4027); + vst1q_s16(out + out_stride * 187 + i, v4028); + vst1q_s16(out + out_stride * 188 + i, v4029); + vst1q_s16(out + out_stride * 189 + i, v4030); + vst1q_s16(out + out_stride * 190 + i, v4031); + vst1q_s16(out + out_stride * 191 + i, v4032); + vst1q_s16(out + out_stride * 192 + i, v4033); + 
vst1q_s16(out + out_stride * 193 + i, v4034); + vst1q_s16(out + out_stride * 194 + i, v4035); + vst1q_s16(out + out_stride * 195 + i, v4036); + vst1q_s16(out + out_stride * 196 + i, v4037); + vst1q_s16(out + out_stride * 197 + i, v4038); + vst1q_s16(out + out_stride * 198 + i, v4039); + vst1q_s16(out + out_stride * 199 + i, v4040); + vst1q_s16(out + out_stride * 200 + i, v4041); + vst1q_s16(out + out_stride * 201 + i, v4042); + vst1q_s16(out + out_stride * 202 + i, v4043); + vst1q_s16(out + out_stride * 203 + i, v4044); + vst1q_s16(out + out_stride * 204 + i, v4045); + vst1q_s16(out + out_stride * 205 + i, v4046); + vst1q_s16(out + out_stride * 206 + i, v4047); + vst1q_s16(out + out_stride * 207 + i, v4048); + vst1q_s16(out + out_stride * 208 + i, v4049); + vst1q_s16(out + out_stride * 209 + i, v4050); + vst1q_s16(out + out_stride * 210 + i, v4051); + vst1q_s16(out + out_stride * 211 + i, v4052); + vst1q_s16(out + out_stride * 212 + i, v4053); + vst1q_s16(out + out_stride * 213 + i, v4054); + vst1q_s16(out + out_stride * 214 + i, v4055); + vst1q_s16(out + out_stride * 215 + i, v4056); + vst1q_s16(out + out_stride * 216 + i, v4057); + vst1q_s16(out + out_stride * 217 + i, v4058); + vst1q_s16(out + out_stride * 218 + i, v4059); + vst1q_s16(out + out_stride * 219 + i, v4060); + vst1q_s16(out + out_stride * 220 + i, v4061); + vst1q_s16(out + out_stride * 221 + i, v4062); + vst1q_s16(out + out_stride * 222 + i, v4063); + vst1q_s16(out + out_stride * 223 + i, v4064); + vst1q_s16(out + out_stride * 224 + i, v4065); + vst1q_s16(out + out_stride * 225 + i, v4066); + vst1q_s16(out + out_stride * 226 + i, v4067); + vst1q_s16(out + out_stride * 227 + i, v4068); + vst1q_s16(out + out_stride * 228 + i, v4069); + vst1q_s16(out + out_stride * 229 + i, v4070); + vst1q_s16(out + out_stride * 230 + i, v4071); + vst1q_s16(out + out_stride * 231 + i, v4072); + vst1q_s16(out + out_stride * 232 + i, v4073); + vst1q_s16(out + out_stride * 233 + i, v4074); + vst1q_s16(out + out_stride * 
234 + i, v4075); + vst1q_s16(out + out_stride * 235 + i, v4076); + vst1q_s16(out + out_stride * 236 + i, v4077); + vst1q_s16(out + out_stride * 237 + i, v4078); + vst1q_s16(out + out_stride * 238 + i, v4079); + vst1q_s16(out + out_stride * 239 + i, v4080); + vst1q_s16(out + out_stride * 240 + i, v4081); + vst1q_s16(out + out_stride * 241 + i, v4082); + vst1q_s16(out + out_stride * 242 + i, v4083); + vst1q_s16(out + out_stride * 243 + i, v4084); + vst1q_s16(out + out_stride * 244 + i, v4085); + vst1q_s16(out + out_stride * 245 + i, v4086); + vst1q_s16(out + out_stride * 246 + i, v4087); + vst1q_s16(out + out_stride * 247 + i, v4088); + vst1q_s16(out + out_stride * 248 + i, v4089); + vst1q_s16(out + out_stride * 249 + i, v4090); + vst1q_s16(out + out_stride * 250 + i, v4091); + vst1q_s16(out + out_stride * 251 + i, v4092); + vst1q_s16(out + out_stride * 252 + i, v4093); + vst1q_s16(out + out_stride * 253 + i, v4094); + vst1q_s16(out + out_stride * 254 + i, v4095); + vst1q_s16(out + out_stride * 255 + i, v4096); + } +} diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct32-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct32-inl.h new file mode 100644 index 0000000000..0f3b31cfea --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct32-inl.h @@ -0,0 +1,419 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* This file is automatically generated. Do not modify it directly. 
*/ +#if HWY_TARGET != HWY_NEON +#error "only include this file from fast_dct-inl.h" +#endif + +constexpr size_t FastIDCTIntegerBits(FastDCTTag<32>) { return 1; } + +void FastIDCT(FastDCTTag<32>, const int16_t* in, size_t in_stride, int16_t* out, + size_t out_stride, size_t count) { + JXL_ASSERT(count % 8 == 0); + for (size_t i = 0; i < count; i += 8) { + int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i); + int16x8_t v1 = vld1q_s16(in + in_stride * 16 + i); + int16x8_t v2 = vaddq_s16(v0, v1); + int16x8_t v3 = vld1q_s16(in + in_stride * 8 + i); + int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573); + int16x8_t v4 = vaddq_s16(v4_tmp, v3); + int16x8_t v5 = vld1q_s16(in + in_stride * 24 + i); + int16x8_t v6 = vaddq_s16(v5, v3); + int16x8_t v7 = vaddq_s16(v4, v6); + int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734); + int16x8_t v9 = vaddq_s16(v2, v8); + int16x8_t v10 = vld1q_s16(in + in_stride * 4 + i); + int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573); + int16x8_t v11 = vaddq_s16(v11_tmp, v10); + int16x8_t v12 = vld1q_s16(in + in_stride * 20 + i); + int16x8_t v13 = vld1q_s16(in + in_stride * 12 + i); + int16x8_t v14 = vaddq_s16(v12, v13); + int16x8_t v15 = vaddq_s16(v11, v14); + int16x8_t v16 = vld1q_s16(in + in_stride * 28 + i); + int16x8_t v17 = vaddq_s16(v16, v12); + int16x8_t v18 = vaddq_s16(v13, v10); + int16x8_t v19 = vaddq_s16(v17, v18); + int16x8_t v20 = vqrdmulhq_n_s16(v19, 17734); + int16x8_t v21 = vqrdmulhq_n_s16(v18, 25080); + int16x8_t v22 = vaddq_s16(v20, v21); + int16x8_t v23 = vaddq_s16(v15, v22); + int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705); + int16x8_t v25 = vaddq_s16(v9, v24); + int16x8_t v26 = vld1q_s16(in + in_stride * 2 + i); + int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573); + int16x8_t v27 = vaddq_s16(v27_tmp, v26); + int16x8_t v28 = vld1q_s16(in + in_stride * 18 + i); + int16x8_t v29 = vld1q_s16(in + in_stride * 14 + i); + int16x8_t v30 = vaddq_s16(v28, v29); + int16x8_t v31 = vaddq_s16(v27, v30); + int16x8_t v32 = vld1q_s16(in + in_stride * 10 + i); + 
int16x8_t v33 = vld1q_s16(in + in_stride * 6 + i); + int16x8_t v34 = vaddq_s16(v32, v33); + int16x8_t v35 = vqrdmulhq_n_s16(v34, 25080); + int16x8_t v36 = vld1q_s16(in + in_stride * 26 + i); + int16x8_t v37 = vld1q_s16(in + in_stride * 22 + i); + int16x8_t v38 = vaddq_s16(v36, v37); + int16x8_t v39 = vaddq_s16(v38, v34); + int16x8_t v40 = vqrdmulhq_n_s16(v39, 17734); + int16x8_t v41 = vaddq_s16(v35, v40); + int16x8_t v42 = vaddq_s16(v31, v41); + int16x8_t v43 = vaddq_s16(v33, v26); + int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573); + int16x8_t v44 = vaddq_s16(v44_tmp, v43); + int16x8_t v45 = vaddq_s16(v29, v32); + int16x8_t v46 = vaddq_s16(v37, v28); + int16x8_t v47 = vaddq_s16(v45, v46); + int16x8_t v48 = vaddq_s16(v44, v47); + int16x8_t v49 = vqrdmulhq_n_s16(v48, 16705); + int16x8_t v50 = vld1q_s16(in + in_stride * 30 + i); + int16x8_t v51 = vaddq_s16(v50, v36); + int16x8_t v52 = vaddq_s16(v51, v46); + int16x8_t v53 = vqrdmulhq_n_s16(v52, 17734); + int16x8_t v54 = vaddq_s16(v45, v43); + int16x8_t v55_tmp = vqrdmulhq_n_s16(v54, 10045); + int16x8_t v55 = vaddq_s16(v55_tmp, v54); + int16x8_t v56 = vaddq_s16(v53, v55); + int16x8_t v57 = vqrdmulhq_n_s16(v56, 16705); + int16x8_t v58 = vaddq_s16(v49, v57); + int16x8_t v59 = vaddq_s16(v42, v58); + int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463); + int16x8_t v61 = vaddq_s16(v25, v60); + int16x8_t v62 = vld1q_s16(in + in_stride * 13 + i); + int16x8_t v63 = vld1q_s16(in + in_stride * 11 + i); + int16x8_t v64 = vaddq_s16(v62, v63); + int16x8_t v65 = vld1q_s16(in + in_stride * 5 + i); + int16x8_t v66 = vld1q_s16(in + in_stride * 3 + i); + int16x8_t v67 = vaddq_s16(v65, v66); + int16x8_t v68 = vaddq_s16(v64, v67); + int16x8_t v69_tmp = vqrdmulhq_n_s16(v68, 10045); + int16x8_t v69 = vaddq_s16(v69_tmp, v68); + int16x8_t v70 = vld1q_s16(in + in_stride * 21 + i); + int16x8_t v71 = vld1q_s16(in + in_stride * 19 + i); + int16x8_t v72 = vaddq_s16(v70, v71); + int16x8_t v73 = vld1q_s16(in + in_stride * 29 + i); + int16x8_t v74 = 
vld1q_s16(in + in_stride * 27 + i); + int16x8_t v75 = vaddq_s16(v73, v74); + int16x8_t v76 = vaddq_s16(v72, v75); + int16x8_t v77 = vqrdmulhq_n_s16(v76, 17734); + int16x8_t v78 = vaddq_s16(v69, v77); + int16x8_t v79 = vqrdmulhq_n_s16(v78, 16705); + int16x8_t v80_tmp = vqrdmulhq_n_s16(v67, 13573); + int16x8_t v80 = vaddq_s16(v80_tmp, v67); + int16x8_t v81 = vaddq_s16(v64, v72); + int16x8_t v82 = vaddq_s16(v80, v81); + int16x8_t v83 = vqrdmulhq_n_s16(v82, 16705); + int16x8_t v84 = vaddq_s16(v79, v83); + int16x8_t v85 = vld1q_s16(in + in_stride * 1 + i); + int16x8_t v86_tmp = vqrdmulhq_n_s16(v85, 13573); + int16x8_t v86 = vaddq_s16(v86_tmp, v85); + int16x8_t v87 = vld1q_s16(in + in_stride * 17 + i); + int16x8_t v88 = vld1q_s16(in + in_stride * 15 + i); + int16x8_t v89 = vaddq_s16(v87, v88); + int16x8_t v90 = vaddq_s16(v86, v89); + int16x8_t v91 = vld1q_s16(in + in_stride * 9 + i); + int16x8_t v92 = vld1q_s16(in + in_stride * 7 + i); + int16x8_t v93 = vaddq_s16(v91, v92); + int16x8_t v94 = vqrdmulhq_n_s16(v93, 25080); + int16x8_t v95 = vld1q_s16(in + in_stride * 25 + i); + int16x8_t v96 = vld1q_s16(in + in_stride * 23 + i); + int16x8_t v97 = vaddq_s16(v95, v96); + int16x8_t v98 = vaddq_s16(v97, v93); + int16x8_t v99 = vqrdmulhq_n_s16(v98, 17734); + int16x8_t v100 = vaddq_s16(v94, v99); + int16x8_t v101 = vaddq_s16(v90, v100); + int16x8_t v102 = vaddq_s16(v84, v101); + int16x8_t v103 = vaddq_s16(v92, v65); + int16x8_t v104 = vaddq_s16(v66, v85); + int16x8_t v105 = vaddq_s16(v103, v104); + int16x8_t v106_tmp = vqrdmulhq_n_s16(v105, 13573); + int16x8_t v106 = vaddq_s16(v106_tmp, v105); + int16x8_t v107 = vaddq_s16(v96, v70); + int16x8_t v108 = vaddq_s16(v71, v87); + int16x8_t v109 = vaddq_s16(v107, v108); + int16x8_t v110 = vaddq_s16(v63, v91); + int16x8_t v111 = vaddq_s16(v88, v62); + int16x8_t v112 = vaddq_s16(v110, v111); + int16x8_t v113 = vaddq_s16(v109, v112); + int16x8_t v114 = vaddq_s16(v106, v113); + int16x8_t v115 = vqrdmulhq_n_s16(v114, 16705); + int16x8_t v116 
= vaddq_s16(v112, v105); + int16x8_t v117 = vqrdmulhq_n_s16(v116, 25080); + int16x8_t v118 = vqrdmulhq_n_s16(v116, 17734); + int16x8_t v119 = vaddq_s16(v74, v95); + int16x8_t v120 = vld1q_s16(in + in_stride * 31 + i); + int16x8_t v121 = vaddq_s16(v120, v73); + int16x8_t v122 = vaddq_s16(v119, v121); + int16x8_t v123 = vaddq_s16(v122, v109); + int16x8_t v124 = vqrdmulhq_n_s16(v123, 17734); + int16x8_t v125 = vaddq_s16(v118, v124); + int16x8_t v126 = vaddq_s16(v117, v125); + int16x8_t v127 = vqrdmulhq_n_s16(v126, 16705); + int16x8_t v128 = vaddq_s16(v115, v127); + int16x8_t v129 = vqrdmulhq_n_s16(v128, 16463); + int16x8_t v130_tmp = vqrdmulhq_n_s16(v104, 13573); + int16x8_t v130 = vaddq_s16(v130_tmp, v104); + int16x8_t v131 = vaddq_s16(v108, v111); + int16x8_t v132 = vaddq_s16(v130, v131); + int16x8_t v133 = vaddq_s16(v119, v107); + int16x8_t v134 = vqrdmulhq_n_s16(v133, 17734); + int16x8_t v135 = vaddq_s16(v110, v103); + int16x8_t v136_tmp = vqrdmulhq_n_s16(v135, 10045); + int16x8_t v136 = vaddq_s16(v136_tmp, v135); + int16x8_t v137 = vaddq_s16(v134, v136); + int16x8_t v138 = vaddq_s16(v132, v137); + int16x8_t v139 = vqrdmulhq_n_s16(v138, 16463); + int16x8_t v140 = vaddq_s16(v129, v139); + int16x8_t v141 = vaddq_s16(v102, v140); + int16x8_t v142 = vqrdmulhq_n_s16(v141, 16404); + int16x8_t v143 = vaddq_s16(v61, v142); + int16x8_t v144 = vsubq_s16(v0, v1); + int16x8_t v145 = vsubq_s16(v4, v6); + int16x8_t v146_tmp = vqrdmulhq_n_s16(v145, 10045); + int16x8_t v146 = vaddq_s16(v146_tmp, v145); + int16x8_t v147 = vaddq_s16(v144, v146); + int16x8_t v148 = vsubq_s16(v11, v14); + int16x8_t v149 = vqrdmulhq_n_s16(v18, 17734); + int16x8_t v150_tmp = vqrdmulhq_n_s16(v17, 10045); + int16x8_t v150 = vaddq_s16(v150_tmp, v17); + int16x8_t v151 = vsubq_s16(v149, v150); + int16x8_t v152 = vaddq_s16(v148, v151); + int16x8_t v153 = vqrdmulhq_n_s16(v152, 19705); + int16x8_t v154 = vaddq_s16(v147, v153); + int16x8_t v155 = vsubq_s16(v27, v30); + int16x8_t v156 = vqrdmulhq_n_s16(v34, 
17734); + int16x8_t v157_tmp = vqrdmulhq_n_s16(v38, 10045); + int16x8_t v157 = vaddq_s16(v157_tmp, v38); + int16x8_t v158 = vsubq_s16(v156, v157); + int16x8_t v159 = vaddq_s16(v155, v158); + int16x8_t v160 = vqrdmulhq_n_s16(v54, 13573); + int16x8_t v161 = vsubq_s16(v160, v52); + int16x8_t v162 = vqrdmulhq_n_s16(v161, 25746); + int16x8_t v163 = vsubq_s16(v44, v47); + int16x8_t v164 = vqrdmulhq_n_s16(v163, 19705); + int16x8_t v165 = vaddq_s16(v162, v164); + int16x8_t v166 = vaddq_s16(v159, v165); + int16x8_t v167 = vqrdmulhq_n_s16(v166, 17121); + int16x8_t v168 = vaddq_s16(v154, v167); + int16x8_t v169 = vsubq_s16(v86, v89); + int16x8_t v170 = vqrdmulhq_n_s16(v93, 17734); + int16x8_t v171_tmp = vqrdmulhq_n_s16(v97, 10045); + int16x8_t v171 = vaddq_s16(v171_tmp, v97); + int16x8_t v172 = vsubq_s16(v170, v171); + int16x8_t v173 = vaddq_s16(v169, v172); + int16x8_t v174 = vsubq_s16(v80, v81); + int16x8_t v175 = vqrdmulhq_n_s16(v174, 19705); + int16x8_t v176 = vqrdmulhq_n_s16(v68, 13573); + int16x8_t v177 = vsubq_s16(v176, v76); + int16x8_t v178 = vqrdmulhq_n_s16(v177, 25746); + int16x8_t v179 = vaddq_s16(v175, v178); + int16x8_t v180 = vaddq_s16(v173, v179); + int16x8_t v181 = vsubq_s16(v130, v131); + int16x8_t v182 = vqrdmulhq_n_s16(v135, 13573); + int16x8_t v183 = vsubq_s16(v182, v133); + int16x8_t v184_tmp = vqrdmulhq_n_s16(v183, 10045); + int16x8_t v184 = vaddq_s16(v184_tmp, v183); + int16x8_t v185 = vaddq_s16(v181, v184); + int16x8_t v186 = vqrdmulhq_n_s16(v185, 17121); + int16x8_t v187 = vqrdmulhq_n_s16(v105, 27867); + int16x8_t v188 = vqrdmulhq_n_s16(v113, 19705); + int16x8_t v189 = vsubq_s16(v187, v188); + int16x8_t v190 = vqrdmulhq_n_s16(v116, 13573); + int16x8_t v191 = vsubq_s16(v190, v123); + int16x8_t v192 = vqrdmulhq_n_s16(v191, 25746); + int16x8_t v193 = vaddq_s16(v189, v192); + int16x8_t v194 = vqrdmulhq_n_s16(v193, 17121); + int16x8_t v195 = vaddq_s16(v186, v194); + int16x8_t v196 = vaddq_s16(v180, v195); + int16x8_t v197 = vqrdmulhq_n_s16(v196, 16563); + 
int16x8_t v198 = vaddq_s16(v168, v197); + int16x8_t v199 = vsubq_s16(v144, v146); + int16x8_t v200 = vsubq_s16(v148, v151); + int16x8_t v201 = vqrdmulhq_n_s16(v200, 29490); + int16x8_t v202 = vaddq_s16(v199, v201); + int16x8_t v203 = vsubq_s16(v155, v158); + int16x8_t v204 = vqrdmulhq_n_s16(v163, 29490); + int16x8_t v205_tmp = vqrdmulhq_n_s16(v161, 5763); + int16x8_t v205 = vaddq_s16(v205_tmp, v161); + int16x8_t v206 = vsubq_s16(v204, v205); + int16x8_t v207 = vaddq_s16(v203, v206); + int16x8_t v208 = vqrdmulhq_n_s16(v207, 18578); + int16x8_t v209 = vaddq_s16(v202, v208); + int16x8_t v210 = vsubq_s16(v169, v172); + int16x8_t v211 = vqrdmulhq_n_s16(v174, 29490); + int16x8_t v212_tmp = vqrdmulhq_n_s16(v177, 5763); + int16x8_t v212 = vaddq_s16(v212_tmp, v177); + int16x8_t v213 = vsubq_s16(v211, v212); + int16x8_t v214 = vaddq_s16(v210, v213); + int16x8_t v215 = vsubq_s16(v181, v184); + int16x8_t v216 = vqrdmulhq_n_s16(v215, 18578); + int16x8_t v217 = vqrdmulhq_n_s16(v189, 27803); + int16x8_t v218 = vqrdmulhq_n_s16(v191, 21845); + int16x8_t v219 = vsubq_s16(v217, v218); + int16x8_t v220 = vaddq_s16(v216, v219); + int16x8_t v221 = vaddq_s16(v214, v220); + int16x8_t v222 = vqrdmulhq_n_s16(v221, 16890); + int16x8_t v223 = vaddq_s16(v209, v222); + int16x8_t v224 = vsubq_s16(v2, v8); + int16x8_t v225 = vsubq_s16(v15, v22); + int16x8_t v226_tmp = vqrdmulhq_n_s16(v225, 18446); + int16x8_t v226 = vmlaq_n_s16(v226_tmp, v225, 2); + int16x8_t v227 = vaddq_s16(v224, v226); + int16x8_t v228 = vsubq_s16(v31, v41); + int16x8_t v229 = vsubq_s16(v48, v56); + int16x8_t v230_tmp = vqrdmulhq_n_s16(v229, 18446); + int16x8_t v230 = vmlaq_n_s16(v230_tmp, v229, 2); + int16x8_t v231 = vaddq_s16(v228, v230); + int16x8_t v232 = vqrdmulhq_n_s16(v231, 21195); + int16x8_t v233 = vaddq_s16(v227, v232); + int16x8_t v234 = vsubq_s16(v82, v78); + int16x8_t v235_tmp = vqrdmulhq_n_s16(v234, 18446); + int16x8_t v235 = vmlaq_n_s16(v235_tmp, v234, 2); + int16x8_t v236 = vsubq_s16(v90, v100); + int16x8_t 
v237 = vaddq_s16(v235, v236); + int16x8_t v238 = vsubq_s16(v132, v137); + int16x8_t v239 = vsubq_s16(v114, v126); + int16x8_t v240_tmp = vqrdmulhq_n_s16(v239, 18446); + int16x8_t v240 = vmlaq_n_s16(v240_tmp, v239, 2); + int16x8_t v241 = vaddq_s16(v238, v240); + int16x8_t v242 = vqrdmulhq_n_s16(v241, 21195); + int16x8_t v243 = vaddq_s16(v237, v242); + int16x8_t v244 = vqrdmulhq_n_s16(v243, 17401); + int16x8_t v245 = vaddq_s16(v233, v244); + int16x8_t v246 = vsubq_s16(v228, v230); + int16x8_t v247 = vqrdmulhq_n_s16(v246, 25826); + int16x8_t v248 = vsubq_s16(v224, v226); + int16x8_t v249 = vaddq_s16(v247, v248); + int16x8_t v250 = vsubq_s16(v238, v240); + int16x8_t v251 = vqrdmulhq_n_s16(v250, 25826); + int16x8_t v252 = vsubq_s16(v236, v235); + int16x8_t v253 = vaddq_s16(v251, v252); + int16x8_t v254 = vqrdmulhq_n_s16(v253, 18124); + int16x8_t v255 = vaddq_s16(v249, v254); + int16x8_t v256 = vsubq_s16(v199, v201); + int16x8_t v257 = vsubq_s16(v203, v206); + int16x8_t v258_tmp = vqrdmulhq_n_s16(v257, 1988); + int16x8_t v258 = vaddq_s16(v258_tmp, v257); + int16x8_t v259 = vaddq_s16(v256, v258); + int16x8_t v260 = vsubq_s16(v210, v213); + int16x8_t v261_tmp = vqrdmulhq_n_s16(v219, 25030); + int16x8_t v261 = vaddq_s16(v261_tmp, v219); + int16x8_t v262 = vsubq_s16(v215, v261); + int16x8_t v263_tmp = vqrdmulhq_n_s16(v262, 1988); + int16x8_t v263 = vaddq_s16(v263_tmp, v262); + int16x8_t v264 = vaddq_s16(v260, v263); + int16x8_t v265 = vqrdmulhq_n_s16(v264, 19102); + int16x8_t v266 = vaddq_s16(v259, v265); + int16x8_t v267 = vsubq_s16(v147, v153); + int16x8_t v268 = vsubq_s16(v159, v165); + int16x8_t v269_tmp = vqrdmulhq_n_s16(v268, 23673); + int16x8_t v269 = vaddq_s16(v269_tmp, v268); + int16x8_t v270 = vaddq_s16(v267, v269); + int16x8_t v271 = vsubq_s16(v173, v179); + int16x8_t v272 = vsubq_s16(v185, v193); + int16x8_t v273_tmp = vqrdmulhq_n_s16(v272, 23673); + int16x8_t v273 = vaddq_s16(v273_tmp, v272); + int16x8_t v274 = vaddq_s16(v271, v273); + int16x8_t v275 = 
vqrdmulhq_n_s16(v274, 20398); + int16x8_t v276 = vaddq_s16(v270, v275); + int16x8_t v277 = vsubq_s16(v9, v24); + int16x8_t v278 = vsubq_s16(v42, v58); + int16x8_t v279_tmp = vqrdmulhq_n_s16(v278, 3314); + int16x8_t v279 = vmlaq_n_s16(v279_tmp, v278, 5); + int16x8_t v280 = vaddq_s16(v277, v279); + int16x8_t v281 = vsubq_s16(v138, v128); + int16x8_t v282_tmp = vqrdmulhq_n_s16(v281, 3314); + int16x8_t v282 = vmlaq_n_s16(v282_tmp, v281, 5); + int16x8_t v283 = vsubq_s16(v101, v84); + int16x8_t v284 = vaddq_s16(v282, v283); + int16x8_t v285 = vqrdmulhq_n_s16(v284, 22112); + int16x8_t v286 = vaddq_s16(v280, v285); + int16x8_t v287 = vsubq_s16(v277, v279); + int16x8_t v288 = vsubq_s16(v283, v282); + int16x8_t v289 = vqrdmulhq_n_s16(v288, 24397); + int16x8_t v290 = vaddq_s16(v287, v289); + int16x8_t v291 = vsubq_s16(v267, v269); + int16x8_t v292 = vsubq_s16(v271, v273); + int16x8_t v293 = vqrdmulhq_n_s16(v292, 27504); + int16x8_t v294 = vaddq_s16(v291, v293); + int16x8_t v295 = vsubq_s16(v260, v263); + int16x8_t v296 = vqrdmulhq_n_s16(v295, 31869); + int16x8_t v297 = vsubq_s16(v256, v258); + int16x8_t v298 = vaddq_s16(v296, v297); + int16x8_t v299 = vsubq_s16(v248, v247); + int16x8_t v300 = vsubq_s16(v252, v251); + int16x8_t v301_tmp = vqrdmulhq_n_s16(v300, 5552); + int16x8_t v301 = vaddq_s16(v301_tmp, v300); + int16x8_t v302 = vaddq_s16(v299, v301); + int16x8_t v303 = vsubq_s16(v227, v232); + int16x8_t v304 = vsubq_s16(v237, v242); + int16x8_t v305_tmp = vqrdmulhq_n_s16(v304, 15865); + int16x8_t v305 = vaddq_s16(v305_tmp, v304); + int16x8_t v306 = vaddq_s16(v303, v305); + int16x8_t v307 = vsubq_s16(v202, v208); + int16x8_t v308 = vsubq_s16(v214, v220); + int16x8_t v309_tmp = vqrdmulhq_n_s16(v308, 1893); + int16x8_t v309 = vmlaq_n_s16(v309_tmp, v308, 2); + int16x8_t v310 = vaddq_s16(v307, v309); + int16x8_t v311 = vsubq_s16(v154, v167); + int16x8_t v312 = vsubq_s16(v180, v195); + int16x8_t v313_tmp = vqrdmulhq_n_s16(v312, 13357); + int16x8_t v313 = vmlaq_n_s16(v313_tmp, 
v312, 3); + int16x8_t v314 = vaddq_s16(v311, v313); + int16x8_t v315 = vsubq_s16(v102, v140); + int16x8_t v316_tmp = vqrdmulhq_n_s16(v315, 6226); + int16x8_t v316 = vmlaq_n_s16(v316_tmp, v315, 10); + int16x8_t v317 = vsubq_s16(v25, v60); + int16x8_t v318 = vaddq_s16(v316, v317); + int16x8_t v319 = vsubq_s16(v317, v316); + int16x8_t v320 = vsubq_s16(v311, v313); + int16x8_t v321 = vsubq_s16(v307, v309); + int16x8_t v322 = vsubq_s16(v303, v305); + int16x8_t v323 = vsubq_s16(v299, v301); + int16x8_t v324 = vsubq_s16(v297, v296); + int16x8_t v325 = vsubq_s16(v291, v293); + int16x8_t v326 = vsubq_s16(v287, v289); + int16x8_t v327 = vsubq_s16(v280, v285); + int16x8_t v328 = vsubq_s16(v270, v275); + int16x8_t v329 = vsubq_s16(v259, v265); + int16x8_t v330 = vsubq_s16(v249, v254); + int16x8_t v331 = vsubq_s16(v233, v244); + int16x8_t v332 = vsubq_s16(v209, v222); + int16x8_t v333 = vsubq_s16(v168, v197); + int16x8_t v334 = vsubq_s16(v61, v142); + vst1q_s16(out + out_stride * 0 + i, v143); + vst1q_s16(out + out_stride * 1 + i, v198); + vst1q_s16(out + out_stride * 2 + i, v223); + vst1q_s16(out + out_stride * 3 + i, v245); + vst1q_s16(out + out_stride * 4 + i, v255); + vst1q_s16(out + out_stride * 5 + i, v266); + vst1q_s16(out + out_stride * 6 + i, v276); + vst1q_s16(out + out_stride * 7 + i, v286); + vst1q_s16(out + out_stride * 8 + i, v290); + vst1q_s16(out + out_stride * 9 + i, v294); + vst1q_s16(out + out_stride * 10 + i, v298); + vst1q_s16(out + out_stride * 11 + i, v302); + vst1q_s16(out + out_stride * 12 + i, v306); + vst1q_s16(out + out_stride * 13 + i, v310); + vst1q_s16(out + out_stride * 14 + i, v314); + vst1q_s16(out + out_stride * 15 + i, v318); + vst1q_s16(out + out_stride * 16 + i, v319); + vst1q_s16(out + out_stride * 17 + i, v320); + vst1q_s16(out + out_stride * 18 + i, v321); + vst1q_s16(out + out_stride * 19 + i, v322); + vst1q_s16(out + out_stride * 20 + i, v323); + vst1q_s16(out + out_stride * 21 + i, v324); + vst1q_s16(out + out_stride * 22 + i, v325); 
+ vst1q_s16(out + out_stride * 23 + i, v326); + vst1q_s16(out + out_stride * 24 + i, v327); + vst1q_s16(out + out_stride * 25 + i, v328); + vst1q_s16(out + out_stride * 26 + i, v329); + vst1q_s16(out + out_stride * 27 + i, v330); + vst1q_s16(out + out_stride * 28 + i, v331); + vst1q_s16(out + out_stride * 29 + i, v332); + vst1q_s16(out + out_stride * 30 + i, v333); + vst1q_s16(out + out_stride * 31 + i, v334); + } +} diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct64-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct64-inl.h new file mode 100644 index 0000000000..400da1a9de --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct64-inl.h @@ -0,0 +1,985 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* This file is automatically generated. Do not modify it directly. */ +#if HWY_TARGET != HWY_NEON +#error "only include this file from fast_dct-inl.h" +#endif + +constexpr size_t FastIDCTIntegerBits(FastDCTTag<64>) { return 1; } + +void FastIDCT(FastDCTTag<64>, const int16_t* in, size_t in_stride, int16_t* out, + size_t out_stride, size_t count) { + JXL_ASSERT(count % 8 == 0); + for (size_t i = 0; i < count; i += 8) { + int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i); + int16x8_t v1 = vld1q_s16(in + in_stride * 32 + i); + int16x8_t v2 = vaddq_s16(v0, v1); + int16x8_t v3 = vld1q_s16(in + in_stride * 16 + i); + int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573); + int16x8_t v4 = vaddq_s16(v4_tmp, v3); + int16x8_t v5 = vld1q_s16(in + in_stride * 48 + i); + int16x8_t v6 = vaddq_s16(v5, v3); + int16x8_t v7 = vaddq_s16(v4, v6); + int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734); + int16x8_t v9 = vaddq_s16(v2, v8); + int16x8_t v10 = vld1q_s16(in + in_stride * 8 + i); + int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573); + int16x8_t v11 = vaddq_s16(v11_tmp, v10); + int16x8_t v12 = vld1q_s16(in + in_stride * 40 + i); + int16x8_t v13 = 
vld1q_s16(in + in_stride * 24 + i); + int16x8_t v14 = vaddq_s16(v12, v13); + int16x8_t v15 = vaddq_s16(v11, v14); + int16x8_t v16 = vld1q_s16(in + in_stride * 56 + i); + int16x8_t v17 = vaddq_s16(v16, v12); + int16x8_t v18 = vaddq_s16(v13, v10); + int16x8_t v19 = vaddq_s16(v17, v18); + int16x8_t v20 = vqrdmulhq_n_s16(v19, 17734); + int16x8_t v21 = vqrdmulhq_n_s16(v18, 25080); + int16x8_t v22 = vaddq_s16(v20, v21); + int16x8_t v23 = vaddq_s16(v15, v22); + int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705); + int16x8_t v25 = vaddq_s16(v9, v24); + int16x8_t v26 = vld1q_s16(in + in_stride * 4 + i); + int16x8_t v27_tmp = vqrdmulhq_n_s16(v26, 13573); + int16x8_t v27 = vaddq_s16(v27_tmp, v26); + int16x8_t v28 = vld1q_s16(in + in_stride * 36 + i); + int16x8_t v29 = vld1q_s16(in + in_stride * 28 + i); + int16x8_t v30 = vaddq_s16(v28, v29); + int16x8_t v31 = vaddq_s16(v27, v30); + int16x8_t v32 = vld1q_s16(in + in_stride * 20 + i); + int16x8_t v33 = vld1q_s16(in + in_stride * 12 + i); + int16x8_t v34 = vaddq_s16(v32, v33); + int16x8_t v35 = vqrdmulhq_n_s16(v34, 25080); + int16x8_t v36 = vld1q_s16(in + in_stride * 52 + i); + int16x8_t v37 = vld1q_s16(in + in_stride * 44 + i); + int16x8_t v38 = vaddq_s16(v36, v37); + int16x8_t v39 = vaddq_s16(v38, v34); + int16x8_t v40 = vqrdmulhq_n_s16(v39, 17734); + int16x8_t v41 = vaddq_s16(v35, v40); + int16x8_t v42 = vaddq_s16(v31, v41); + int16x8_t v43 = vaddq_s16(v33, v26); + int16x8_t v44_tmp = vqrdmulhq_n_s16(v43, 13573); + int16x8_t v44 = vaddq_s16(v44_tmp, v43); + int16x8_t v45 = vaddq_s16(v37, v28); + int16x8_t v46 = vaddq_s16(v29, v32); + int16x8_t v47 = vaddq_s16(v45, v46); + int16x8_t v48 = vaddq_s16(v44, v47); + int16x8_t v49 = vqrdmulhq_n_s16(v48, 16705); + int16x8_t v50 = vaddq_s16(v46, v43); + int16x8_t v51_tmp = vqrdmulhq_n_s16(v50, 10045); + int16x8_t v51 = vaddq_s16(v51_tmp, v50); + int16x8_t v52 = vld1q_s16(in + in_stride * 60 + i); + int16x8_t v53 = vaddq_s16(v52, v36); + int16x8_t v54 = vaddq_s16(v53, v45); + int16x8_t v55 = 
vqrdmulhq_n_s16(v54, 17734); + int16x8_t v56 = vaddq_s16(v51, v55); + int16x8_t v57 = vqrdmulhq_n_s16(v56, 16705); + int16x8_t v58 = vaddq_s16(v49, v57); + int16x8_t v59 = vaddq_s16(v42, v58); + int16x8_t v60 = vqrdmulhq_n_s16(v59, 16463); + int16x8_t v61 = vaddq_s16(v25, v60); + int16x8_t v62 = vld1q_s16(in + in_stride * 2 + i); + int16x8_t v63_tmp = vqrdmulhq_n_s16(v62, 13573); + int16x8_t v63 = vaddq_s16(v63_tmp, v62); + int16x8_t v64 = vld1q_s16(in + in_stride * 34 + i); + int16x8_t v65 = vld1q_s16(in + in_stride * 30 + i); + int16x8_t v66 = vaddq_s16(v64, v65); + int16x8_t v67 = vaddq_s16(v63, v66); + int16x8_t v68 = vld1q_s16(in + in_stride * 18 + i); + int16x8_t v69 = vld1q_s16(in + in_stride * 14 + i); + int16x8_t v70 = vaddq_s16(v68, v69); + int16x8_t v71 = vqrdmulhq_n_s16(v70, 25080); + int16x8_t v72 = vld1q_s16(in + in_stride * 50 + i); + int16x8_t v73 = vld1q_s16(in + in_stride * 46 + i); + int16x8_t v74 = vaddq_s16(v72, v73); + int16x8_t v75 = vaddq_s16(v74, v70); + int16x8_t v76 = vqrdmulhq_n_s16(v75, 17734); + int16x8_t v77 = vaddq_s16(v71, v76); + int16x8_t v78 = vaddq_s16(v67, v77); + int16x8_t v79 = vld1q_s16(in + in_stride * 10 + i); + int16x8_t v80 = vld1q_s16(in + in_stride * 6 + i); + int16x8_t v81 = vaddq_s16(v79, v80); + int16x8_t v82_tmp = vqrdmulhq_n_s16(v81, 13573); + int16x8_t v82 = vaddq_s16(v82_tmp, v81); + int16x8_t v83 = vld1q_s16(in + in_stride * 42 + i); + int16x8_t v84 = vld1q_s16(in + in_stride * 38 + i); + int16x8_t v85 = vaddq_s16(v83, v84); + int16x8_t v86 = vld1q_s16(in + in_stride * 26 + i); + int16x8_t v87 = vld1q_s16(in + in_stride * 22 + i); + int16x8_t v88 = vaddq_s16(v86, v87); + int16x8_t v89 = vaddq_s16(v85, v88); + int16x8_t v90 = vaddq_s16(v82, v89); + int16x8_t v91 = vqrdmulhq_n_s16(v90, 16705); + int16x8_t v92 = vaddq_s16(v88, v81); + int16x8_t v93_tmp = vqrdmulhq_n_s16(v92, 10045); + int16x8_t v93 = vaddq_s16(v93_tmp, v92); + int16x8_t v94 = vld1q_s16(in + in_stride * 58 + i); + int16x8_t v95 = vld1q_s16(in + 
in_stride * 54 + i); + int16x8_t v96 = vaddq_s16(v94, v95); + int16x8_t v97 = vaddq_s16(v96, v85); + int16x8_t v98 = vqrdmulhq_n_s16(v97, 17734); + int16x8_t v99 = vaddq_s16(v93, v98); + int16x8_t v100 = vqrdmulhq_n_s16(v99, 16705); + int16x8_t v101 = vaddq_s16(v91, v100); + int16x8_t v102 = vaddq_s16(v78, v101); + int16x8_t v103 = vaddq_s16(v69, v79); + int16x8_t v104 = vaddq_s16(v80, v62); + int16x8_t v105 = vaddq_s16(v103, v104); + int16x8_t v106_tmp = vqrdmulhq_n_s16(v105, 13573); + int16x8_t v106 = vaddq_s16(v106_tmp, v105); + int16x8_t v107 = vaddq_s16(v73, v83); + int16x8_t v108 = vaddq_s16(v84, v64); + int16x8_t v109 = vaddq_s16(v107, v108); + int16x8_t v110 = vaddq_s16(v65, v86); + int16x8_t v111 = vaddq_s16(v87, v68); + int16x8_t v112 = vaddq_s16(v110, v111); + int16x8_t v113 = vaddq_s16(v109, v112); + int16x8_t v114 = vaddq_s16(v106, v113); + int16x8_t v115 = vqrdmulhq_n_s16(v114, 16705); + int16x8_t v116 = vaddq_s16(v112, v105); + int16x8_t v117 = vqrdmulhq_n_s16(v116, 25080); + int16x8_t v118 = vqrdmulhq_n_s16(v116, 17734); + int16x8_t v119 = vld1q_s16(in + in_stride * 62 + i); + int16x8_t v120 = vaddq_s16(v119, v94); + int16x8_t v121 = vaddq_s16(v95, v72); + int16x8_t v122 = vaddq_s16(v120, v121); + int16x8_t v123 = vaddq_s16(v122, v109); + int16x8_t v124 = vqrdmulhq_n_s16(v123, 17734); + int16x8_t v125 = vaddq_s16(v118, v124); + int16x8_t v126 = vaddq_s16(v117, v125); + int16x8_t v127 = vqrdmulhq_n_s16(v126, 16705); + int16x8_t v128 = vaddq_s16(v115, v127); + int16x8_t v129 = vqrdmulhq_n_s16(v128, 16463); + int16x8_t v130_tmp = vqrdmulhq_n_s16(v104, 13573); + int16x8_t v130 = vaddq_s16(v130_tmp, v104); + int16x8_t v131 = vaddq_s16(v108, v110); + int16x8_t v132 = vaddq_s16(v130, v131); + int16x8_t v133 = vaddq_s16(v111, v103); + int16x8_t v134_tmp = vqrdmulhq_n_s16(v133, 10045); + int16x8_t v134 = vaddq_s16(v134_tmp, v133); + int16x8_t v135 = vaddq_s16(v121, v107); + int16x8_t v136 = vqrdmulhq_n_s16(v135, 17734); + int16x8_t v137 = vaddq_s16(v134, 
v136); + int16x8_t v138 = vaddq_s16(v132, v137); + int16x8_t v139 = vqrdmulhq_n_s16(v138, 16463); + int16x8_t v140 = vaddq_s16(v129, v139); + int16x8_t v141 = vaddq_s16(v102, v140); + int16x8_t v142 = vqrdmulhq_n_s16(v141, 16404); + int16x8_t v143 = vaddq_s16(v61, v142); + int16x8_t v144 = vld1q_s16(in + in_stride * 1 + i); + int16x8_t v145_tmp = vqrdmulhq_n_s16(v144, 13573); + int16x8_t v145 = vaddq_s16(v145_tmp, v144); + int16x8_t v146 = vld1q_s16(in + in_stride * 33 + i); + int16x8_t v147 = vld1q_s16(in + in_stride * 31 + i); + int16x8_t v148 = vaddq_s16(v146, v147); + int16x8_t v149 = vaddq_s16(v145, v148); + int16x8_t v150 = vld1q_s16(in + in_stride * 17 + i); + int16x8_t v151 = vld1q_s16(in + in_stride * 15 + i); + int16x8_t v152 = vaddq_s16(v150, v151); + int16x8_t v153 = vqrdmulhq_n_s16(v152, 25080); + int16x8_t v154 = vld1q_s16(in + in_stride * 49 + i); + int16x8_t v155 = vld1q_s16(in + in_stride * 47 + i); + int16x8_t v156 = vaddq_s16(v154, v155); + int16x8_t v157 = vaddq_s16(v156, v152); + int16x8_t v158 = vqrdmulhq_n_s16(v157, 17734); + int16x8_t v159 = vaddq_s16(v153, v158); + int16x8_t v160 = vaddq_s16(v149, v159); + int16x8_t v161 = vld1q_s16(in + in_stride * 9 + i); + int16x8_t v162 = vld1q_s16(in + in_stride * 7 + i); + int16x8_t v163 = vaddq_s16(v161, v162); + int16x8_t v164_tmp = vqrdmulhq_n_s16(v163, 13573); + int16x8_t v164 = vaddq_s16(v164_tmp, v163); + int16x8_t v165 = vld1q_s16(in + in_stride * 41 + i); + int16x8_t v166 = vld1q_s16(in + in_stride * 39 + i); + int16x8_t v167 = vaddq_s16(v165, v166); + int16x8_t v168 = vld1q_s16(in + in_stride * 25 + i); + int16x8_t v169 = vld1q_s16(in + in_stride * 23 + i); + int16x8_t v170 = vaddq_s16(v168, v169); + int16x8_t v171 = vaddq_s16(v167, v170); + int16x8_t v172 = vaddq_s16(v164, v171); + int16x8_t v173 = vqrdmulhq_n_s16(v172, 16705); + int16x8_t v174 = vaddq_s16(v170, v163); + int16x8_t v175_tmp = vqrdmulhq_n_s16(v174, 10045); + int16x8_t v175 = vaddq_s16(v175_tmp, v174); + int16x8_t v176 = 
vld1q_s16(in + in_stride * 57 + i); + int16x8_t v177 = vld1q_s16(in + in_stride * 55 + i); + int16x8_t v178 = vaddq_s16(v176, v177); + int16x8_t v179 = vaddq_s16(v178, v167); + int16x8_t v180 = vqrdmulhq_n_s16(v179, 17734); + int16x8_t v181 = vaddq_s16(v175, v180); + int16x8_t v182 = vqrdmulhq_n_s16(v181, 16705); + int16x8_t v183 = vaddq_s16(v173, v182); + int16x8_t v184 = vaddq_s16(v160, v183); + int16x8_t v185 = vld1q_s16(in + in_stride * 37 + i); + int16x8_t v186 = vld1q_s16(in + in_stride * 35 + i); + int16x8_t v187 = vaddq_s16(v185, v186); + int16x8_t v188 = vld1q_s16(in + in_stride * 45 + i); + int16x8_t v189 = vld1q_s16(in + in_stride * 43 + i); + int16x8_t v190 = vaddq_s16(v188, v189); + int16x8_t v191 = vaddq_s16(v187, v190); + int16x8_t v192 = vld1q_s16(in + in_stride * 29 + i); + int16x8_t v193 = vld1q_s16(in + in_stride * 27 + i); + int16x8_t v194 = vaddq_s16(v192, v193); + int16x8_t v195 = vld1q_s16(in + in_stride * 21 + i); + int16x8_t v196 = vld1q_s16(in + in_stride * 19 + i); + int16x8_t v197 = vaddq_s16(v195, v196); + int16x8_t v198 = vaddq_s16(v194, v197); + int16x8_t v199 = vaddq_s16(v191, v198); + int16x8_t v200 = vld1q_s16(in + in_stride * 5 + i); + int16x8_t v201 = vld1q_s16(in + in_stride * 3 + i); + int16x8_t v202 = vaddq_s16(v200, v201); + int16x8_t v203 = vld1q_s16(in + in_stride * 13 + i); + int16x8_t v204 = vld1q_s16(in + in_stride * 11 + i); + int16x8_t v205 = vaddq_s16(v203, v204); + int16x8_t v206 = vaddq_s16(v202, v205); + int16x8_t v207_tmp = vqrdmulhq_n_s16(v206, 13573); + int16x8_t v207 = vaddq_s16(v207_tmp, v206); + int16x8_t v208 = vaddq_s16(v199, v207); + int16x8_t v209 = vqrdmulhq_n_s16(v208, 16705); + int16x8_t v210 = vaddq_s16(v198, v206); + int16x8_t v211 = vqrdmulhq_n_s16(v210, 25080); + int16x8_t v212 = vqrdmulhq_n_s16(v210, 17734); + int16x8_t v213 = vld1q_s16(in + in_stride * 53 + i); + int16x8_t v214 = vld1q_s16(in + in_stride * 51 + i); + int16x8_t v215 = vaddq_s16(v213, v214); + int16x8_t v216 = vld1q_s16(in + 
in_stride * 61 + i); + int16x8_t v217 = vld1q_s16(in + in_stride * 59 + i); + int16x8_t v218 = vaddq_s16(v216, v217); + int16x8_t v219 = vaddq_s16(v215, v218); + int16x8_t v220 = vaddq_s16(v219, v191); + int16x8_t v221 = vqrdmulhq_n_s16(v220, 17734); + int16x8_t v222 = vaddq_s16(v212, v221); + int16x8_t v223 = vaddq_s16(v211, v222); + int16x8_t v224 = vqrdmulhq_n_s16(v223, 16705); + int16x8_t v225 = vaddq_s16(v209, v224); + int16x8_t v226 = vqrdmulhq_n_s16(v225, 16463); + int16x8_t v227_tmp = vqrdmulhq_n_s16(v202, 13573); + int16x8_t v227 = vaddq_s16(v227_tmp, v202); + int16x8_t v228 = vaddq_s16(v187, v194); + int16x8_t v229 = vaddq_s16(v227, v228); + int16x8_t v230 = vaddq_s16(v215, v190); + int16x8_t v231 = vqrdmulhq_n_s16(v230, 17734); + int16x8_t v232 = vaddq_s16(v197, v205); + int16x8_t v233_tmp = vqrdmulhq_n_s16(v232, 10045); + int16x8_t v233 = vaddq_s16(v233_tmp, v232); + int16x8_t v234 = vaddq_s16(v231, v233); + int16x8_t v235 = vaddq_s16(v229, v234); + int16x8_t v236 = vqrdmulhq_n_s16(v235, 16463); + int16x8_t v237 = vaddq_s16(v226, v236); + int16x8_t v238 = vaddq_s16(v184, v237); + int16x8_t v239 = vaddq_s16(v201, v144); + int16x8_t v240_tmp = vqrdmulhq_n_s16(v239, 13573); + int16x8_t v240 = vaddq_s16(v240_tmp, v239); + int16x8_t v241 = vaddq_s16(v186, v146); + int16x8_t v242 = vaddq_s16(v147, v192); + int16x8_t v243 = vaddq_s16(v241, v242); + int16x8_t v244 = vaddq_s16(v240, v243); + int16x8_t v245 = vaddq_s16(v196, v150); + int16x8_t v246 = vaddq_s16(v151, v203); + int16x8_t v247 = vaddq_s16(v245, v246); + int16x8_t v248_tmp = vqrdmulhq_n_s16(v247, 10045); + int16x8_t v248 = vaddq_s16(v248_tmp, v247); + int16x8_t v249 = vaddq_s16(v155, v188); + int16x8_t v250 = vaddq_s16(v214, v154); + int16x8_t v251 = vaddq_s16(v249, v250); + int16x8_t v252 = vqrdmulhq_n_s16(v251, 17734); + int16x8_t v253 = vaddq_s16(v248, v252); + int16x8_t v254 = vaddq_s16(v244, v253); + int16x8_t v255 = vaddq_s16(v204, v161); + int16x8_t v256 = vaddq_s16(v162, v200); + int16x8_t 
v257 = vaddq_s16(v255, v256); + int16x8_t v258_tmp = vqrdmulhq_n_s16(v257, 13573); + int16x8_t v258 = vaddq_s16(v258_tmp, v257); + int16x8_t v259 = vaddq_s16(v189, v165); + int16x8_t v260 = vaddq_s16(v166, v185); + int16x8_t v261 = vaddq_s16(v259, v260); + int16x8_t v262 = vaddq_s16(v169, v195); + int16x8_t v263 = vaddq_s16(v193, v168); + int16x8_t v264 = vaddq_s16(v262, v263); + int16x8_t v265 = vaddq_s16(v261, v264); + int16x8_t v266 = vaddq_s16(v258, v265); + int16x8_t v267 = vqrdmulhq_n_s16(v266, 16705); + int16x8_t v268 = vaddq_s16(v264, v257); + int16x8_t v269 = vqrdmulhq_n_s16(v268, 25080); + int16x8_t v270 = vaddq_s16(v217, v176); + int16x8_t v271 = vaddq_s16(v177, v213); + int16x8_t v272 = vaddq_s16(v270, v271); + int16x8_t v273 = vaddq_s16(v272, v261); + int16x8_t v274 = vqrdmulhq_n_s16(v273, 17734); + int16x8_t v275 = vqrdmulhq_n_s16(v268, 17734); + int16x8_t v276 = vaddq_s16(v274, v275); + int16x8_t v277 = vaddq_s16(v269, v276); + int16x8_t v278 = vqrdmulhq_n_s16(v277, 16705); + int16x8_t v279 = vaddq_s16(v267, v278); + int16x8_t v280 = vaddq_s16(v254, v279); + int16x8_t v281 = vqrdmulhq_n_s16(v280, 16404); + int16x8_t v282 = vaddq_s16(v256, v239); + int16x8_t v283_tmp = vqrdmulhq_n_s16(v282, 13573); + int16x8_t v283 = vaddq_s16(v283_tmp, v282); + int16x8_t v284 = vaddq_s16(v260, v241); + int16x8_t v285 = vaddq_s16(v242, v263); + int16x8_t v286 = vaddq_s16(v284, v285); + int16x8_t v287 = vaddq_s16(v283, v286); + int16x8_t v288 = vaddq_s16(v262, v245); + int16x8_t v289 = vaddq_s16(v246, v255); + int16x8_t v290 = vaddq_s16(v288, v289); + int16x8_t v291 = vqrdmulhq_n_s16(v290, 25080); + int16x8_t v292 = vqrdmulhq_n_s16(v290, 17734); + int16x8_t v293 = vaddq_s16(v271, v250); + int16x8_t v294 = vaddq_s16(v249, v259); + int16x8_t v295 = vaddq_s16(v293, v294); + int16x8_t v296 = vqrdmulhq_n_s16(v295, 17734); + int16x8_t v297 = vaddq_s16(v292, v296); + int16x8_t v298 = vaddq_s16(v291, v297); + int16x8_t v299 = vaddq_s16(v287, v298); + int16x8_t v300 = 
vqrdmulhq_n_s16(v299, 16463); + int16x8_t v301 = vaddq_s16(v289, v282); + int16x8_t v302 = vqrdmulhq_n_s16(v301, 23624); + int16x8_t v303 = vaddq_s16(v294, v284); + int16x8_t v304 = vqrdmulhq_n_s16(v303, 19705); + int16x8_t v305 = vaddq_s16(v285, v288); + int16x8_t v306 = vqrdmulhq_n_s16(v305, 19705); + int16x8_t v307 = vaddq_s16(v304, v306); + int16x8_t v308 = vqrdmulhq_n_s16(v307, 27779); + int16x8_t v309 = vaddq_s16(v302, v308); + int16x8_t v310 = vaddq_s16(v305, v301); + int16x8_t v311 = vqrdmulhq_n_s16(v310, 25080); + int16x8_t v312 = vqrdmulhq_n_s16(v310, 17734); + int16x8_t v313 = vld1q_s16(in + in_stride * 63 + i); + int16x8_t v314 = vaddq_s16(v313, v216); + int16x8_t v315 = vaddq_s16(v314, v270); + int16x8_t v316 = vaddq_s16(v315, v293); + int16x8_t v317 = vqrdmulhq_n_s16(v316, 25746); + int16x8_t v318 = vqrdmulhq_n_s16(v303, 25746); + int16x8_t v319 = vaddq_s16(v317, v318); + int16x8_t v320 = vqrdmulhq_n_s16(v319, 22571); + int16x8_t v321 = vaddq_s16(v312, v320); + int16x8_t v322 = vaddq_s16(v311, v321); + int16x8_t v323 = vqrdmulhq_n_s16(v322, 16705); + int16x8_t v324 = vaddq_s16(v309, v323); + int16x8_t v325 = vqrdmulhq_n_s16(v324, 16463); + int16x8_t v326 = vaddq_s16(v300, v325); + int16x8_t v327 = vqrdmulhq_n_s16(v326, 16404); + int16x8_t v328 = vaddq_s16(v281, v327); + int16x8_t v329 = vaddq_s16(v238, v328); + int16x8_t v330 = vqrdmulhq_n_s16(v329, 16389); + int16x8_t v331 = vaddq_s16(v143, v330); + int16x8_t v332 = vsubq_s16(v82, v89); + int16x8_t v333 = vqrdmulhq_n_s16(v332, 19705); + int16x8_t v334 = vqrdmulhq_n_s16(v92, 13573); + int16x8_t v335 = vsubq_s16(v334, v97); + int16x8_t v336 = vqrdmulhq_n_s16(v335, 25746); + int16x8_t v337 = vaddq_s16(v333, v336); + int16x8_t v338 = vsubq_s16(v63, v66); + int16x8_t v339 = vqrdmulhq_n_s16(v70, 17734); + int16x8_t v340_tmp = vqrdmulhq_n_s16(v74, 10045); + int16x8_t v340 = vaddq_s16(v340_tmp, v74); + int16x8_t v341 = vsubq_s16(v339, v340); + int16x8_t v342 = vaddq_s16(v338, v341); + int16x8_t v343 = 
vaddq_s16(v337, v342); + int16x8_t v344 = vsubq_s16(v130, v131); + int16x8_t v345 = vqrdmulhq_n_s16(v133, 13573); + int16x8_t v346 = vsubq_s16(v345, v135); + int16x8_t v347_tmp = vqrdmulhq_n_s16(v346, 10045); + int16x8_t v347 = vaddq_s16(v347_tmp, v346); + int16x8_t v348 = vaddq_s16(v344, v347); + int16x8_t v349 = vqrdmulhq_n_s16(v348, 17121); + int16x8_t v350 = vqrdmulhq_n_s16(v105, 27867); + int16x8_t v351 = vqrdmulhq_n_s16(v113, 19705); + int16x8_t v352 = vsubq_s16(v350, v351); + int16x8_t v353 = vqrdmulhq_n_s16(v116, 13573); + int16x8_t v354 = vsubq_s16(v353, v123); + int16x8_t v355 = vqrdmulhq_n_s16(v354, 25746); + int16x8_t v356 = vaddq_s16(v352, v355); + int16x8_t v357 = vqrdmulhq_n_s16(v356, 17121); + int16x8_t v358 = vaddq_s16(v349, v357); + int16x8_t v359 = vaddq_s16(v343, v358); + int16x8_t v360 = vqrdmulhq_n_s16(v359, 16563); + int16x8_t v361 = vsubq_s16(v27, v30); + int16x8_t v362 = vqrdmulhq_n_s16(v34, 17734); + int16x8_t v363_tmp = vqrdmulhq_n_s16(v38, 10045); + int16x8_t v363 = vaddq_s16(v363_tmp, v38); + int16x8_t v364 = vsubq_s16(v362, v363); + int16x8_t v365 = vaddq_s16(v361, v364); + int16x8_t v366 = vsubq_s16(v44, v47); + int16x8_t v367 = vqrdmulhq_n_s16(v366, 19705); + int16x8_t v368 = vqrdmulhq_n_s16(v50, 13573); + int16x8_t v369 = vsubq_s16(v368, v54); + int16x8_t v370 = vqrdmulhq_n_s16(v369, 25746); + int16x8_t v371 = vaddq_s16(v367, v370); + int16x8_t v372 = vaddq_s16(v365, v371); + int16x8_t v373 = vqrdmulhq_n_s16(v372, 17121); + int16x8_t v374 = vsubq_s16(v0, v1); + int16x8_t v375 = vsubq_s16(v4, v6); + int16x8_t v376_tmp = vqrdmulhq_n_s16(v375, 10045); + int16x8_t v376 = vaddq_s16(v376_tmp, v375); + int16x8_t v377 = vaddq_s16(v374, v376); + int16x8_t v378 = vsubq_s16(v11, v14); + int16x8_t v379 = vqrdmulhq_n_s16(v18, 17734); + int16x8_t v380_tmp = vqrdmulhq_n_s16(v17, 10045); + int16x8_t v380 = vaddq_s16(v380_tmp, v17); + int16x8_t v381 = vsubq_s16(v379, v380); + int16x8_t v382 = vaddq_s16(v378, v381); + int16x8_t v383 = 
vqrdmulhq_n_s16(v382, 19705); + int16x8_t v384 = vaddq_s16(v377, v383); + int16x8_t v385 = vaddq_s16(v373, v384); + int16x8_t v386 = vaddq_s16(v360, v385); + int16x8_t v387 = vsubq_s16(v145, v148); + int16x8_t v388 = vqrdmulhq_n_s16(v152, 17734); + int16x8_t v389_tmp = vqrdmulhq_n_s16(v156, 10045); + int16x8_t v389 = vaddq_s16(v389_tmp, v156); + int16x8_t v390 = vsubq_s16(v388, v389); + int16x8_t v391 = vaddq_s16(v387, v390); + int16x8_t v392 = vsubq_s16(v164, v171); + int16x8_t v393 = vqrdmulhq_n_s16(v392, 19705); + int16x8_t v394 = vqrdmulhq_n_s16(v174, 13573); + int16x8_t v395 = vsubq_s16(v394, v179); + int16x8_t v396 = vqrdmulhq_n_s16(v395, 25746); + int16x8_t v397 = vaddq_s16(v393, v396); + int16x8_t v398 = vaddq_s16(v391, v397); + int16x8_t v399 = vsubq_s16(v227, v228); + int16x8_t v400 = vqrdmulhq_n_s16(v232, 13573); + int16x8_t v401 = vsubq_s16(v400, v230); + int16x8_t v402_tmp = vqrdmulhq_n_s16(v401, 10045); + int16x8_t v402 = vaddq_s16(v402_tmp, v401); + int16x8_t v403 = vaddq_s16(v399, v402); + int16x8_t v404 = vqrdmulhq_n_s16(v403, 17121); + int16x8_t v405 = vqrdmulhq_n_s16(v206, 27867); + int16x8_t v406 = vqrdmulhq_n_s16(v199, 19705); + int16x8_t v407 = vsubq_s16(v405, v406); + int16x8_t v408 = vqrdmulhq_n_s16(v210, 13573); + int16x8_t v409 = vsubq_s16(v408, v220); + int16x8_t v410 = vqrdmulhq_n_s16(v409, 25746); + int16x8_t v411 = vaddq_s16(v407, v410); + int16x8_t v412 = vqrdmulhq_n_s16(v411, 17121); + int16x8_t v413 = vaddq_s16(v404, v412); + int16x8_t v414 = vaddq_s16(v398, v413); + int16x8_t v415 = vsubq_s16(v240, v243); + int16x8_t v416 = vqrdmulhq_n_s16(v247, 13573); + int16x8_t v417 = vsubq_s16(v416, v251); + int16x8_t v418_tmp = vqrdmulhq_n_s16(v417, 10045); + int16x8_t v418 = vaddq_s16(v418_tmp, v417); + int16x8_t v419 = vaddq_s16(v415, v418); + int16x8_t v420 = vqrdmulhq_n_s16(v257, 27867); + int16x8_t v421 = vqrdmulhq_n_s16(v265, 19705); + int16x8_t v422 = vsubq_s16(v420, v421); + int16x8_t v423 = vqrdmulhq_n_s16(v268, 13573); + int16x8_t 
v424 = vsubq_s16(v423, v273); + int16x8_t v425 = vqrdmulhq_n_s16(v424, 25746); + int16x8_t v426 = vaddq_s16(v422, v425); + int16x8_t v427 = vaddq_s16(v419, v426); + int16x8_t v428 = vqrdmulhq_n_s16(v427, 16563); + int16x8_t v429 = vqrdmulhq_n_s16(v301, 27867); + int16x8_t v430 = vsubq_s16(v429, v307); + int16x8_t v431 = vqrdmulhq_n_s16(v310, 10664); + int16x8_t v432 = vsubq_s16(v431, v319); + int16x8_t v433 = vaddq_s16(v430, v432); + int16x8_t v434 = vqrdmulhq_n_s16(v433, 17121); + int16x8_t v435 = vsubq_s16(v283, v286); + int16x8_t v436 = vqrdmulhq_n_s16(v290, 13573); + int16x8_t v437 = vsubq_s16(v436, v295); + int16x8_t v438_tmp = vqrdmulhq_n_s16(v437, 10045); + int16x8_t v438 = vaddq_s16(v438_tmp, v437); + int16x8_t v439 = vaddq_s16(v435, v438); + int16x8_t v440 = vqrdmulhq_n_s16(v439, 17121); + int16x8_t v441 = vaddq_s16(v434, v440); + int16x8_t v442 = vqrdmulhq_n_s16(v441, 16563); + int16x8_t v443 = vaddq_s16(v428, v442); + int16x8_t v444 = vaddq_s16(v414, v443); + int16x8_t v445 = vqrdmulhq_n_s16(v444, 16429); + int16x8_t v446 = vaddq_s16(v386, v445); + int16x8_t v447 = vsubq_s16(v374, v376); + int16x8_t v448 = vsubq_s16(v378, v381); + int16x8_t v449 = vqrdmulhq_n_s16(v448, 29490); + int16x8_t v450 = vaddq_s16(v447, v449); + int16x8_t v451 = vsubq_s16(v361, v364); + int16x8_t v452 = vqrdmulhq_n_s16(v366, 29490); + int16x8_t v453_tmp = vqrdmulhq_n_s16(v369, 5763); + int16x8_t v453 = vaddq_s16(v453_tmp, v369); + int16x8_t v454 = vsubq_s16(v452, v453); + int16x8_t v455 = vaddq_s16(v451, v454); + int16x8_t v456 = vqrdmulhq_n_s16(v455, 18578); + int16x8_t v457 = vaddq_s16(v450, v456); + int16x8_t v458 = vsubq_s16(v338, v341); + int16x8_t v459 = vqrdmulhq_n_s16(v332, 29490); + int16x8_t v460_tmp = vqrdmulhq_n_s16(v335, 5763); + int16x8_t v460 = vaddq_s16(v460_tmp, v335); + int16x8_t v461 = vsubq_s16(v459, v460); + int16x8_t v462 = vaddq_s16(v458, v461); + int16x8_t v463 = vqrdmulhq_n_s16(v352, 27803); + int16x8_t v464 = vqrdmulhq_n_s16(v354, 21845); + int16x8_t 
v465 = vsubq_s16(v463, v464); + int16x8_t v466 = vsubq_s16(v344, v347); + int16x8_t v467 = vqrdmulhq_n_s16(v466, 18578); + int16x8_t v468 = vaddq_s16(v465, v467); + int16x8_t v469 = vaddq_s16(v462, v468); + int16x8_t v470 = vqrdmulhq_n_s16(v469, 16890); + int16x8_t v471 = vaddq_s16(v457, v470); + int16x8_t v472 = vsubq_s16(v415, v418); + int16x8_t v473_tmp = vqrdmulhq_n_s16(v422, 16273); + int16x8_t v473 = vaddq_s16(v473_tmp, v422); + int16x8_t v474_tmp = vqrdmulhq_n_s16(v424, 5763); + int16x8_t v474 = vaddq_s16(v474_tmp, v424); + int16x8_t v475 = vsubq_s16(v473, v474); + int16x8_t v476 = vaddq_s16(v472, v475); + int16x8_t v477 = vqrdmulhq_n_s16(v476, 16890); + int16x8_t v478 = vqrdmulhq_n_s16(v435, 20261); + int16x8_t v479 = vqrdmulhq_n_s16(v437, 26472); + int16x8_t v480 = vsubq_s16(v478, v479); + int16x8_t v481 = vqrdmulhq_n_s16(v480, 30046); + int16x8_t v482 = vqrdmulhq_n_s16(v430, 30322); + int16x8_t v483 = vqrdmulhq_n_s16(v432, 30322); + int16x8_t v484 = vsubq_s16(v482, v483); + int16x8_t v485 = vqrdmulhq_n_s16(v484, 30046); + int16x8_t v486 = vaddq_s16(v481, v485); + int16x8_t v487 = vqrdmulhq_n_s16(v486, 16890); + int16x8_t v488 = vaddq_s16(v477, v487); + int16x8_t v489 = vsubq_s16(v387, v390); + int16x8_t v490 = vqrdmulhq_n_s16(v392, 29490); + int16x8_t v491_tmp = vqrdmulhq_n_s16(v395, 5763); + int16x8_t v491 = vaddq_s16(v491_tmp, v395); + int16x8_t v492 = vsubq_s16(v490, v491); + int16x8_t v493 = vaddq_s16(v489, v492); + int16x8_t v494 = vsubq_s16(v399, v402); + int16x8_t v495 = vqrdmulhq_n_s16(v494, 18578); + int16x8_t v496 = vqrdmulhq_n_s16(v407, 27803); + int16x8_t v497 = vqrdmulhq_n_s16(v409, 21845); + int16x8_t v498 = vsubq_s16(v496, v497); + int16x8_t v499 = vaddq_s16(v495, v498); + int16x8_t v500 = vaddq_s16(v493, v499); + int16x8_t v501 = vaddq_s16(v488, v500); + int16x8_t v502 = vqrdmulhq_n_s16(v501, 16508); + int16x8_t v503 = vaddq_s16(v471, v502); + int16x8_t v504 = vsubq_s16(v2, v8); + int16x8_t v505 = vsubq_s16(v15, v22); + int16x8_t v506_tmp 
= vqrdmulhq_n_s16(v505, 18446); + int16x8_t v506 = vmlaq_n_s16(v506_tmp, v505, 2); + int16x8_t v507 = vaddq_s16(v504, v506); + int16x8_t v508 = vsubq_s16(v31, v41); + int16x8_t v509 = vsubq_s16(v48, v56); + int16x8_t v510_tmp = vqrdmulhq_n_s16(v509, 18446); + int16x8_t v510 = vmlaq_n_s16(v510_tmp, v509, 2); + int16x8_t v511 = vaddq_s16(v508, v510); + int16x8_t v512 = vqrdmulhq_n_s16(v511, 21195); + int16x8_t v513 = vaddq_s16(v507, v512); + int16x8_t v514 = vsubq_s16(v67, v77); + int16x8_t v515 = vsubq_s16(v90, v99); + int16x8_t v516_tmp = vqrdmulhq_n_s16(v515, 18446); + int16x8_t v516 = vmlaq_n_s16(v516_tmp, v515, 2); + int16x8_t v517 = vaddq_s16(v514, v516); + int16x8_t v518 = vsubq_s16(v114, v126); + int16x8_t v519_tmp = vqrdmulhq_n_s16(v518, 18446); + int16x8_t v519 = vmlaq_n_s16(v519_tmp, v518, 2); + int16x8_t v520 = vsubq_s16(v132, v137); + int16x8_t v521 = vaddq_s16(v519, v520); + int16x8_t v522 = vqrdmulhq_n_s16(v521, 21195); + int16x8_t v523 = vaddq_s16(v517, v522); + int16x8_t v524 = vqrdmulhq_n_s16(v523, 17401); + int16x8_t v525 = vaddq_s16(v513, v524); + int16x8_t v526 = vsubq_s16(v172, v181); + int16x8_t v527_tmp = vqrdmulhq_n_s16(v526, 18446); + int16x8_t v527 = vmlaq_n_s16(v527_tmp, v526, 2); + int16x8_t v528 = vsubq_s16(v149, v159); + int16x8_t v529 = vaddq_s16(v527, v528); + int16x8_t v530 = vsubq_s16(v229, v234); + int16x8_t v531 = vsubq_s16(v208, v223); + int16x8_t v532_tmp = vqrdmulhq_n_s16(v531, 18446); + int16x8_t v532 = vmlaq_n_s16(v532_tmp, v531, 2); + int16x8_t v533 = vaddq_s16(v530, v532); + int16x8_t v534 = vqrdmulhq_n_s16(v533, 21195); + int16x8_t v535 = vaddq_s16(v529, v534); + int16x8_t v536 = vsubq_s16(v244, v253); + int16x8_t v537 = vsubq_s16(v266, v277); + int16x8_t v538_tmp = vqrdmulhq_n_s16(v537, 18446); + int16x8_t v538 = vmlaq_n_s16(v538_tmp, v537, 2); + int16x8_t v539 = vaddq_s16(v536, v538); + int16x8_t v540 = vqrdmulhq_n_s16(v539, 17401); + int16x8_t v541 = vqrdmulhq_n_s16(v287, 25826); + int16x8_t v542 = vqrdmulhq_n_s16(v298, 
25826); + int16x8_t v543 = vsubq_s16(v541, v542); + int16x8_t v544 = vqrdmulhq_n_s16(v543, 14281); + int16x8_t v545_tmp = vqrdmulhq_n_s16(v309, 31509); + int16x8_t v545 = vaddq_s16(v545_tmp, v309); + int16x8_t v546 = vsubq_s16(v545, v322); + int16x8_t v547 = vqrdmulhq_n_s16(v546, 28847); + int16x8_t v548 = vaddq_s16(v544, v547); + int16x8_t v549 = vaddq_s16(v540, v548); + int16x8_t v550 = vaddq_s16(v535, v549); + int16x8_t v551 = vqrdmulhq_n_s16(v550, 16629); + int16x8_t v552 = vaddq_s16(v525, v551); + int16x8_t v553 = vsubq_s16(v504, v506); + int16x8_t v554 = vsubq_s16(v508, v510); + int16x8_t v555 = vqrdmulhq_n_s16(v554, 25826); + int16x8_t v556 = vaddq_s16(v553, v555); + int16x8_t v557 = vsubq_s16(v514, v516); + int16x8_t v558 = vsubq_s16(v520, v519); + int16x8_t v559 = vqrdmulhq_n_s16(v558, 25826); + int16x8_t v560 = vaddq_s16(v557, v559); + int16x8_t v561 = vqrdmulhq_n_s16(v560, 18124); + int16x8_t v562 = vaddq_s16(v556, v561); + int16x8_t v563 = vsubq_s16(v528, v527); + int16x8_t v564 = vsubq_s16(v530, v532); + int16x8_t v565 = vqrdmulhq_n_s16(v564, 25826); + int16x8_t v566 = vaddq_s16(v563, v565); + int16x8_t v567 = vsubq_s16(v536, v538); + int16x8_t v568 = vqrdmulhq_n_s16(v567, 18124); + int16x8_t v569_tmp = vqrdmulhq_n_s16(v546, 654); + int16x8_t v569 = vmlaq_n_s16(v569_tmp, v546, 2); + int16x8_t v570 = vsubq_s16(v543, v569); + int16x8_t v571 = vqrdmulhq_n_s16(v570, 18124); + int16x8_t v572 = vaddq_s16(v568, v571); + int16x8_t v573 = vaddq_s16(v566, v572); + int16x8_t v574 = vqrdmulhq_n_s16(v573, 16792); + int16x8_t v575 = vaddq_s16(v562, v574); + int16x8_t v576 = vsubq_s16(v458, v461); + int16x8_t v577_tmp = vqrdmulhq_n_s16(v465, 25030); + int16x8_t v577 = vaddq_s16(v577_tmp, v465); + int16x8_t v578 = vsubq_s16(v466, v577); + int16x8_t v579_tmp = vqrdmulhq_n_s16(v578, 1988); + int16x8_t v579 = vaddq_s16(v579_tmp, v578); + int16x8_t v580 = vaddq_s16(v576, v579); + int16x8_t v581 = vqrdmulhq_n_s16(v580, 19102); + int16x8_t v582 = vsubq_s16(v447, v449); + 
int16x8_t v583 = vsubq_s16(v451, v454); + int16x8_t v584_tmp = vqrdmulhq_n_s16(v583, 1988); + int16x8_t v584 = vaddq_s16(v584_tmp, v583); + int16x8_t v585 = vaddq_s16(v582, v584); + int16x8_t v586 = vaddq_s16(v581, v585); + int16x8_t v587 = vsubq_s16(v489, v492); + int16x8_t v588_tmp = vqrdmulhq_n_s16(v498, 25030); + int16x8_t v588 = vaddq_s16(v588_tmp, v498); + int16x8_t v589 = vsubq_s16(v494, v588); + int16x8_t v590_tmp = vqrdmulhq_n_s16(v589, 1988); + int16x8_t v590 = vaddq_s16(v590_tmp, v589); + int16x8_t v591 = vaddq_s16(v587, v590); + int16x8_t v592 = vsubq_s16(v472, v475); + int16x8_t v593 = vqrdmulhq_n_s16(v592, 19102); + int16x8_t v594 = vsubq_s16(v480, v484); + int16x8_t v595 = vaddq_s16(v593, v594); + int16x8_t v596 = vaddq_s16(v591, v595); + int16x8_t v597 = vqrdmulhq_n_s16(v596, 17000); + int16x8_t v598 = vaddq_s16(v586, v597); + int16x8_t v599 = vsubq_s16(v365, v371); + int16x8_t v600_tmp = vqrdmulhq_n_s16(v599, 23673); + int16x8_t v600 = vaddq_s16(v600_tmp, v599); + int16x8_t v601 = vsubq_s16(v377, v383); + int16x8_t v602 = vaddq_s16(v600, v601); + int16x8_t v603 = vsubq_s16(v348, v356); + int16x8_t v604_tmp = vqrdmulhq_n_s16(v603, 23673); + int16x8_t v604 = vaddq_s16(v604_tmp, v603); + int16x8_t v605 = vsubq_s16(v342, v337); + int16x8_t v606 = vaddq_s16(v604, v605); + int16x8_t v607 = vqrdmulhq_n_s16(v606, 20398); + int16x8_t v608 = vaddq_s16(v602, v607); + int16x8_t v609 = vsubq_s16(v391, v397); + int16x8_t v610 = vsubq_s16(v403, v411); + int16x8_t v611_tmp = vqrdmulhq_n_s16(v610, 23673); + int16x8_t v611 = vaddq_s16(v611_tmp, v610); + int16x8_t v612 = vaddq_s16(v609, v611); + int16x8_t v613 = vsubq_s16(v419, v426); + int16x8_t v614 = vqrdmulhq_n_s16(v613, 20398); + int16x8_t v615 = vsubq_s16(v439, v433); + int16x8_t v616_tmp = vqrdmulhq_n_s16(v615, 2367); + int16x8_t v616 = vaddq_s16(v616_tmp, v615); + int16x8_t v617 = vaddq_s16(v614, v616); + int16x8_t v618 = vaddq_s16(v612, v617); + int16x8_t v619 = vqrdmulhq_n_s16(v618, 17255); + int16x8_t v620 
= vaddq_s16(v608, v619); + int16x8_t v621 = vsubq_s16(v160, v183); + int16x8_t v622 = vsubq_s16(v235, v225); + int16x8_t v623_tmp = vqrdmulhq_n_s16(v622, 3314); + int16x8_t v623 = vmlaq_n_s16(v623_tmp, v622, 5); + int16x8_t v624 = vaddq_s16(v621, v623); + int16x8_t v625 = vsubq_s16(v254, v279); + int16x8_t v626 = vsubq_s16(v299, v324); + int16x8_t v627_tmp = vqrdmulhq_n_s16(v626, 3314); + int16x8_t v627 = vmlaq_n_s16(v627_tmp, v626, 5); + int16x8_t v628 = vaddq_s16(v625, v627); + int16x8_t v629 = vqrdmulhq_n_s16(v628, 22112); + int16x8_t v630 = vaddq_s16(v624, v629); + int16x8_t v631 = vqrdmulhq_n_s16(v630, 17561); + int16x8_t v632 = vsubq_s16(v9, v24); + int16x8_t v633 = vsubq_s16(v42, v58); + int16x8_t v634_tmp = vqrdmulhq_n_s16(v633, 3314); + int16x8_t v634 = vmlaq_n_s16(v634_tmp, v633, 5); + int16x8_t v635 = vaddq_s16(v632, v634); + int16x8_t v636 = vsubq_s16(v78, v101); + int16x8_t v637 = vsubq_s16(v138, v128); + int16x8_t v638_tmp = vqrdmulhq_n_s16(v637, 3314); + int16x8_t v638 = vmlaq_n_s16(v638_tmp, v637, 5); + int16x8_t v639 = vaddq_s16(v636, v638); + int16x8_t v640 = vqrdmulhq_n_s16(v639, 22112); + int16x8_t v641 = vaddq_s16(v635, v640); + int16x8_t v642 = vaddq_s16(v631, v641); + int16x8_t v643 = vsubq_s16(v632, v634); + int16x8_t v644 = vsubq_s16(v636, v638); + int16x8_t v645 = vqrdmulhq_n_s16(v644, 24397); + int16x8_t v646 = vaddq_s16(v643, v645); + int16x8_t v647 = vsubq_s16(v621, v623); + int16x8_t v648 = vsubq_s16(v625, v627); + int16x8_t v649 = vqrdmulhq_n_s16(v648, 24397); + int16x8_t v650 = vaddq_s16(v647, v649); + int16x8_t v651 = vqrdmulhq_n_s16(v650, 17921); + int16x8_t v652 = vaddq_s16(v646, v651); + int16x8_t v653 = vsubq_s16(v601, v600); + int16x8_t v654 = vsubq_s16(v605, v604); + int16x8_t v655 = vqrdmulhq_n_s16(v654, 27504); + int16x8_t v656 = vaddq_s16(v653, v655); + int16x8_t v657 = vsubq_s16(v609, v611); + int16x8_t v658 = vqrdmulhq_n_s16(v613, 27504); + int16x8_t v659_tmp = vqrdmulhq_n_s16(v615, 14606); + int16x8_t v659 = 
vaddq_s16(v659_tmp, v615); + int16x8_t v660 = vsubq_s16(v658, v659); + int16x8_t v661 = vaddq_s16(v657, v660); + int16x8_t v662 = vqrdmulhq_n_s16(v661, 18343); + int16x8_t v663 = vaddq_s16(v656, v662); + int16x8_t v664 = vsubq_s16(v582, v584); + int16x8_t v665 = vsubq_s16(v576, v579); + int16x8_t v666 = vqrdmulhq_n_s16(v665, 31869); + int16x8_t v667 = vaddq_s16(v664, v666); + int16x8_t v668 = vsubq_s16(v587, v590); + int16x8_t v669_tmp = vqrdmulhq_n_s16(v594, 23444); + int16x8_t v669 = vaddq_s16(v669_tmp, v594); + int16x8_t v670 = vsubq_s16(v592, v669); + int16x8_t v671 = vqrdmulhq_n_s16(v670, 31869); + int16x8_t v672 = vaddq_s16(v668, v671); + int16x8_t v673 = vqrdmulhq_n_s16(v672, 18830); + int16x8_t v674 = vaddq_s16(v667, v673); + int16x8_t v675 = vsubq_s16(v553, v555); + int16x8_t v676 = vsubq_s16(v557, v559); + int16x8_t v677_tmp = vqrdmulhq_n_s16(v676, 5552); + int16x8_t v677 = vaddq_s16(v677_tmp, v676); + int16x8_t v678 = vaddq_s16(v675, v677); + int16x8_t v679 = vsubq_s16(v563, v565); + int16x8_t v680 = vsubq_s16(v567, v570); + int16x8_t v681_tmp = vqrdmulhq_n_s16(v680, 5552); + int16x8_t v681 = vaddq_s16(v681_tmp, v680); + int16x8_t v682 = vaddq_s16(v679, v681); + int16x8_t v683 = vqrdmulhq_n_s16(v682, 19393); + int16x8_t v684 = vaddq_s16(v678, v683); + int16x8_t v685 = vsubq_s16(v507, v512); + int16x8_t v686 = vsubq_s16(v517, v522); + int16x8_t v687_tmp = vqrdmulhq_n_s16(v686, 15865); + int16x8_t v687 = vaddq_s16(v687_tmp, v686); + int16x8_t v688 = vaddq_s16(v685, v687); + int16x8_t v689 = vsubq_s16(v529, v534); + int16x8_t v690_tmp = vqrdmulhq_n_s16(v548, 28937); + int16x8_t v690 = vaddq_s16(v690_tmp, v548); + int16x8_t v691 = vsubq_s16(v539, v690); + int16x8_t v692_tmp = vqrdmulhq_n_s16(v691, 15865); + int16x8_t v692 = vaddq_s16(v692_tmp, v691); + int16x8_t v693 = vaddq_s16(v689, v692); + int16x8_t v694 = vqrdmulhq_n_s16(v693, 20040); + int16x8_t v695 = vaddq_s16(v688, v694); + int16x8_t v696 = vsubq_s16(v476, v486); + int16x8_t v697_tmp = 
vqrdmulhq_n_s16(v696, 1893); + int16x8_t v697 = vmlaq_n_s16(v697_tmp, v696, 2); + int16x8_t v698 = vsubq_s16(v493, v499); + int16x8_t v699 = vaddq_s16(v697, v698); + int16x8_t v700 = vqrdmulhq_n_s16(v699, 20783); + int16x8_t v701 = vsubq_s16(v450, v456); + int16x8_t v702 = vsubq_s16(v462, v468); + int16x8_t v703_tmp = vqrdmulhq_n_s16(v702, 1893); + int16x8_t v703 = vmlaq_n_s16(v703_tmp, v702, 2); + int16x8_t v704 = vaddq_s16(v701, v703); + int16x8_t v705 = vaddq_s16(v700, v704); + int16x8_t v706 = vsubq_s16(v384, v373); + int16x8_t v707 = vsubq_s16(v343, v358); + int16x8_t v708_tmp = vqrdmulhq_n_s16(v707, 13357); + int16x8_t v708 = vmlaq_n_s16(v708_tmp, v707, 3); + int16x8_t v709 = vaddq_s16(v706, v708); + int16x8_t v710 = vsubq_s16(v398, v413); + int16x8_t v711 = vsubq_s16(v427, v441); + int16x8_t v712_tmp = vqrdmulhq_n_s16(v711, 13357); + int16x8_t v712 = vmlaq_n_s16(v712_tmp, v711, 3); + int16x8_t v713 = vaddq_s16(v710, v712); + int16x8_t v714 = vqrdmulhq_n_s16(v713, 21637); + int16x8_t v715 = vaddq_s16(v709, v714); + int16x8_t v716 = vsubq_s16(v25, v60); + int16x8_t v717 = vsubq_s16(v102, v140); + int16x8_t v718_tmp = vqrdmulhq_n_s16(v717, 6226); + int16x8_t v718 = vmlaq_n_s16(v718_tmp, v717, 10); + int16x8_t v719 = vaddq_s16(v716, v718); + int16x8_t v720 = vsubq_s16(v280, v326); + int16x8_t v721_tmp = vqrdmulhq_n_s16(v720, 6226); + int16x8_t v721 = vmlaq_n_s16(v721_tmp, v720, 10); + int16x8_t v722 = vsubq_s16(v184, v237); + int16x8_t v723 = vaddq_s16(v721, v722); + int16x8_t v724 = vqrdmulhq_n_s16(v723, 22622); + int16x8_t v725 = vaddq_s16(v719, v724); + int16x8_t v726 = vsubq_s16(v716, v718); + int16x8_t v727 = vsubq_s16(v722, v721); + int16x8_t v728 = vqrdmulhq_n_s16(v727, 23761); + int16x8_t v729 = vaddq_s16(v726, v728); + int16x8_t v730 = vsubq_s16(v706, v708); + int16x8_t v731 = vsubq_s16(v710, v712); + int16x8_t v732 = vqrdmulhq_n_s16(v731, 25084); + int16x8_t v733 = vaddq_s16(v730, v732); + int16x8_t v734 = vsubq_s16(v701, v703); + int16x8_t v735 = 
vsubq_s16(v698, v697); + int16x8_t v736 = vqrdmulhq_n_s16(v735, 26631); + int16x8_t v737 = vaddq_s16(v734, v736); + int16x8_t v738 = vsubq_s16(v685, v687); + int16x8_t v739 = vsubq_s16(v689, v692); + int16x8_t v740 = vqrdmulhq_n_s16(v739, 28454); + int16x8_t v741 = vaddq_s16(v738, v740); + int16x8_t v742 = vsubq_s16(v675, v677); + int16x8_t v743 = vsubq_s16(v679, v681); + int16x8_t v744 = vqrdmulhq_n_s16(v743, 30624); + int16x8_t v745 = vaddq_s16(v742, v744); + int16x8_t v746 = vsubq_s16(v664, v666); + int16x8_t v747 = vsubq_s16(v668, v671); + int16x8_t v748_tmp = vqrdmulhq_n_s16(v747, 472); + int16x8_t v748 = vaddq_s16(v748_tmp, v747); + int16x8_t v749 = vaddq_s16(v746, v748); + int16x8_t v750 = vsubq_s16(v653, v655); + int16x8_t v751 = vsubq_s16(v657, v660); + int16x8_t v752_tmp = vqrdmulhq_n_s16(v751, 3672); + int16x8_t v752 = vaddq_s16(v752_tmp, v751); + int16x8_t v753 = vaddq_s16(v750, v752); + int16x8_t v754 = vsubq_s16(v643, v645); + int16x8_t v755 = vsubq_s16(v647, v649); + int16x8_t v756_tmp = vqrdmulhq_n_s16(v755, 7662); + int16x8_t v756 = vaddq_s16(v756_tmp, v755); + int16x8_t v757 = vaddq_s16(v754, v756); + int16x8_t v758 = vsubq_s16(v635, v640); + int16x8_t v759 = vsubq_s16(v624, v629); + int16x8_t v760_tmp = vqrdmulhq_n_s16(v759, 12756); + int16x8_t v760 = vaddq_s16(v760_tmp, v759); + int16x8_t v761 = vaddq_s16(v758, v760); + int16x8_t v762 = vsubq_s16(v602, v607); + int16x8_t v763 = vsubq_s16(v612, v617); + int16x8_t v764_tmp = vqrdmulhq_n_s16(v763, 19463); + int16x8_t v764 = vaddq_s16(v764_tmp, v763); + int16x8_t v765 = vaddq_s16(v762, v764); + int16x8_t v766 = vsubq_s16(v585, v581); + int16x8_t v767 = vsubq_s16(v591, v595); + int16x8_t v768_tmp = vqrdmulhq_n_s16(v767, 28661); + int16x8_t v768 = vaddq_s16(v768_tmp, v767); + int16x8_t v769 = vaddq_s16(v766, v768); + int16x8_t v770 = vsubq_s16(v556, v561); + int16x8_t v771 = vsubq_s16(v566, v572); + int16x8_t v772_tmp = vqrdmulhq_n_s16(v771, 9242); + int16x8_t v772 = vmlaq_n_s16(v772_tmp, v771, 2); + 
int16x8_t v773 = vaddq_s16(v770, v772); + int16x8_t v774 = vsubq_s16(v513, v524); + int16x8_t v775 = vsubq_s16(v535, v549); + int16x8_t v776_tmp = vqrdmulhq_n_s16(v775, 30298); + int16x8_t v776 = vmlaq_n_s16(v776_tmp, v775, 2); + int16x8_t v777 = vaddq_s16(v774, v776); + int16x8_t v778 = vsubq_s16(v457, v470); + int16x8_t v779 = vsubq_s16(v500, v488); + int16x8_t v780_tmp = vqrdmulhq_n_s16(v779, 2773); + int16x8_t v780 = vmlaq_n_s16(v780_tmp, v779, 4); + int16x8_t v781 = vaddq_s16(v778, v780); + int16x8_t v782 = vsubq_s16(v385, v360); + int16x8_t v783 = vsubq_s16(v414, v443); + int16x8_t v784_tmp = vqrdmulhq_n_s16(v783, 26108); + int16x8_t v784 = vmlaq_n_s16(v784_tmp, v783, 6); + int16x8_t v785 = vaddq_s16(v782, v784); + int16x8_t v786 = vsubq_s16(v61, v142); + int16x8_t v787 = vsubq_s16(v238, v328); + int16x8_t v788_tmp = vqrdmulhq_n_s16(v787, 12251); + int16x8_t v788 = vmlaq_n_s16(v788_tmp, v787, 20); + int16x8_t v789 = vaddq_s16(v786, v788); + int16x8_t v790 = vsubq_s16(v786, v788); + int16x8_t v791 = vsubq_s16(v782, v784); + int16x8_t v792 = vsubq_s16(v778, v780); + int16x8_t v793 = vsubq_s16(v774, v776); + int16x8_t v794 = vsubq_s16(v770, v772); + int16x8_t v795 = vsubq_s16(v766, v768); + int16x8_t v796 = vsubq_s16(v762, v764); + int16x8_t v797 = vsubq_s16(v758, v760); + int16x8_t v798 = vsubq_s16(v754, v756); + int16x8_t v799 = vsubq_s16(v750, v752); + int16x8_t v800 = vsubq_s16(v746, v748); + int16x8_t v801 = vsubq_s16(v742, v744); + int16x8_t v802 = vsubq_s16(v738, v740); + int16x8_t v803 = vsubq_s16(v734, v736); + int16x8_t v804 = vsubq_s16(v730, v732); + int16x8_t v805 = vsubq_s16(v726, v728); + int16x8_t v806 = vsubq_s16(v719, v724); + int16x8_t v807 = vsubq_s16(v709, v714); + int16x8_t v808 = vsubq_s16(v704, v700); + int16x8_t v809 = vsubq_s16(v688, v694); + int16x8_t v810 = vsubq_s16(v678, v683); + int16x8_t v811 = vsubq_s16(v667, v673); + int16x8_t v812 = vsubq_s16(v656, v662); + int16x8_t v813 = vsubq_s16(v646, v651); + int16x8_t v814 = 
vsubq_s16(v641, v631); + int16x8_t v815 = vsubq_s16(v608, v619); + int16x8_t v816 = vsubq_s16(v586, v597); + int16x8_t v817 = vsubq_s16(v562, v574); + int16x8_t v818 = vsubq_s16(v525, v551); + int16x8_t v819 = vsubq_s16(v471, v502); + int16x8_t v820 = vsubq_s16(v386, v445); + int16x8_t v821 = vsubq_s16(v143, v330); + vst1q_s16(out + out_stride * 0 + i, v331); + vst1q_s16(out + out_stride * 1 + i, v446); + vst1q_s16(out + out_stride * 2 + i, v503); + vst1q_s16(out + out_stride * 3 + i, v552); + vst1q_s16(out + out_stride * 4 + i, v575); + vst1q_s16(out + out_stride * 5 + i, v598); + vst1q_s16(out + out_stride * 6 + i, v620); + vst1q_s16(out + out_stride * 7 + i, v642); + vst1q_s16(out + out_stride * 8 + i, v652); + vst1q_s16(out + out_stride * 9 + i, v663); + vst1q_s16(out + out_stride * 10 + i, v674); + vst1q_s16(out + out_stride * 11 + i, v684); + vst1q_s16(out + out_stride * 12 + i, v695); + vst1q_s16(out + out_stride * 13 + i, v705); + vst1q_s16(out + out_stride * 14 + i, v715); + vst1q_s16(out + out_stride * 15 + i, v725); + vst1q_s16(out + out_stride * 16 + i, v729); + vst1q_s16(out + out_stride * 17 + i, v733); + vst1q_s16(out + out_stride * 18 + i, v737); + vst1q_s16(out + out_stride * 19 + i, v741); + vst1q_s16(out + out_stride * 20 + i, v745); + vst1q_s16(out + out_stride * 21 + i, v749); + vst1q_s16(out + out_stride * 22 + i, v753); + vst1q_s16(out + out_stride * 23 + i, v757); + vst1q_s16(out + out_stride * 24 + i, v761); + vst1q_s16(out + out_stride * 25 + i, v765); + vst1q_s16(out + out_stride * 26 + i, v769); + vst1q_s16(out + out_stride * 27 + i, v773); + vst1q_s16(out + out_stride * 28 + i, v777); + vst1q_s16(out + out_stride * 29 + i, v781); + vst1q_s16(out + out_stride * 30 + i, v785); + vst1q_s16(out + out_stride * 31 + i, v789); + vst1q_s16(out + out_stride * 32 + i, v790); + vst1q_s16(out + out_stride * 33 + i, v791); + vst1q_s16(out + out_stride * 34 + i, v792); + vst1q_s16(out + out_stride * 35 + i, v793); + vst1q_s16(out + out_stride * 36 + 
i, v794); + vst1q_s16(out + out_stride * 37 + i, v795); + vst1q_s16(out + out_stride * 38 + i, v796); + vst1q_s16(out + out_stride * 39 + i, v797); + vst1q_s16(out + out_stride * 40 + i, v798); + vst1q_s16(out + out_stride * 41 + i, v799); + vst1q_s16(out + out_stride * 42 + i, v800); + vst1q_s16(out + out_stride * 43 + i, v801); + vst1q_s16(out + out_stride * 44 + i, v802); + vst1q_s16(out + out_stride * 45 + i, v803); + vst1q_s16(out + out_stride * 46 + i, v804); + vst1q_s16(out + out_stride * 47 + i, v805); + vst1q_s16(out + out_stride * 48 + i, v806); + vst1q_s16(out + out_stride * 49 + i, v807); + vst1q_s16(out + out_stride * 50 + i, v808); + vst1q_s16(out + out_stride * 51 + i, v809); + vst1q_s16(out + out_stride * 52 + i, v810); + vst1q_s16(out + out_stride * 53 + i, v811); + vst1q_s16(out + out_stride * 54 + i, v812); + vst1q_s16(out + out_stride * 55 + i, v813); + vst1q_s16(out + out_stride * 56 + i, v814); + vst1q_s16(out + out_stride * 57 + i, v815); + vst1q_s16(out + out_stride * 58 + i, v816); + vst1q_s16(out + out_stride * 59 + i, v817); + vst1q_s16(out + out_stride * 60 + i, v818); + vst1q_s16(out + out_stride * 61 + i, v819); + vst1q_s16(out + out_stride * 62 + i, v820); + vst1q_s16(out + out_stride * 63 + i, v821); + } +} diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct8-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_dct8-inl.h new file mode 100644 index 0000000000..946ace4a0c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct8-inl.h @@ -0,0 +1,80 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* This file is automatically generated. Do not modify it directly. 
*/ +#if HWY_TARGET != HWY_NEON +#error "only include this file from fast_dct-inl.h" +#endif + +constexpr size_t FastIDCTIntegerBits(FastDCTTag<8>) { return 1; } + +void FastIDCT(FastDCTTag<8>, const int16_t* in, size_t in_stride, int16_t* out, + size_t out_stride, size_t count) { + JXL_ASSERT(count % 8 == 0); + for (size_t i = 0; i < count; i += 8) { + int16x8_t v0 = vld1q_s16(in + in_stride * 0 + i); + int16x8_t v1 = vld1q_s16(in + in_stride * 4 + i); + int16x8_t v2 = vaddq_s16(v0, v1); + int16x8_t v3 = vld1q_s16(in + in_stride * 2 + i); + int16x8_t v4_tmp = vqrdmulhq_n_s16(v3, 13573); + int16x8_t v4 = vaddq_s16(v4_tmp, v3); + int16x8_t v5 = vld1q_s16(in + in_stride * 6 + i); + int16x8_t v6 = vaddq_s16(v5, v3); + int16x8_t v7 = vaddq_s16(v4, v6); + int16x8_t v8 = vqrdmulhq_n_s16(v7, 17734); + int16x8_t v9 = vaddq_s16(v2, v8); + int16x8_t v10 = vld1q_s16(in + in_stride * 1 + i); + int16x8_t v11_tmp = vqrdmulhq_n_s16(v10, 13573); + int16x8_t v11 = vaddq_s16(v11_tmp, v10); + int16x8_t v12 = vld1q_s16(in + in_stride * 5 + i); + int16x8_t v13 = vld1q_s16(in + in_stride * 3 + i); + int16x8_t v14 = vaddq_s16(v12, v13); + int16x8_t v15 = vaddq_s16(v11, v14); + int16x8_t v16 = vaddq_s16(v13, v10); + int16x8_t v17 = vqrdmulhq_n_s16(v16, 25080); + int16x8_t v18 = vld1q_s16(in + in_stride * 7 + i); + int16x8_t v19 = vaddq_s16(v18, v12); + int16x8_t v20 = vaddq_s16(v16, v19); + int16x8_t v21 = vqrdmulhq_n_s16(v20, 17734); + int16x8_t v22 = vaddq_s16(v17, v21); + int16x8_t v23 = vaddq_s16(v15, v22); + int16x8_t v24 = vqrdmulhq_n_s16(v23, 16705); + int16x8_t v25 = vaddq_s16(v9, v24); + int16x8_t v26 = vsubq_s16(v0, v1); + int16x8_t v27 = vsubq_s16(v4, v6); + int16x8_t v28_tmp = vqrdmulhq_n_s16(v27, 10045); + int16x8_t v28 = vaddq_s16(v28_tmp, v27); + int16x8_t v29 = vaddq_s16(v26, v28); + int16x8_t v30 = vsubq_s16(v11, v14); + int16x8_t v31 = vqrdmulhq_n_s16(v16, 17734); + int16x8_t v32_tmp = vqrdmulhq_n_s16(v19, 10045); + int16x8_t v32 = vaddq_s16(v32_tmp, v19); + int16x8_t v33 
= vsubq_s16(v31, v32); + int16x8_t v34 = vaddq_s16(v30, v33); + int16x8_t v35 = vqrdmulhq_n_s16(v34, 19705); + int16x8_t v36 = vaddq_s16(v29, v35); + int16x8_t v37 = vsubq_s16(v26, v28); + int16x8_t v38 = vsubq_s16(v30, v33); + int16x8_t v39 = vqrdmulhq_n_s16(v38, 29490); + int16x8_t v40 = vaddq_s16(v37, v39); + int16x8_t v41 = vsubq_s16(v2, v8); + int16x8_t v42 = vsubq_s16(v15, v22); + int16x8_t v43_tmp = vqrdmulhq_n_s16(v42, 18446); + int16x8_t v43 = vmlaq_n_s16(v43_tmp, v42, 2); + int16x8_t v44 = vaddq_s16(v41, v43); + int16x8_t v45 = vsubq_s16(v41, v43); + int16x8_t v46 = vsubq_s16(v37, v39); + int16x8_t v47 = vsubq_s16(v29, v35); + int16x8_t v48 = vsubq_s16(v9, v24); + vst1q_s16(out + out_stride * 0 + i, v25); + vst1q_s16(out + out_stride * 1 + i, v36); + vst1q_s16(out + out_stride * 2 + i, v40); + vst1q_s16(out + out_stride * 3 + i, v44); + vst1q_s16(out + out_stride * 4 + i, v45); + vst1q_s16(out + out_stride * 5 + i, v46); + vst1q_s16(out + out_stride * 6 + i, v47); + vst1q_s16(out + out_stride * 7 + i, v48); + } +} diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_dct_test.cc b/third-party/libjxl/libjxl/lib/jxl/fast_dct_test.cc new file mode 100644 index 0000000000..5bb1a79cc5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_dct_test.cc @@ -0,0 +1,378 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/fast_dct_test.cc" +#include + +#include "lib/jxl/base/random.h" +#include "lib/jxl/dct-inl.h" +#include "lib/jxl/fast_dct-inl.h" +#include "lib/jxl/fast_dct.h" +#include "lib/jxl/transpose-inl.h" + +// Test utils +#include +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +template +HWY_NOINLINE void TestFastTranspose() { +#if HWY_TARGET == HWY_NEON + auto array_mem = hwy::AllocateAligned(N * M); + int16_t* array = array_mem.get(); + auto transposed_mem = hwy::AllocateAligned(N * M); + int16_t* transposed = transposed_mem.get(); + std::iota(array, array + N * M, 0); + for (size_t j = 0; j < 100000000 / (N * M); j++) { + FastTransposeBlock(array, M, N, M, transposed, N); + } + for (size_t i = 0; i < M; i++) { + for (size_t j = 0; j < N; j++) { + EXPECT_EQ(array[j * M + i], transposed[i * N + j]); + } + } +#endif +} + +template +HWY_NOINLINE void TestFloatTranspose() { + auto array_mem = hwy::AllocateAligned(N * M); + float* array = array_mem.get(); + auto transposed_mem = hwy::AllocateAligned(N * M); + float* transposed = transposed_mem.get(); + std::iota(array, array + N * M, 0); + for (size_t j = 0; j < 100000000 / (N * M); j++) { + Transpose::Run(DCTFrom(array, M), DCTTo(transposed, N)); + } + for (size_t i = 0; i < M; i++) { + for (size_t j = 0; j < N; j++) { + EXPECT_EQ(array[j * M + i], transposed[i * N + j]); + } + } +} + +// TODO(sboukortt): re-enable the FloatIDCT tests once we find out why they fail +// in ASAN mode in the CI runners and seemingly not locally. 
+ +HWY_NOINLINE void TestFastTranspose8x8() { TestFastTranspose<8, 8>(); } +HWY_NOINLINE void TestFloatTranspose8x8() { TestFloatTranspose<8, 8>(); } +HWY_NOINLINE void TestFastIDCT8x8() { TestFastIDCT<8, 8>(); } +HWY_NOINLINE void TestFloatIDCT8x8() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<8, 8>(); +#endif +} +HWY_NOINLINE void TestFastTranspose8x16() { TestFastTranspose<8, 16>(); } +HWY_NOINLINE void TestFloatTranspose8x16() { TestFloatTranspose<8, 16>(); } +HWY_NOINLINE void TestFastIDCT8x16() { TestFastIDCT<8, 16>(); } +HWY_NOINLINE void TestFloatIDCT8x16() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<8, 16>(); +#endif +} +HWY_NOINLINE void TestFastTranspose8x32() { TestFastTranspose<8, 32>(); } +HWY_NOINLINE void TestFloatTranspose8x32() { TestFloatTranspose<8, 32>(); } +HWY_NOINLINE void TestFastIDCT8x32() { TestFastIDCT<8, 32>(); } +HWY_NOINLINE void TestFloatIDCT8x32() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<8, 32>(); +#endif +} +HWY_NOINLINE void TestFastTranspose16x8() { TestFastTranspose<16, 8>(); } +HWY_NOINLINE void TestFloatTranspose16x8() { TestFloatTranspose<16, 8>(); } +HWY_NOINLINE void TestFastIDCT16x8() { TestFastIDCT<16, 8>(); } +HWY_NOINLINE void TestFloatIDCT16x8() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<16, 8>(); +#endif +} +HWY_NOINLINE void TestFastTranspose16x16() { TestFastTranspose<16, 16>(); } +HWY_NOINLINE void TestFloatTranspose16x16() { TestFloatTranspose<16, 16>(); } +HWY_NOINLINE void TestFastIDCT16x16() { TestFastIDCT<16, 16>(); } +HWY_NOINLINE void TestFloatIDCT16x16() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || 
defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<16, 16>(); +#endif +} +HWY_NOINLINE void TestFastTranspose16x32() { TestFastTranspose<16, 32>(); } +HWY_NOINLINE void TestFloatTranspose16x32() { TestFloatTranspose<16, 32>(); } +HWY_NOINLINE void TestFastIDCT16x32() { TestFastIDCT<16, 32>(); } +HWY_NOINLINE void TestFloatIDCT16x32() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<16, 32>(); +#endif +} +HWY_NOINLINE void TestFastTranspose32x8() { TestFastTranspose<32, 8>(); } +HWY_NOINLINE void TestFloatTranspose32x8() { TestFloatTranspose<32, 8>(); } +HWY_NOINLINE void TestFastIDCT32x8() { TestFastIDCT<32, 8>(); } +HWY_NOINLINE void TestFloatIDCT32x8() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<32, 8>(); +#endif +} +HWY_NOINLINE void TestFastTranspose32x16() { TestFastTranspose<32, 16>(); } +HWY_NOINLINE void TestFloatTranspose32x16() { TestFloatTranspose<32, 16>(); } +HWY_NOINLINE void TestFastIDCT32x16() { TestFastIDCT<32, 16>(); } +HWY_NOINLINE void TestFloatIDCT32x16() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<32, 16>(); +#endif +} +HWY_NOINLINE void TestFastTranspose32x32() { TestFastTranspose<32, 32>(); } +HWY_NOINLINE void TestFloatTranspose32x32() { TestFloatTranspose<32, 32>(); } +HWY_NOINLINE void TestFastIDCT32x32() { TestFastIDCT<32, 32>(); } +HWY_NOINLINE void TestFloatIDCT32x32() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<32, 32>(); +#endif +} +HWY_NOINLINE void TestFastTranspose32x64() { TestFastTranspose<32, 64>(); } +HWY_NOINLINE void TestFloatTranspose32x64() { TestFloatTranspose<32, 64>(); } +HWY_NOINLINE void TestFastIDCT32x64() { TestFastIDCT<32, 64>(); } 
+HWY_NOINLINE void TestFloatIDCT32x64() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<32, 64>(); +#endif +} +HWY_NOINLINE void TestFastTranspose64x32() { TestFastTranspose<64, 32>(); } +HWY_NOINLINE void TestFloatTranspose64x32() { TestFloatTranspose<64, 32>(); } +HWY_NOINLINE void TestFastIDCT64x32() { TestFastIDCT<64, 32>(); } +HWY_NOINLINE void TestFloatIDCT64x32() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<64, 32>(); +#endif +} +HWY_NOINLINE void TestFastTranspose64x64() { TestFastTranspose<64, 64>(); } +HWY_NOINLINE void TestFloatTranspose64x64() { TestFloatTranspose<64, 64>(); } +HWY_NOINLINE void TestFastIDCT64x64() { TestFastIDCT<64, 64>(); } +HWY_NOINLINE void TestFloatIDCT64x64() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<64, 64>(); +#endif +} +HWY_NOINLINE void TestFastTranspose64x128() { TestFastTranspose<64, 128>(); } +HWY_NOINLINE void TestFloatTranspose64x128() { TestFloatTranspose<64, 128>(); } +/* +HWY_NOINLINE void TestFastIDCT64x128() { TestFastIDCT<64, 128>(); } +HWY_NOINLINE void TestFloatIDCT64x128() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<64, 128>(); +#endif +} +*/ +HWY_NOINLINE void TestFastTranspose128x64() { TestFastTranspose<128, 64>(); } +HWY_NOINLINE void TestFloatTranspose128x64() { TestFloatTranspose<128, 64>(); } +/* +HWY_NOINLINE void TestFastIDCT128x64() { TestFastIDCT<128, 64>(); } +HWY_NOINLINE void TestFloatIDCT128x64() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<128, 64>(); +#endif +} +*/ +HWY_NOINLINE void TestFastTranspose128x128() { TestFastTranspose<128, 128>(); } 
+HWY_NOINLINE void TestFloatTranspose128x128() { + TestFloatTranspose<128, 128>(); +} +/* +HWY_NOINLINE void TestFastIDCT128x128() { TestFastIDCT<128, 128>(); } +HWY_NOINLINE void TestFloatIDCT128x128() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<128, 128>(); +#endif +} +*/ +HWY_NOINLINE void TestFastTranspose128x256() { TestFastTranspose<128, 256>(); } +HWY_NOINLINE void TestFloatTranspose128x256() { + TestFloatTranspose<128, 256>(); +} +/* +HWY_NOINLINE void TestFastIDCT128x256() { TestFastIDCT<128, 256>(); } +HWY_NOINLINE void TestFloatIDCT128x256() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<128, 256>(); +#endif +} +*/ +HWY_NOINLINE void TestFastTranspose256x128() { TestFastTranspose<256, 128>(); } +HWY_NOINLINE void TestFloatTranspose256x128() { + TestFloatTranspose<256, 128>(); +} +/* +HWY_NOINLINE void TestFastIDCT256x128() { TestFastIDCT<256, 128>(); } +HWY_NOINLINE void TestFloatIDCT256x128() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<256, 128>(); +#endif +} +*/ +HWY_NOINLINE void TestFastTranspose256x256() { TestFastTranspose<256, 256>(); } +HWY_NOINLINE void TestFloatTranspose256x256() { + TestFloatTranspose<256, 256>(); +} +/* +HWY_NOINLINE void TestFastIDCT256x256() { TestFastIDCT<256, 256>(); } +HWY_NOINLINE void TestFloatIDCT256x256() { +#if HWY_TARGET == HWY_SCALAR && \ + (defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER)) + GTEST_SKIP(); +#else + TestFloatIDCT<256, 256>(); +#endif +} +*/ + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +class FastDCTTargetTest : public hwy::TestWithParamTarget {}; 
+HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastDCTTargetTest); + +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose8x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose8x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose16x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose16x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose32x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose32x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose64x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose64x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, 
TestFloatTranspose128x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose128x256); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose128x256); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatTranspose256x256); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastTranspose256x256); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT8x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT8x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT16x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT16x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x8); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x16); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT32x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT32x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x32); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, 
TestFastIDCT64x64); +/* + * DCT-128 and above have very large errors just by rounding inputs. +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT64x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT64x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x64); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT128x256); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT128x256); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x128); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFloatIDCT256x256); +HWY_EXPORT_AND_TEST_P(FastDCTTargetTest, TestFastIDCT256x256); +*/ + +TEST(FastDCTTest, TestWrapperFloat) { BenchmarkFloatIDCT32x32(); } +TEST(FastDCTTest, TestWrapperFast) { BenchmarkFastIDCT32x32(); } + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_math-inl.h b/third-party/libjxl/libjxl/lib/jxl/fast_math-inl.h new file mode 100644 index 0000000000..5c48034290 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_math-inl.h @@ -0,0 +1,236 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Fast SIMD math ops (log2, encoder only, cos, erf for splines) + +#if defined(LIB_JXL_FAST_MATH_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_FAST_MATH_INL_H_ +#undef LIB_JXL_FAST_MATH_INL_H_ +#else +#define LIB_JXL_FAST_MATH_INL_H_ +#endif + +#include + +#include "lib/jxl/common.h" +#include "lib/jxl/rational_polynomial-inl.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Eq; +using hwy::HWY_NAMESPACE::Floor; +using hwy::HWY_NAMESPACE::Ge; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::IfThenZeroElse; +using hwy::HWY_NAMESPACE::Le; +using hwy::HWY_NAMESPACE::Min; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::NegMulAdd; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::ShiftLeft; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Xor; + +// Computes base-2 logarithm like std::log2. Undefined if negative / NaN. +// L1 error ~3.9E-6 +template +V FastLog2f(const DF df, V x) { + // 2,2 rational polynomial approximation of std::log1p(x) / std::log(2). + HWY_ALIGN const float p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06f), + HWY_REP4(1.4287160470083755E+00f), + HWY_REP4(7.4245873327820566E-01f)}; + HWY_ALIGN const float q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01f), + HWY_REP4(1.0096718572241148E+00f), + HWY_REP4(1.7409343003366853E-01f)}; + + const Rebind di; + const auto x_bits = BitCast(di, x); + + // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops + const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab)); // = 2/3 + // Shifted exponent = log2; also used to clear mantissa. 
+ const auto exp_shifted = ShiftRight<23>(exp_bits); + const auto mantissa = BitCast(df, Sub(x_bits, ShiftLeft<23>(exp_shifted))); + const auto exp_val = ConvertTo(df, exp_shifted); + return Add(EvalRationalPolynomial(df, Sub(mantissa, Set(df, 1.0f)), p, q), + exp_val); +} + +// max relative error ~3e-7 +template +V FastPow2f(const DF df, V x) { + const Rebind di; + auto floorx = Floor(x); + auto exp = + BitCast(df, ShiftLeft<23>(Add(ConvertTo(di, floorx), Set(di, 127)))); + auto frac = Sub(x, floorx); + auto num = Add(frac, Set(df, 1.01749063e+01)); + num = MulAdd(num, frac, Set(df, 4.88687798e+01)); + num = MulAdd(num, frac, Set(df, 9.85506591e+01)); + num = Mul(num, exp); + auto den = MulAdd(frac, Set(df, 2.10242958e-01), Set(df, -2.22328856e-02)); + den = MulAdd(den, frac, Set(df, -1.94414990e+01)); + den = MulAdd(den, frac, Set(df, 9.85506633e+01)); + return Div(num, den); +} + +// max relative error ~3e-5 +template +V FastPowf(const DF df, V base, V exponent) { + return FastPow2f(df, Mul(FastLog2f(df, base), exponent)); +} + +// Computes cosine like std::cos. +// L1 error 7e-5. +template +V FastCosf(const DF df, V x) { + // Step 1: range reduction to [0, 2pi) + const auto pi2 = Set(df, kPi * 2.0f); + const auto pi2_inv = Set(df, 0.5f / kPi); + const auto npi2 = Mul(Floor(Mul(x, pi2_inv)), pi2); + const auto xmodpi2 = Sub(x, npi2); + // Step 2: range reduction to [0, pi] + const auto x_pi = Min(xmodpi2, Sub(pi2, xmodpi2)); + // Step 3: range reduction to [0, pi/2] + const auto above_pihalf = Ge(x_pi, Set(df, kPi / 2.0f)); + const auto x_pihalf = IfThenElse(above_pihalf, Sub(Set(df, kPi), x_pi), x_pi); + // Step 4: Taylor-like approximation, scaled by 2**0.75 to make angle + // duplication steps faster, on x/4. 
+ const auto xs = Mul(x_pihalf, Set(df, 0.25f)); + const auto x2 = Mul(xs, xs); + const auto x4 = Mul(x2, x2); + const auto cosx_prescaling = + MulAdd(x4, Set(df, 0.06960438), + MulAdd(x2, Set(df, -0.84087373), Set(df, 1.68179268))); + // Step 5: angle duplication. + const auto cosx_scale1 = + MulAdd(cosx_prescaling, cosx_prescaling, Set(df, -1.414213562)); + const auto cosx_scale2 = MulAdd(cosx_scale1, cosx_scale1, Set(df, -1)); + // Step 6: change sign if needed. + const Rebind du; + auto signbit = ShiftLeft<31>(BitCast(du, VecFromMask(df, above_pihalf))); + return BitCast(df, Xor(signbit, BitCast(du, cosx_scale2))); +} + +// Computes the error function like std::erf. +// L1 error 7e-4. +template +V FastErff(const DF df, V x) { + // Formula from + // https://en.wikipedia.org/wiki/Error_function#Numerical_approximations + // but constants have been recomputed. + const auto xle0 = Le(x, Zero(df)); + const auto absx = Abs(x); + // Compute 1 - 1 / ((((x * a + b) * x + c) * x + d) * x + 1)**4 + const auto denom1 = + MulAdd(absx, Set(df, 7.77394369e-02), Set(df, 2.05260015e-04)); + const auto denom2 = MulAdd(denom1, absx, Set(df, 2.32120216e-01)); + const auto denom3 = MulAdd(denom2, absx, Set(df, 2.77820801e-01)); + const auto denom4 = MulAdd(denom3, absx, Set(df, 1.0f)); + const auto denom5 = Mul(denom4, denom4); + const auto inv_denom5 = Div(Set(df, 1.0f), denom5); + const auto result = NegMulAdd(inv_denom5, inv_denom5, Set(df, 1.0f)); + // Change sign if needed. 
+ const Rebind du; + auto signbit = ShiftLeft<31>(BitCast(du, VecFromMask(df, xle0))); + return BitCast(df, Xor(signbit, BitCast(du, result))); +} + +inline float FastLog2f(float f) { + HWY_CAPPED(float, 1) D; + return GetLane(FastLog2f(D, Set(D, f))); +} + +inline float FastPow2f(float f) { + HWY_CAPPED(float, 1) D; + return GetLane(FastPow2f(D, Set(D, f))); +} + +inline float FastPowf(float b, float e) { + HWY_CAPPED(float, 1) D; + return GetLane(FastPowf(D, Set(D, b), Set(D, e))); +} + +inline float FastCosf(float f) { + HWY_CAPPED(float, 1) D; + return GetLane(FastCosf(D, Set(D, f))); +} + +inline float FastErff(float f) { + HWY_CAPPED(float, 1) D; + return GetLane(FastErff(D, Set(D, f))); +} + +// Returns cbrt(x) + add with 6 ulp max error. +// Modified from vectormath_exp.h, Apache 2 license. +// https://www.agner.org/optimize/vectorclass.zip +template +V CubeRootAndAdd(const V x, const V add) { + const HWY_FULL(float) df; + const HWY_FULL(int32_t) di; + + const auto kExpBias = Set(di, 0x54800000); // cast(1.) + cast(1.) / 3 + const auto kExpMul = Set(di, 0x002AAAAA); // shifted 1/3 + const auto k1_3 = Set(df, 1.0f / 3); + const auto k4_3 = Set(df, 4.0f / 3); + + const auto xa = x; // assume inputs never negative + const auto xa_3 = Mul(k1_3, xa); + + // Multiply exponent by -1/3 + const auto m1 = BitCast(di, xa); + // Special case for 0. 0 is represented with an exponent of 0, so the + // "kExpBias - 1/3 * exp" below gives the wrong result. The IfThenZeroElse() + // sets those values as 0, which prevents having NaNs in the computations + // below. 
+ // TODO(eustas): use fused op + const auto m2 = IfThenZeroElse( + Eq(m1, Zero(di)), Sub(kExpBias, Mul((ShiftRight<23>(m1)), kExpMul))); + auto r = BitCast(df, m2); + + // Newton-Raphson iterations + for (int i = 0; i < 3; i++) { + const auto r2 = Mul(r, r); + r = NegMulAdd(xa_3, Mul(r2, r2), Mul(k4_3, r)); + } + // Final iteration + auto r2 = Mul(r, r); + r = MulAdd(k1_3, NegMulAdd(xa, Mul(r2, r2), r), r); + r2 = Mul(r, r); + r = MulAdd(r2, x, add); + + return r; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_FAST_MATH_INL_H_ + +#if HWY_ONCE +#ifndef FAST_MATH_ONCE +#define FAST_MATH_ONCE + +namespace jxl { +inline float FastLog2f(float f) { return HWY_STATIC_DISPATCH(FastLog2f)(f); } +inline float FastPow2f(float f) { return HWY_STATIC_DISPATCH(FastPow2f)(f); } +inline float FastPowf(float b, float e) { + return HWY_STATIC_DISPATCH(FastPowf)(b, e); +} +inline float FastCosf(float f) { return HWY_STATIC_DISPATCH(FastCosf)(f); } +inline float FastErff(float f) { return HWY_STATIC_DISPATCH(FastErff)(f); } +} // namespace jxl + +#endif // FAST_MATH_ONCE +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/fast_math_test.cc b/third-party/libjxl/libjxl/lib/jxl/fast_math_test.cc new file mode 100644 index 0000000000..897aadc120 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fast_math_test.cc @@ -0,0 +1,288 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/fast_math_test.cc" +#include + +#include "lib/jxl/base/random.h" +#include "lib/jxl/dec_xyb-inl.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_xyb.h" +#include "lib/jxl/transfer_functions-inl.h" + +// Test utils +#include +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +HWY_NOINLINE void TestFastLog2() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(1e-7f, 1e3f); + const auto actual_v = FastLog2f(d, Set(d, f)); + const float actual = GetLane(actual_v); + const float abs_err = std::abs(std::log2(f) - actual); + EXPECT_LT(abs_err, 3.1E-6) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void TestFastPow2() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_rel_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(-100, 100); + const auto actual_v = FastPow2f(d, Set(d, f)); + const float actual = GetLane(actual_v); + const float expected = std::pow(2, f); + const float rel_err = std::abs(expected - actual) / expected; + EXPECT_LT(rel_err, 3.1E-6) << "f = " << f; + max_rel_err = std::max(max_rel_err, rel_err); + } + printf("max rel err %e\n", static_cast(max_rel_err)); +} + +HWY_NOINLINE void TestFastPow() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_rel_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float b = rng.UniformF(1e-3f, 1e3f); + const float e = rng.UniformF(-10, 10); + const auto actual_v = FastPowf(d, Set(d, b), Set(d, e)); + const float actual = GetLane(actual_v); + const float expected = std::pow(b, e); + const float rel_err = std::abs(expected - actual) / expected; + 
EXPECT_LT(rel_err, 3E-5) << "b = " << b << " e = " << e; + max_rel_err = std::max(max_rel_err, rel_err); + } + printf("max rel err %e\n", static_cast(max_rel_err)); +} + +HWY_NOINLINE void TestFastCos() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(-1e3f, 1e3f); + const auto actual_v = FastCosf(d, Set(d, f)); + const float actual = GetLane(actual_v); + const float abs_err = std::abs(std::cos(f) - actual); + EXPECT_LT(abs_err, 7E-5) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void TestFastErf() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(-5.f, 5.f); + const auto actual_v = FastErff(d, Set(d, f)); + const float actual = GetLane(actual_v); + const float abs_err = std::abs(std::erf(f) - actual); + EXPECT_LT(abs_err, 7E-4) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void TestCubeRoot() { + const HWY_FULL(float) d; + for (uint64_t x5 = 0; x5 < 2000000; x5++) { + const float x = x5 * 1E-5f; + const float expected = cbrtf(x); + HWY_ALIGN float approx[MaxLanes(d)]; + Store(CubeRootAndAdd(Set(d, x), Zero(d)), d, approx); + + // All lanes are same + for (size_t i = 1; i < Lanes(d); ++i) { + EXPECT_NEAR(approx[0], approx[i], 5E-7f); + } + EXPECT_NEAR(approx[0], expected, 8E-7f); + } +} + +HWY_NOINLINE void TestFastSRGB() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(0.0f, 1.0f); + const auto actual_v = FastLinearToSRGB(d, Set(d, f)); + const float actual = GetLane(actual_v); + const float 
expected = GetLane(TF_SRGB().EncodedFromDisplay(d, Set(d, f))); + const float abs_err = std::abs(expected - actual); + EXPECT_LT(abs_err, 1.2E-4) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void TestFastPQEFD() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(0.0f, 1.0f); + const float actual = GetLane(TF_PQ().EncodedFromDisplay(d, Set(d, f))); + const float expected = TF_PQ().EncodedFromDisplay(f); + const float abs_err = std::abs(expected - actual); + EXPECT_LT(abs_err, 7e-7) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void TestFastHLGEFD() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(0.0f, 1.0f); + const float actual = GetLane(TF_HLG().EncodedFromDisplay(d, Set(d, f))); + const float expected = TF_HLG().EncodedFromDisplay(f); + const float abs_err = std::abs(expected - actual); + EXPECT_LT(abs_err, 5e-7) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void TestFast709EFD() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(0.0f, 1.0f); + const float actual = GetLane(TF_709().EncodedFromDisplay(d, Set(d, f))); + const float expected = TF_709().EncodedFromDisplay(f); + const float abs_err = std::abs(expected - actual); + EXPECT_LT(abs_err, 2e-6) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void 
TestFastPQDFE() { + constexpr size_t kNumTrials = 1 << 23; + Rng rng(1); + float max_abs_err = 0; + HWY_FULL(float) d; + for (size_t i = 0; i < kNumTrials; i++) { + const float f = rng.UniformF(0.0f, 1.0f); + const float actual = GetLane(TF_PQ().DisplayFromEncoded(d, Set(d, f))); + const float expected = TF_PQ().DisplayFromEncoded(f); + const float abs_err = std::abs(expected - actual); + EXPECT_LT(abs_err, 3E-6) << "f = " << f; + max_abs_err = std::max(max_abs_err, abs_err); + } + printf("max abs err %e\n", static_cast(max_abs_err)); +} + +HWY_NOINLINE void TestFastXYB() { + if (!HasFastXYBTosRGB8()) return; + ImageMetadata metadata; + ImageBundle ib(&metadata); + int scaling = 1; + int n = 256 * scaling; + float inv_scaling = 1.0f / scaling; + int kChunk = 32; + // The image is divided in chunks to reduce total memory usage. + for (int cr = 0; cr < n; cr += kChunk) { + for (int cg = 0; cg < n; cg += kChunk) { + for (int cb = 0; cb < n; cb += kChunk) { + Image3F chunk(kChunk * kChunk, kChunk); + for (int ir = 0; ir < kChunk; ir++) { + for (int ig = 0; ig < kChunk; ig++) { + for (int ib = 0; ib < kChunk; ib++) { + float r = (cr + ir) * inv_scaling; + float g = (cg + ig) * inv_scaling; + float b = (cb + ib) * inv_scaling; + chunk.PlaneRow(0, ir)[ig * kChunk + ib] = r * (1.0f / 255); + chunk.PlaneRow(1, ir)[ig * kChunk + ib] = g * (1.0f / 255); + chunk.PlaneRow(2, ir)[ig * kChunk + ib] = b * (1.0f / 255); + } + } + } + ib.SetFromImage(std::move(chunk), ColorEncoding::SRGB()); + Image3F xyb(kChunk * kChunk, kChunk); + std::vector roundtrip(kChunk * kChunk * kChunk * 3); + ToXYB(ib, nullptr, &xyb, GetJxlCms()); + for (int y = 0; y < kChunk; y++) { + const float* xyba[4] = {xyb.PlaneRow(0, y), xyb.PlaneRow(1, y), + xyb.PlaneRow(2, y), nullptr}; + jxl::HWY_NAMESPACE::FastXYBTosRGB8( + xyba, roundtrip.data() + 3 * xyb.xsize() * y, false, xyb.xsize()); + } + for (int ir = 0; ir < kChunk; ir++) { + for (int ig = 0; ig < kChunk; ig++) { + for (int ib = 0; ib < kChunk; ib++) 
{ + float r = (cr + ir) * inv_scaling; + float g = (cg + ig) * inv_scaling; + float b = (cb + ib) * inv_scaling; + size_t idx = ir * kChunk * kChunk + ig * kChunk + ib; + int rr = roundtrip[3 * idx]; + int rg = roundtrip[3 * idx + 1]; + int rb = roundtrip[3 * idx + 2]; + EXPECT_LT(abs(r - rr), 2) << "expected " << r << " got " << rr; + EXPECT_LT(abs(g - rg), 2) << "expected " << g << " got " << rg; + EXPECT_LT(abs(b - rb), 2) << "expected " << b << " got " << rb; + } + } + } + } + } + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +class FastMathTargetTest : public hwy::TestWithParamTarget {}; +HWY_TARGET_INSTANTIATE_TEST_SUITE_P(FastMathTargetTest); + +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastLog2); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow2); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPow); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastCos); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastErf); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestCubeRoot); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastSRGB); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQDFE); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastPQEFD); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastHLGEFD); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFast709EFD); +HWY_EXPORT_AND_TEST_P(FastMathTargetTest, TestFastXYB); + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/field_encodings.h b/third-party/libjxl/libjxl/lib/jxl/field_encodings.h new file mode 100644 index 0000000000..613e8fad33 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/field_encodings.h @@ -0,0 +1,134 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_FIELD_ENCODINGS_H_ +#define LIB_JXL_FIELD_ENCODINGS_H_ + +// Constants needed to encode/decode fields; avoids including the full fields.h. + +#include +#include + +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +// Macro to define the Fields' derived class Name when compiling with debug +// names. +#if JXL_IS_DEBUG_BUILD +#define JXL_FIELDS_NAME(X) \ + const char* Name() const override { return #X; } +#else +#define JXL_FIELDS_NAME(X) +#endif // JXL_IS_DEBUG_BUILD + +class Visitor; +class Fields { + public: + virtual ~Fields() = default; +#if JXL_IS_DEBUG_BUILD + virtual const char* Name() const = 0; +#endif // JXL_IS_DEBUG_BUILD + virtual Status VisitFields(Visitor* JXL_RESTRICT visitor) = 0; +}; + +// Distribution of U32 values for one particular selector. Represents either a +// power of two-sized range, or a single value. A separate type ensures this is +// only passed to the U32Enc ctor. +struct U32Distr { + // No need to validate - all `d` are legitimate. + constexpr explicit U32Distr(uint32_t d) : d(d) {} + + static constexpr uint32_t kDirect = 0x80000000u; + + constexpr bool IsDirect() const { return (d & kDirect) != 0; } + + // Only call if IsDirect(). + constexpr uint32_t Direct() const { return d & (kDirect - 1); } + + // Only call if !IsDirect(). + constexpr size_t ExtraBits() const { return (d & 0x1F) + 1; } + uint32_t Offset() const { return (d >> 5) & 0x3FFFFFF; } + + uint32_t d; +}; + +// A direct-coded 31-bit value occupying 2 bits in the bitstream. +constexpr U32Distr Val(uint32_t value) { + return U32Distr(value | U32Distr::kDirect); +} + +// Value - `offset` will be signaled in `bits` extra bits. +constexpr U32Distr BitsOffset(uint32_t bits, uint32_t offset) { + return U32Distr(((bits - 1) & 0x1F) + ((offset & 0x3FFFFFF) << 5)); +} + +// Value will be signaled in `bits` extra bits. 
+constexpr U32Distr Bits(uint32_t bits) { return BitsOffset(bits, 0); } + +// See U32Coder documentation in fields.h. +class U32Enc { + public: + constexpr U32Enc(const U32Distr d0, const U32Distr d1, const U32Distr d2, + const U32Distr d3) + : d_{d0, d1, d2, d3} {} + + // Returns the U32Distr at `selector` = 0..3, least-significant first. + U32Distr GetDistr(const uint32_t selector) const { + JXL_ASSERT(selector < 4); + return d_[selector]; + } + + private: + U32Distr d_[4]; +}; + +// Returns bit with the given `index` (0 = least significant). +template +static inline constexpr uint64_t MakeBit(T index) { + return 1ULL << static_cast(index); +} + +// Returns vector of all possible values of an Enum type. Relies on each Enum +// providing an overload of EnumBits() that returns a bit array of its values, +// which implies values must be in [0, 64). +template +std::vector Values() { + uint64_t bits = EnumBits(Enum()); + + std::vector values; + values.reserve(hwy::PopCount(bits)); + + // For each 1-bit in bits: add its index as value + while (bits != 0) { + const int index = Num0BitsBelowLS1Bit_Nonzero(bits); + values.push_back(static_cast(index)); + bits &= bits - 1; // clear least-significant bit + } + return values; +} + +// Returns true if value is one of Values(). 
+template +Status EnumValid(const Enum value) { + if (static_cast(value) >= 64) { + return JXL_FAILURE("Value %u too large for %s\n", + static_cast(value), EnumName(Enum())); + } + const uint64_t bit = MakeBit(value); + if ((EnumBits(Enum()) & bit) == 0) { + return JXL_FAILURE("Invalid value %u for %s\n", + static_cast(value), EnumName(Enum())); + } + return true; +} + +} // namespace jxl + +#endif // LIB_JXL_FIELD_ENCODINGS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/fields.cc b/third-party/libjxl/libjxl/lib/jxl/fields.cc new file mode 100644 index 0000000000..47a75638c2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fields.cc @@ -0,0 +1,656 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/fields.h" + +#include + +#include +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/printf_macros.h" + +namespace jxl { + +namespace { + +using ::jxl::fields_internal::VisitorBase; + +struct InitVisitor : public VisitorBase { + Status Bits(const size_t /*unused*/, const uint32_t default_value, + uint32_t* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status U32(const U32Enc /*unused*/, const uint32_t default_value, + uint32_t* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status U64(const uint64_t default_value, + uint64_t* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status Bool(bool default_value, bool* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status F16(const float default_value, float* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + // Always visit conditional fields to ensure they are initialized. 
+ Status Conditional(bool /*condition*/) override { return true; } + + Status AllDefault(const Fields& /*fields*/, + bool* JXL_RESTRICT all_default) override { + // Just initialize this field and don't skip initializing others. + JXL_RETURN_IF_ERROR(Bool(true, all_default)); + return false; + } + + Status VisitNested(Fields* /*fields*/) override { + // Avoid re-initializing nested bundles (their ctors already called + // Bundle::Init for their fields). + return true; + } +}; + +// Similar to InitVisitor, but also initializes nested fields. +struct SetDefaultVisitor : public VisitorBase { + Status Bits(const size_t /*unused*/, const uint32_t default_value, + uint32_t* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status U32(const U32Enc /*unused*/, const uint32_t default_value, + uint32_t* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status U64(const uint64_t default_value, + uint64_t* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status Bool(bool default_value, bool* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + Status F16(const float default_value, float* JXL_RESTRICT value) override { + *value = default_value; + return true; + } + + // Always visit conditional fields to ensure they are initialized. + Status Conditional(bool /*condition*/) override { return true; } + + Status AllDefault(const Fields& /*fields*/, + bool* JXL_RESTRICT all_default) override { + // Just initialize this field and don't skip initializing others. 
+ JXL_RETURN_IF_ERROR(Bool(true, all_default)); + return false; + } +}; + +class AllDefaultVisitor : public VisitorBase { + public: + explicit AllDefaultVisitor() : VisitorBase() {} + + Status Bits(const size_t bits, const uint32_t default_value, + uint32_t* JXL_RESTRICT value) override { + all_default_ &= *value == default_value; + return true; + } + + Status U32(const U32Enc /*unused*/, const uint32_t default_value, + uint32_t* JXL_RESTRICT value) override { + all_default_ &= *value == default_value; + return true; + } + + Status U64(const uint64_t default_value, + uint64_t* JXL_RESTRICT value) override { + all_default_ &= *value == default_value; + return true; + } + + Status F16(const float default_value, float* JXL_RESTRICT value) override { + all_default_ &= std::abs(*value - default_value) < 1E-6f; + return true; + } + + Status AllDefault(const Fields& /*fields*/, + bool* JXL_RESTRICT /*all_default*/) override { + // Visit all fields so we can compute the actual all_default_ value. 
+ return false; + } + + bool AllDefault() const { return all_default_; } + + private: + bool all_default_ = true; +}; + +class ReadVisitor : public VisitorBase { + public: + explicit ReadVisitor(BitReader* reader) : VisitorBase(), reader_(reader) {} + + Status Bits(const size_t bits, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT value) override { + *value = BitsCoder::Read(bits, reader_); + if (!reader_->AllReadsWithinBounds()) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for header"); + } + return true; + } + + Status U32(const U32Enc dist, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT value) override { + *value = U32Coder::Read(dist, reader_); + if (!reader_->AllReadsWithinBounds()) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for header"); + } + return true; + } + + Status U64(const uint64_t /*default_value*/, + uint64_t* JXL_RESTRICT value) override { + *value = U64Coder::Read(reader_); + if (!reader_->AllReadsWithinBounds()) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for header"); + } + return true; + } + + Status F16(const float /*default_value*/, + float* JXL_RESTRICT value) override { + ok_ &= F16Coder::Read(reader_, value); + if (!reader_->AllReadsWithinBounds()) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for header"); + } + return true; + } + + void SetDefault(Fields* fields) override { Bundle::SetDefault(fields); } + + bool IsReading() const override { return true; } + + // This never fails because visitors are expected to keep reading until + // EndExtensions, see comment there. + Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override { + JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions)); + if (*extensions == 0) return true; + + // For each nonzero bit, i.e. 
extension that is present: + for (uint64_t remaining_extensions = *extensions; remaining_extensions != 0; + remaining_extensions &= remaining_extensions - 1) { + const size_t idx_extension = + Num0BitsBelowLS1Bit_Nonzero(remaining_extensions); + // Read additional U64 (one per extension) indicating the number of bits + // (allows skipping individual extensions). + JXL_RETURN_IF_ERROR(U64(0, &extension_bits_[idx_extension])); + if (!SafeAdd(total_extension_bits_, extension_bits_[idx_extension], + total_extension_bits_)) { + return JXL_FAILURE("Extension bits overflowed, invalid codestream"); + } + } + // Used by EndExtensions to skip past any _remaining_ extensions. + pos_after_ext_size_ = reader_->TotalBitsConsumed(); + JXL_ASSERT(pos_after_ext_size_ != 0); + return true; + } + + Status EndExtensions() override { + JXL_QUIET_RETURN_IF_ERROR(VisitorBase::EndExtensions()); + // Happens if extensions == 0: don't read size, done. + if (pos_after_ext_size_ == 0) return true; + + // Not enough bytes as set by BeginExtensions or earlier. Do not return + // this as a JXL_FAILURE or false (which can also propagate to error + // through e.g. JXL_RETURN_IF_ERROR), since this may be used while + // silently checking whether there are enough bytes. If this case must be + // treated as an error, reader_>Close() will do this, just like is already + // done for non-extension fields. + if (!enough_bytes_) return true; + + // Skip new fields this (old?) decoder didn't know about, if any. 
+ const size_t bits_read = reader_->TotalBitsConsumed(); + uint64_t end; + if (!SafeAdd(pos_after_ext_size_, total_extension_bits_, end)) { + return JXL_FAILURE("Invalid extension size, caused overflow"); + } + if (bits_read > end) { + return JXL_FAILURE("Read more extension bits than budgeted"); + } + const size_t remaining_bits = end - bits_read; + if (remaining_bits != 0) { + JXL_WARNING("Skipping %" PRIuS "-bit extension(s)", remaining_bits); + reader_->SkipBits(remaining_bits); + if (!reader_->AllReadsWithinBounds()) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for header"); + } + } + return true; + } + + Status OK() const { return ok_; } + + private: + // Whether any error other than not enough bytes occurred. + bool ok_ = true; + + // Whether there are enough input bytes to read from. + bool enough_bytes_ = true; + BitReader* const reader_; + // May be 0 even if the corresponding extension is present. + uint64_t extension_bits_[Bundle::kMaxExtensions] = {0}; + uint64_t total_extension_bits_ = 0; + size_t pos_after_ext_size_ = 0; // 0 iff extensions == 0. 
+ + friend Status jxl::CheckHasEnoughBits(Visitor*, size_t); +}; + +class MaxBitsVisitor : public VisitorBase { + public: + Status Bits(const size_t bits, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT /*value*/) override { + max_bits_ += BitsCoder::MaxEncodedBits(bits); + return true; + } + + Status U32(const U32Enc enc, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT /*value*/) override { + max_bits_ += U32Coder::MaxEncodedBits(enc); + return true; + } + + Status U64(const uint64_t /*default_value*/, + uint64_t* JXL_RESTRICT /*value*/) override { + max_bits_ += U64Coder::MaxEncodedBits(); + return true; + } + + Status F16(const float /*default_value*/, + float* JXL_RESTRICT /*value*/) override { + max_bits_ += F16Coder::MaxEncodedBits(); + return true; + } + + Status AllDefault(const Fields& /*fields*/, + bool* JXL_RESTRICT all_default) override { + JXL_RETURN_IF_ERROR(Bool(true, all_default)); + return false; // For max bits, assume nothing is default + } + + // Always visit conditional fields to get a (loose) upper bound. + Status Conditional(bool /*condition*/) override { return true; } + + Status BeginExtensions(uint64_t* JXL_RESTRICT /*extensions*/) override { + // Skip - extensions are not included in "MaxBits" because their length + // is potentially unbounded. 
+ return true; + } + + Status EndExtensions() override { return true; } + + size_t MaxBits() const { return max_bits_; } + + private: + size_t max_bits_ = 0; +}; + +class CanEncodeVisitor : public VisitorBase { + public: + explicit CanEncodeVisitor() : VisitorBase() {} + + Status Bits(const size_t bits, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT value) override { + size_t encoded_bits = 0; + ok_ &= BitsCoder::CanEncode(bits, *value, &encoded_bits); + encoded_bits_ += encoded_bits; + return true; + } + + Status U32(const U32Enc enc, const uint32_t /*default_value*/, + uint32_t* JXL_RESTRICT value) override { + size_t encoded_bits = 0; + ok_ &= U32Coder::CanEncode(enc, *value, &encoded_bits); + encoded_bits_ += encoded_bits; + return true; + } + + Status U64(const uint64_t /*default_value*/, + uint64_t* JXL_RESTRICT value) override { + size_t encoded_bits = 0; + ok_ &= U64Coder::CanEncode(*value, &encoded_bits); + encoded_bits_ += encoded_bits; + return true; + } + + Status F16(const float /*default_value*/, + float* JXL_RESTRICT value) override { + size_t encoded_bits = 0; + ok_ &= F16Coder::CanEncode(*value, &encoded_bits); + encoded_bits_ += encoded_bits; + return true; + } + + Status AllDefault(const Fields& fields, + bool* JXL_RESTRICT all_default) override { + *all_default = Bundle::AllDefault(fields); + JXL_RETURN_IF_ERROR(Bool(true, all_default)); + return *all_default; + } + + Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override { + JXL_QUIET_RETURN_IF_ERROR(VisitorBase::BeginExtensions(extensions)); + extensions_ = *extensions; + if (*extensions != 0) { + JXL_ASSERT(pos_after_ext_ == 0); + pos_after_ext_ = encoded_bits_; + JXL_ASSERT(pos_after_ext_ != 0); // visited "extensions" + } + return true; + } + // EndExtensions = default. 
+ + Status GetSizes(size_t* JXL_RESTRICT extension_bits, + size_t* JXL_RESTRICT total_bits) { + JXL_RETURN_IF_ERROR(ok_); + *extension_bits = 0; + *total_bits = encoded_bits_; + // Only if extension field was nonzero will we encode their sizes. + if (pos_after_ext_ != 0) { + JXL_ASSERT(encoded_bits_ >= pos_after_ext_); + *extension_bits = encoded_bits_ - pos_after_ext_; + // Also need to encode *extension_bits and bill it to *total_bits. + size_t encoded_bits = 0; + ok_ &= U64Coder::CanEncode(*extension_bits, &encoded_bits); + *total_bits += encoded_bits; + + // TODO(janwas): support encoding individual extension sizes. We + // currently ascribe all bits to the first and send zeros for the + // others. + for (size_t i = 1; i < hwy::PopCount(extensions_); ++i) { + encoded_bits = 0; + ok_ &= U64Coder::CanEncode(0, &encoded_bits); + *total_bits += encoded_bits; + } + } + return true; + } + + private: + bool ok_ = true; + size_t encoded_bits_ = 0; + uint64_t extensions_ = 0; + // Snapshot of encoded_bits_ after visiting the extension field, but NOT + // including the hidden extension sizes. 
+ uint64_t pos_after_ext_ = 0; +}; +} // namespace + +void Bundle::Init(Fields* fields) { + InitVisitor visitor; + if (!visitor.Visit(fields)) { + JXL_UNREACHABLE("Init should never fail"); + } +} +void Bundle::SetDefault(Fields* fields) { + SetDefaultVisitor visitor; + if (!visitor.Visit(fields)) { + JXL_UNREACHABLE("SetDefault should never fail"); + } +} +bool Bundle::AllDefault(const Fields& fields) { + AllDefaultVisitor visitor; + if (!visitor.VisitConst(fields)) { + JXL_UNREACHABLE("AllDefault should never fail"); + } + return visitor.AllDefault(); +} +size_t Bundle::MaxBits(const Fields& fields) { + MaxBitsVisitor visitor; +#if JXL_ENABLE_ASSERT + Status ret = +#else + (void) +#endif // JXL_ENABLE_ASSERT + visitor.VisitConst(fields); + JXL_ASSERT(ret); + return visitor.MaxBits(); +} +Status Bundle::CanEncode(const Fields& fields, size_t* extension_bits, + size_t* total_bits) { + CanEncodeVisitor visitor; + JXL_QUIET_RETURN_IF_ERROR(visitor.VisitConst(fields)); + JXL_QUIET_RETURN_IF_ERROR(visitor.GetSizes(extension_bits, total_bits)); + return true; +} +Status Bundle::Read(BitReader* reader, Fields* fields) { + ReadVisitor visitor(reader); + JXL_RETURN_IF_ERROR(visitor.Visit(fields)); + return visitor.OK(); +} +bool Bundle::CanRead(BitReader* reader, Fields* fields) { + ReadVisitor visitor(reader); + Status status = visitor.Visit(fields); + // We are only checking here whether there are enough bytes. We still return + // true for other errors because it means there are enough bytes to determine + // there's an error. Use Read() to determine which error it is. 
+ return status.code() != StatusCode::kNotEnoughBytes; +} + +size_t BitsCoder::MaxEncodedBits(const size_t bits) { return bits; } + +Status BitsCoder::CanEncode(const size_t bits, const uint32_t value, + size_t* JXL_RESTRICT encoded_bits) { + *encoded_bits = bits; + if (value >= (1ULL << bits)) { + return JXL_FAILURE("Value %u too large for %" PRIu64 " bits", value, + static_cast(bits)); + } + return true; +} + +uint32_t BitsCoder::Read(const size_t bits, BitReader* JXL_RESTRICT reader) { + return reader->ReadBits(bits); +} + +size_t U32Coder::MaxEncodedBits(const U32Enc enc) { + size_t extra_bits = 0; + for (uint32_t selector = 0; selector < 4; ++selector) { + const U32Distr d = enc.GetDistr(selector); + if (d.IsDirect()) { + continue; + } else { + extra_bits = std::max(extra_bits, d.ExtraBits()); + } + } + return 2 + extra_bits; +} + +Status U32Coder::CanEncode(const U32Enc enc, const uint32_t value, + size_t* JXL_RESTRICT encoded_bits) { + uint32_t selector; + size_t total_bits; + const Status ok = ChooseSelector(enc, value, &selector, &total_bits); + *encoded_bits = ok ? total_bits : 0; + return ok; +} + +uint32_t U32Coder::Read(const U32Enc enc, BitReader* JXL_RESTRICT reader) { + const uint32_t selector = reader->ReadFixedBits<2>(); + const U32Distr d = enc.GetDistr(selector); + if (d.IsDirect()) { + return d.Direct(); + } else { + return reader->ReadBits(d.ExtraBits()) + d.Offset(); + } +} + +Status U32Coder::ChooseSelector(const U32Enc enc, const uint32_t value, + uint32_t* JXL_RESTRICT selector, + size_t* JXL_RESTRICT total_bits) { +#if JXL_ENABLE_ASSERT + const size_t bits_required = 32 - Num0BitsAboveMS1Bit(value); +#endif // JXL_ENABLE_ASSERT + JXL_ASSERT(bits_required <= 32); + + *selector = 0; + *total_bits = 0; + + // It is difficult to verify whether Dist32Byte are sorted, so check all + // selectors and keep the one with the fewest total_bits. 
+ *total_bits = 64; // more than any valid encoding + for (uint32_t s = 0; s < 4; ++s) { + const U32Distr d = enc.GetDistr(s); + if (d.IsDirect()) { + if (d.Direct() == value) { + *selector = s; + *total_bits = 2; + return true; // Done, direct is always the best possible. + } + continue; + } + const size_t extra_bits = d.ExtraBits(); + const uint32_t offset = d.Offset(); + if (value < offset || value >= offset + (1ULL << extra_bits)) continue; + + // Better than prior encoding, remember it: + if (2 + extra_bits < *total_bits) { + *selector = s; + *total_bits = 2 + extra_bits; + } + } + + if (*total_bits == 64) { + return JXL_FAILURE("No feasible selector for %u", value); + } + + return true; +} + +uint64_t U64Coder::Read(BitReader* JXL_RESTRICT reader) { + uint64_t selector = reader->ReadFixedBits<2>(); + if (selector == 0) { + return 0; + } + if (selector == 1) { + return 1 + reader->ReadFixedBits<4>(); + } + if (selector == 2) { + return 17 + reader->ReadFixedBits<8>(); + } + + // selector 3, varint, groups have first 12, then 8, and last 4 bits. + uint64_t result = reader->ReadFixedBits<12>(); + + uint64_t shift = 12; + while (reader->ReadFixedBits<1>()) { + if (shift == 60) { + result |= static_cast(reader->ReadFixedBits<4>()) << shift; + break; + } + result |= static_cast(reader->ReadFixedBits<8>()) << shift; + shift += 8; + } + + return result; +} + +// Can always encode, but useful because it also returns bit size. 
+Status U64Coder::CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits) { + if (value == 0) { + *encoded_bits = 2; // 2 selector bits + } else if (value <= 16) { + *encoded_bits = 2 + 4; // 2 selector bits + 4 payload bits + } else if (value <= 272) { + *encoded_bits = 2 + 8; // 2 selector bits + 8 payload bits + } else { + *encoded_bits = 2 + 12; // 2 selector bits + 12 payload bits + value >>= 12; + int shift = 12; + while (value > 0 && shift < 60) { + *encoded_bits += 1 + 8; // 1 continuation bit + 8 payload bits + value >>= 8; + shift += 8; + } + if (value > 0) { + // This only could happen if shift == N - 4. + *encoded_bits += 1 + 4; // 1 continuation bit + 4 payload bits + } else { + *encoded_bits += 1; // 1 stop bit + } + } + + return true; +} + +Status F16Coder::Read(BitReader* JXL_RESTRICT reader, + float* JXL_RESTRICT value) { + const uint32_t bits16 = reader->ReadFixedBits<16>(); + const uint32_t sign = bits16 >> 15; + const uint32_t biased_exp = (bits16 >> 10) & 0x1F; + const uint32_t mantissa = bits16 & 0x3FF; + + if (JXL_UNLIKELY(biased_exp == 31)) { + return JXL_FAILURE("F16 infinity or NaN are not supported"); + } + + // Subnormal or zero + if (JXL_UNLIKELY(biased_exp == 0)) { + *value = (1.0f / 16384) * (mantissa * (1.0f / 1024)); + if (sign) *value = -*value; + return true; + } + + // Normalized: convert the representation directly (faster than ldexp/tables). 
+ const uint32_t biased_exp32 = biased_exp + (127 - 15); + const uint32_t mantissa32 = mantissa << (23 - 10); + const uint32_t bits32 = (sign << 31) | (biased_exp32 << 23) | mantissa32; + memcpy(value, &bits32, sizeof(bits32)); + return true; +} + +Status F16Coder::CanEncode(float value, size_t* JXL_RESTRICT encoded_bits) { + *encoded_bits = MaxEncodedBits(); + if (std::isnan(value) || std::isinf(value)) { + return JXL_FAILURE("Should not attempt to store NaN and infinity"); + } + return std::abs(value) <= 65504.0f; +} + +Status CheckHasEnoughBits(Visitor* visitor, size_t bits) { + if (!visitor->IsReading()) return false; + ReadVisitor* rv = static_cast(visitor); + size_t have_bits = rv->reader_->TotalBytes() * kBitsPerByte; + size_t want_bits = bits + rv->reader_->TotalBitsConsumed(); + if (have_bits < want_bits) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for header"); + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/fields.h b/third-party/libjxl/libjxl/lib/jxl/fields.h new file mode 100644 index 0000000000..60fbbfcba0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fields.h @@ -0,0 +1,379 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_FIELDS_H_ +#define LIB_JXL_FIELDS_H_ + +// Forward/backward-compatible 'bundles' with auto-serialized 'fields'. + +#include +#include +#include +#include +#include +#include + +#include +#include // abs +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/field_encodings.h" + +namespace jxl { + +struct AuxOut; +struct BitWriter; + +// Integer coders: BitsCoder (raw), U32Coder (table), U64Coder (varint). 
+ +// Reads/writes a given (fixed) number of bits <= 32. +namespace BitsCoder { +size_t MaxEncodedBits(size_t bits); + +Status CanEncode(size_t bits, uint32_t value, + size_t* JXL_RESTRICT encoded_bits); + +uint32_t Read(size_t bits, BitReader* JXL_RESTRICT reader); + +// Returns false if the value is too large to encode. +Status Write(size_t bits, uint32_t value, BitWriter* JXL_RESTRICT writer); +} // namespace BitsCoder + +// Encodes u32 using a lookup table and/or extra bits, governed by a per-field +// encoding `enc` which consists of four distributions `d` chosen via a 2-bit +// selector (least significant = 0). Each d may have two modes: +// - direct: if d.IsDirect(), the value is d.Direct(); +// - offset: the value is derived from d.ExtraBits() extra bits plus d.Offset(); +// This encoding is denser than Exp-Golomb or Gamma codes when both small and +// large values occur. +// +// Examples: +// Direct: U32Enc(Val(8), Val(16), Val(32), Bits(6)), value 32 => 10b. +// Offset: U32Enc(Val(0), BitsOffset(1, 1), BitsOffset(2, 3), BitsOffset(8, 8)) +// defines the following prefix code: +// 00 -> 0 +// 01x -> 1..2 +// 10xx -> 3..7 +// 11xxxxxxxx -> 8..263 +namespace U32Coder { +size_t MaxEncodedBits(U32Enc enc); +Status CanEncode(U32Enc enc, uint32_t value, size_t* JXL_RESTRICT encoded_bits); +uint32_t Read(U32Enc enc, BitReader* JXL_RESTRICT reader); + +// Returns false if the value is too large to encode. +Status Write(U32Enc enc, uint32_t value, BitWriter* JXL_RESTRICT writer); + +// "private" +Status ChooseSelector(U32Enc enc, uint32_t value, + uint32_t* JXL_RESTRICT selector, + size_t* JXL_RESTRICT total_bits); +} // namespace U32Coder + +// Encodes 64-bit unsigned integers with a fixed distribution, taking 2 bits +// to encode 0, 6 bits to encode 1 to 16, 10 bits to encode 17 to 272, 15 bits +// to encode up to 4095, and on the order of log2(value) * 1.125 bits for +// larger values. 
+namespace U64Coder { +constexpr size_t MaxEncodedBits() { return 2 + 12 + 6 * (8 + 1) + (4 + 1); } + +uint64_t Read(BitReader* JXL_RESTRICT reader); + +// Returns false if the value is too large to encode. +Status Write(uint64_t value, BitWriter* JXL_RESTRICT writer); + +// Can always encode, but useful because it also returns bit size. +Status CanEncode(uint64_t value, size_t* JXL_RESTRICT encoded_bits); +} // namespace U64Coder + +// IEEE 754 half-precision (binary16). Refuses to read/write NaN/Inf. +namespace F16Coder { +constexpr size_t MaxEncodedBits() { return 16; } + +// Returns false if the bit representation is NaN or infinity +Status Read(BitReader* JXL_RESTRICT reader, float* JXL_RESTRICT value); + +// Returns false if the value is too large to encode. +Status Write(float value, BitWriter* JXL_RESTRICT writer); +Status CanEncode(float value, size_t* JXL_RESTRICT encoded_bits); +} // namespace F16Coder + +// A "bundle" is a forward- and backward compatible collection of fields. +// They are used for SizeHeader/FrameHeader/GroupHeader. Bundles can be +// extended by appending(!) fields. Optional fields may be omitted from the +// bitstream by conditionally visiting them. When reading new bitstreams with +// old code, we skip unknown fields at the end of the bundle. This requires +// storing the amount of extra appended bits, and that fields are visited in +// chronological order of being added to the format, because old decoders +// cannot skip some future fields and resume reading old fields. Similarly, +// new readers query bits in an "extensions" field to skip (groups of) fields +// not present in old bitstreams. Note that each bundle must include an +// "extensions" field prior to freezing the format, otherwise it cannot be +// extended. +// +// To ensure interoperability, there will be no opaque fields. 
+// +// HOWTO: +// - basic usage: define a struct with member variables ("fields") and a +// VisitFields(v) member function that calls v->U32/Bool etc. for each +// field, specifying their default values. The ctor must call +// Bundle::Init(this). +// +// - print a trace of visitors: ensure each bundle has a static Name() member +// function, and change Bundle::Print* to return true. +// +// - optional fields: in VisitFields, add if (v->Conditional(your_condition)) +// { v->Bool(default, &field); }. This prevents reading/writing field +// if !your_condition, which is typically computed from a prior field. +// WARNING: to ensure all fields are initialized, do not add an else branch; +// instead add another if (v->Conditional(!your_condition)). +// +// - repeated fields: for dynamic sizes, use e.g. std::vector and in +// VisitFields, if (v->IsReading()) field.resize(size) before accessing field. +// For static or bounded sizes, use an array or std::array. In all cases, +// simply visit each array element as if it were a normal field. +// +// - nested bundles: add a bundle as a normal field and in VisitFields call +// JXL_RETURN_IF_ERROR(v->VisitNested(&nested)); +// +// - allow future extensions: define a "uint64_t extensions" field and call +// v->BeginExtensions(&extensions) after visiting all non-extension fields, +// and `return v->EndExtensions();` after the last extension field. +// +// - encode an entire bundle in one bit if ALL its fields equal their default +// values: add a "mutable bool all_default" field and as the first visitor: +// if (v->AllDefault(*this, &all_default)) { +// // Overwrite all serialized fields, but not any nonserialized_*. +// v->SetDefault(this); +// return true; +// } +// Note: if extensions are present, AllDefault() == false. + +namespace Bundle { +constexpr size_t kMaxExtensions = 64; // bits in u64 + +// Initializes fields to the default values. 
It is not recursive to nested +// fields, this function is intended to be called in the constructors so +// each nested field will already Init itself. +void Init(Fields* JXL_RESTRICT fields); + +// Similar to Init, but recursive to nested fields. +void SetDefault(Fields* JXL_RESTRICT fields); + +// Returns whether ALL fields (including `extensions`, if present) are equal +// to their default value. +bool AllDefault(const Fields& fields); + +// Returns max number of bits required to encode a T. +size_t MaxBits(const Fields& fields); + +// Returns whether a header's fields can all be encoded, i.e. they have a +// valid representation. If so, "*total_bits" is the exact number of bits +// required. Called by Write. +Status CanEncode(const Fields& fields, size_t* JXL_RESTRICT extension_bits, + size_t* JXL_RESTRICT total_bits); + +Status Read(BitReader* reader, Fields* JXL_RESTRICT fields); + +// Returns whether enough bits are available to fully read this bundle using +// Read. Also returns true in case of a codestream error (other than not being +// large enough): that means enough bits are available to determine there's an +// error, use Read to get such error status. +// NOTE: this advances the BitReader, a different one pointing back at the +// original bit position in the codestream must be created to use Read after +// this. +bool CanRead(BitReader* reader, Fields* JXL_RESTRICT fields); + +Status Write(const Fields& fields, BitWriter* JXL_RESTRICT writer, size_t layer, + AuxOut* aux_out); +} // namespace Bundle + +// Different subclasses of Visitor are passed to implementations of Fields +// throughout their lifetime. Templates used to be used for this but dynamic +// polymorphism produces more compact executables than template reification did. 
+class Visitor { + public: + virtual ~Visitor() = default; + virtual Status Visit(Fields* fields) = 0; + + virtual Status Bool(bool default_value, bool* JXL_RESTRICT value) = 0; + virtual Status U32(U32Enc, uint32_t, uint32_t*) = 0; + + // Helper to construct U32Enc from U32Distr. + Status U32(const U32Distr d0, const U32Distr d1, const U32Distr d2, + const U32Distr d3, const uint32_t default_value, + uint32_t* JXL_RESTRICT value) { + return U32(U32Enc(d0, d1, d2, d3), default_value, value); + } + + template + Status Enum(const EnumT default_value, EnumT* JXL_RESTRICT value) { + uint32_t u32 = static_cast(*value); + // 00 -> 0 + // 01 -> 1 + // 10xxxx -> 2..17 + // 11yyyyyy -> 18..81 + JXL_RETURN_IF_ERROR(U32(Val(0), Val(1), BitsOffset(4, 2), BitsOffset(6, 18), + static_cast(default_value), &u32)); + *value = static_cast(u32); + return EnumValid(*value); + } + + virtual Status Bits(size_t bits, uint32_t default_value, + uint32_t* JXL_RESTRICT value) = 0; + virtual Status U64(uint64_t default_value, uint64_t* JXL_RESTRICT value) = 0; + virtual Status F16(float default_value, float* JXL_RESTRICT value) = 0; + + // Returns whether VisitFields should visit some subsequent fields. + // "condition" is typically from prior fields, e.g. flags. + // Overridden by InitVisitor and MaxBitsVisitor. + virtual Status Conditional(bool condition) { return condition; } + + // Overridden by InitVisitor, AllDefaultVisitor and CanEncodeVisitor. + virtual Status AllDefault(const Fields& /*fields*/, + bool* JXL_RESTRICT all_default) { + JXL_RETURN_IF_ERROR(Bool(true, all_default)); + return *all_default; + } + + virtual void SetDefault(Fields* /*fields*/) { + // Do nothing by default, this is overridden by ReadVisitor. + } + + // Returns the result of visiting a nested Bundle. + // Overridden by InitVisitor. + virtual Status VisitNested(Fields* fields) { return Visit(fields); } + + // Overridden by ReadVisitor. Enables dynamically-sized fields. 
+ virtual bool IsReading() const { return false; } + + virtual Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) = 0; + virtual Status EndExtensions() = 0; +}; + +namespace fields_internal { +// A bundle can be in one of three states concerning extensions: not-begun, +// active, ended. Bundles may be nested, so we need a stack of states. +class ExtensionStates { + public: + void Push() { + // Initial state = not-begun. + begun_ <<= 1; + ended_ <<= 1; + } + + // Clears current state; caller must check IsEnded beforehand. + void Pop() { + begun_ >>= 1; + ended_ >>= 1; + } + + // Returns true if state == active || state == ended. + Status IsBegun() const { return (begun_ & 1) != 0; } + // Returns true if state != not-begun && state != active. + Status IsEnded() const { return (ended_ & 1) != 0; } + + void Begin() { + JXL_ASSERT(!IsBegun()); + JXL_ASSERT(!IsEnded()); + begun_ += 1; + } + + void End() { + JXL_ASSERT(IsBegun()); + JXL_ASSERT(!IsEnded()); + ended_ += 1; + } + + private: + // Current state := least-significant bit of begun_ and ended_. + uint64_t begun_ = 0; + uint64_t ended_ = 0; +}; + +// Visitors generate Init/AllDefault/Read/Write logic for all fields. Each +// bundle's VisitFields member function calls visitor->U32 etc. We do not +// overload operator() because a function name is easier to search for. + +class VisitorBase : public Visitor { + public: + explicit VisitorBase() {} + ~VisitorBase() override { JXL_ASSERT(depth_ == 0); } + + // This is the only call site of Fields::VisitFields. + // Ensures EndExtensions was called. + Status Visit(Fields* fields) override { + depth_ += 1; + JXL_ASSERT(depth_ <= Bundle::kMaxExtensions); + extension_states_.Push(); + + const Status ok = fields->VisitFields(this); + + if (ok) { + // If VisitFields called BeginExtensions, must also call + // EndExtensions. 
+ JXL_ASSERT(!extension_states_.IsBegun() || extension_states_.IsEnded()); + } else { + // Failed, undefined state: don't care whether EndExtensions was + // called. + } + + extension_states_.Pop(); + JXL_ASSERT(depth_ != 0); + depth_ -= 1; + + return ok; + } + + // For visitors accepting a const Visitor, need to const-cast so we can call + // the non-const Visitor::VisitFields. NOTE: C is not modified except the + // `all_default` field by CanEncodeVisitor. + Status VisitConst(const Fields& t) { return Visit(const_cast(&t)); } + + // Derived types (overridden by InitVisitor because it is unsafe to read + // from *value there) + + Status Bool(bool default_value, bool* JXL_RESTRICT value) override { + uint32_t bits = *value ? 1 : 0; + JXL_RETURN_IF_ERROR(Bits(1, static_cast(default_value), &bits)); + JXL_DASSERT(bits <= 1); + *value = bits == 1; + return true; + } + + // Overridden by ReadVisitor and WriteVisitor. + // Called before any conditional visit based on "extensions". + // Overridden by ReadVisitor, CanEncodeVisitor and WriteVisitor. + Status BeginExtensions(uint64_t* JXL_RESTRICT extensions) override { + JXL_RETURN_IF_ERROR(U64(0, extensions)); + + extension_states_.Begin(); + return true; + } + + // Called after all extension fields (if any). Although non-extension + // fields could be visited afterward, we prefer the convention that + // extension fields are always the last to be visited. Overridden by + // ReadVisitor. 
+ Status EndExtensions() override { + extension_states_.End(); + return true; + } + + private: + size_t depth_ = 0; // to check nesting + ExtensionStates extension_states_; +}; +} // namespace fields_internal + +Status CheckHasEnoughBits(Visitor* visitor, size_t bits); + +} // namespace jxl + +#endif // LIB_JXL_FIELDS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/fields_test.cc b/third-party/libjxl/libjxl/lib/jxl/fields_test.cc new file mode 100644 index 0000000000..cf54c780ea --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/fields_test.cc @@ -0,0 +1,429 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/fields.h" + +#include +#include + +#include +#include + +#include "lib/jxl/base/span.h" +#include "lib/jxl/common.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +// Ensures `value` round-trips and in exactly `expected_bits_written`. 
+void TestU32Coder(const uint32_t value, const size_t expected_bits_written) { + const U32Enc enc(Val(0), Bits(4), Val(0x7FFFFFFF), Bits(32)); + + BitWriter writer; + BitWriter::Allotment allotment( + &writer, RoundUpBitsToByteMultiple(U32Coder::MaxEncodedBits(enc))); + + size_t precheck_pos; + EXPECT_TRUE(U32Coder::CanEncode(enc, value, &precheck_pos)); + EXPECT_EQ(expected_bits_written, precheck_pos); + + EXPECT_TRUE(U32Coder::Write(enc, value, &writer)); + EXPECT_EQ(expected_bits_written, writer.BitsWritten()); + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + + BitReader reader(writer.GetSpan()); + const uint32_t decoded_value = U32Coder::Read(enc, &reader); + EXPECT_EQ(value, decoded_value); + EXPECT_TRUE(reader.Close()); +} + +TEST(FieldsTest, U32CoderTest) { + TestU32Coder(0, 2); + TestU32Coder(1, 6); + TestU32Coder(15, 6); + TestU32Coder(0x7FFFFFFF, 2); + TestU32Coder(128, 34); + TestU32Coder(0x7FFFFFFEu, 34); + TestU32Coder(0x80000000u, 34); + TestU32Coder(0xFFFFFFFFu, 34); +} + +void TestU64Coder(const uint64_t value, const size_t expected_bits_written) { + BitWriter writer; + BitWriter::Allotment allotment( + &writer, RoundUpBitsToByteMultiple(U64Coder::MaxEncodedBits())); + + size_t precheck_pos; + EXPECT_TRUE(U64Coder::CanEncode(value, &precheck_pos)); + EXPECT_EQ(expected_bits_written, precheck_pos); + + EXPECT_TRUE(U64Coder::Write(value, &writer)); + EXPECT_EQ(expected_bits_written, writer.BitsWritten()); + + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + + BitReader reader(writer.GetSpan()); + const uint64_t decoded_value = U64Coder::Read(&reader); + EXPECT_EQ(value, decoded_value); + EXPECT_TRUE(reader.Close()); +} + +TEST(FieldsTest, U64CoderTest) { + // Values that should take 2 bits (selector 00): 0 + TestU64Coder(0, 2); + + // Values that should take 6 bits (2 for selector, 4 for value): 1..16 + TestU64Coder(1, 6); + TestU64Coder(2, 6); + TestU64Coder(8, 6); + TestU64Coder(15, 6); + 
TestU64Coder(16, 6); + + // Values that should take 10 bits (2 for selector, 8 for value): 17..272 + TestU64Coder(17, 10); + TestU64Coder(18, 10); + TestU64Coder(100, 10); + TestU64Coder(271, 10); + TestU64Coder(272, 10); + + // Values that should take 15 bits (2 for selector, 12 for value, 1 for varint + // end): (0)..273..4095 + TestU64Coder(273, 15); + TestU64Coder(274, 15); + TestU64Coder(1000, 15); + TestU64Coder(4094, 15); + TestU64Coder(4095, 15); + + // Take 24 bits (of which 20 actual value): (0)..4096..1048575 + TestU64Coder(4096, 24); + TestU64Coder(4097, 24); + TestU64Coder(10000, 24); + TestU64Coder(1048574, 24); + TestU64Coder(1048575, 24); + + // Take 33 bits (of which 28 actual value): (0)..1048576..268435455 + TestU64Coder(1048576, 33); + TestU64Coder(1048577, 33); + TestU64Coder(10000000, 33); + TestU64Coder(268435454, 33); + TestU64Coder(268435455, 33); + + // Take 42 bits (of which 36 actual value): (0)..268435456..68719476735 + TestU64Coder(268435456ull, 42); + TestU64Coder(268435457ull, 42); + TestU64Coder(1000000000ull, 42); + TestU64Coder(68719476734ull, 42); + TestU64Coder(68719476735ull, 42); + + // Take 51 bits (of which 44 actual value): (0)..68719476736..17592186044415 + TestU64Coder(68719476736ull, 51); + TestU64Coder(68719476737ull, 51); + TestU64Coder(1000000000000ull, 51); + TestU64Coder(17592186044414ull, 51); + TestU64Coder(17592186044415ull, 51); + + // Take 60 bits (of which 52 actual value): + // (0)..17592186044416..4503599627370495 + TestU64Coder(17592186044416ull, 60); + TestU64Coder(17592186044417ull, 60); + TestU64Coder(100000000000000ull, 60); + TestU64Coder(4503599627370494ull, 60); + TestU64Coder(4503599627370495ull, 60); + + // Take 69 bits (of which 60 actual value): + // (0)..4503599627370496..1152921504606846975 + TestU64Coder(4503599627370496ull, 69); + TestU64Coder(4503599627370497ull, 69); + TestU64Coder(10000000000000000ull, 69); + TestU64Coder(1152921504606846974ull, 69); + TestU64Coder(1152921504606846975ull, 
69); + + // Take 73 bits (of which 64 actual value): + // (0)..1152921504606846976..18446744073709551615 + TestU64Coder(1152921504606846976ull, 73); + TestU64Coder(1152921504606846977ull, 73); + TestU64Coder(10000000000000000000ull, 73); + TestU64Coder(18446744073709551614ull, 73); + TestU64Coder(18446744073709551615ull, 73); +} + +Status TestF16Coder(const float value) { + size_t max_encoded_bits; + // It is not a fatal error if it can't be encoded. + if (!F16Coder::CanEncode(value, &max_encoded_bits)) return false; + EXPECT_EQ(F16Coder::MaxEncodedBits(), max_encoded_bits); + + BitWriter writer; + BitWriter::Allotment allotment(&writer, + RoundUpBitsToByteMultiple(max_encoded_bits)); + + EXPECT_TRUE(F16Coder::Write(value, &writer)); + EXPECT_EQ(F16Coder::MaxEncodedBits(), writer.BitsWritten()); + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, 0, nullptr); + + BitReader reader(writer.GetSpan()); + float decoded_value; + EXPECT_TRUE(F16Coder::Read(&reader, &decoded_value)); + // All values we test can be represented exactly. + EXPECT_EQ(value, decoded_value); + EXPECT_TRUE(reader.Close()); + return true; +} + +TEST(FieldsTest, F16CoderTest) { + for (float sign : {-1.0f, 1.0f}) { + // (anything less than 1E-3 are subnormals) + for (float mag : {0.0f, 0.5f, 1.0f, 2.0f, 2.5f, 16.015625f, 1.0f / 4096, + 1.0f / 16384, 65504.0f}) { + EXPECT_TRUE(TestF16Coder(sign * mag)); + } + } + + // Out of range + EXPECT_FALSE(TestF16Coder(65504.01f)); + EXPECT_FALSE(TestF16Coder(-65505.0f)); +} + +// Ensures Read(Write()) returns the same fields. +TEST(FieldsTest, TestRoundtripSize) { + for (int i = 0; i < 8; i++) { + SizeHeader size; + ASSERT_TRUE(size.Set(123 + 77 * i, 7 + i)); + + size_t extension_bits = 999, total_bits = 999; // Initialize as garbage. 
+ ASSERT_TRUE(Bundle::CanEncode(size, &extension_bits, &total_bits)); + EXPECT_EQ(0u, extension_bits); + + BitWriter writer; + ASSERT_TRUE(WriteSizeHeader(size, &writer, 0, nullptr)); + EXPECT_EQ(total_bits, writer.BitsWritten()); + writer.ZeroPadToByte(); + + SizeHeader size2; + BitReader reader(writer.GetSpan()); + ASSERT_TRUE(ReadSizeHeader(&reader, &size2)); + EXPECT_EQ(total_bits, reader.TotalBitsConsumed()); + EXPECT_TRUE(reader.Close()); + + EXPECT_EQ(size.xsize(), size2.xsize()); + EXPECT_EQ(size.ysize(), size2.ysize()); + } +} + +// Ensure all values can be reached by the encoding. +TEST(FieldsTest, TestCropRect) { + CodecMetadata metadata; + for (int32_t i = -999; i < 19000; ++i) { + FrameHeader f(&metadata); + f.custom_size_or_origin = true; + f.frame_origin.x0 = i; + f.frame_origin.y0 = i; + f.frame_size.xsize = 1000 + i; + f.frame_size.ysize = 1000 + i; + size_t extension_bits = 0, total_bits = 0; + ASSERT_TRUE(Bundle::CanEncode(f, &extension_bits, &total_bits)); + EXPECT_EQ(0u, extension_bits); + EXPECT_GE(total_bits, 9u); + } +} +TEST(FieldsTest, TestPreview) { + // (div8 cannot represent 4360, but !div8 can go a little higher) + for (uint32_t i = 1; i < 4360; ++i) { + PreviewHeader p; + ASSERT_TRUE(p.Set(i, i)); + size_t extension_bits = 0, total_bits = 0; + ASSERT_TRUE(Bundle::CanEncode(p, &extension_bits, &total_bits)); + EXPECT_EQ(0u, extension_bits); + EXPECT_GE(total_bits, 6u); + } +} + +// Ensures Read(Write()) returns the same fields. +TEST(FieldsTest, TestRoundtripFrame) { + CodecMetadata metadata; + FrameHeader h(&metadata); + h.extensions = 0x800; + + size_t extension_bits = 999, total_bits = 999; // Initialize as garbage. 
+ ASSERT_TRUE(Bundle::CanEncode(h, &extension_bits, &total_bits)); + EXPECT_EQ(0u, extension_bits); + BitWriter writer; + ASSERT_TRUE(WriteFrameHeader(h, &writer, nullptr)); + EXPECT_EQ(total_bits, writer.BitsWritten()); + writer.ZeroPadToByte(); + + FrameHeader h2(&metadata); + BitReader reader(writer.GetSpan()); + ASSERT_TRUE(ReadFrameHeader(&reader, &h2)); + EXPECT_EQ(total_bits, reader.TotalBitsConsumed()); + EXPECT_TRUE(reader.Close()); + + EXPECT_EQ(h.extensions, h2.extensions); + EXPECT_EQ(h.flags, h2.flags); +} + +#ifndef JXL_CRASH_ON_ERROR +// Ensure out-of-bounds values cause an error. +TEST(FieldsTest, TestOutOfRange) { + SizeHeader h; + ASSERT_TRUE(h.Set(0xFFFFFFFFull, 0xFFFFFFFFull)); + size_t extension_bits = 999, total_bits = 999; // Initialize as garbage. + ASSERT_FALSE(Bundle::CanEncode(h, &extension_bits, &total_bits)); +} +#endif + +struct OldBundle : public Fields { + OldBundle() { Bundle::Init(this); } + JXL_FIELDS_NAME(OldBundle) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small)); + JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large)); + + JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions)); + return visitor->EndExtensions(); + } + + uint32_t old_small; + float old_f; + uint32_t old_large; + uint64_t extensions; +}; + +struct NewBundle : public Fields { + NewBundle() { Bundle::Init(this); } + JXL_FIELDS_NAME(NewBundle) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Bits(2), Bits(3), Bits(4), 1, &old_small)); + JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.125f, &old_f)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Bits(7), Bits(12), Bits(16), Bits(32), 0, &old_large)); + + JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions)); + if 
(visitor->Conditional(extensions & 1)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(2), Bits(2), Bits(3), Bits(4), 2, &new_small)); + JXL_QUIET_RETURN_IF_ERROR(visitor->F16(-2.0f, &new_f)); + } + if (visitor->Conditional(extensions & 2)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Bits(9), Bits(12), Bits(16), Bits(32), 0, &new_large)); + } + return visitor->EndExtensions(); + } + + uint32_t old_small; + float old_f; + uint32_t old_large; + uint64_t extensions; + + // If extensions & 1 + uint32_t new_small = 2; + float new_f = -2.0f; + // If extensions & 2 + uint32_t new_large = 0; +}; + +TEST(FieldsTest, TestNewDecoderOldData) { + OldBundle old_bundle; + old_bundle.old_large = 123; + old_bundle.old_f = 3.75f; + old_bundle.extensions = 0; + + // Write to bit stream + const size_t kMaxOutBytes = 999; + BitWriter writer; + // Make sure values are initialized by code under test. + size_t extension_bits = 12345, total_bits = 12345; + ASSERT_TRUE(Bundle::CanEncode(old_bundle, &extension_bits, &total_bits)); + ASSERT_LE(total_bits, kMaxOutBytes * kBitsPerByte); + EXPECT_EQ(0u, extension_bits); + AuxOut aux_out; + ASSERT_TRUE(Bundle::Write(old_bundle, &writer, kLayerHeader, &aux_out)); + + BitWriter::Allotment allotment(&writer, + kMaxOutBytes * kBitsPerByte - total_bits); + writer.Write(20, 0xA55A); // sentinel + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, kLayerHeader, nullptr); + + ASSERT_LE(writer.GetSpan().size(), kMaxOutBytes); + BitReader reader(writer.GetSpan()); + NewBundle new_bundle; + ASSERT_TRUE(Bundle::Read(&reader, &new_bundle)); + EXPECT_EQ(reader.TotalBitsConsumed(), + aux_out.layers[kLayerHeader].total_bits); + EXPECT_EQ(reader.ReadBits(20), 0xA55Au); + EXPECT_TRUE(reader.Close()); + + // Old fields are the same in both + EXPECT_EQ(old_bundle.extensions, new_bundle.extensions); + EXPECT_EQ(old_bundle.old_small, new_bundle.old_small); + EXPECT_EQ(old_bundle.old_f, new_bundle.old_f); + EXPECT_EQ(old_bundle.old_large, 
new_bundle.old_large); + // New fields match their defaults + EXPECT_EQ(2u, new_bundle.new_small); + EXPECT_EQ(-2.0f, new_bundle.new_f); + EXPECT_EQ(0u, new_bundle.new_large); +} + +TEST(FieldsTest, TestOldDecoderNewData) { + NewBundle new_bundle; + new_bundle.old_large = 123; + new_bundle.extensions = 3; + new_bundle.new_f = 999.0f; + new_bundle.new_large = 456; + + // Write to bit stream + constexpr size_t kMaxOutBytes = 999; + BitWriter writer; + // Make sure values are initialized by code under test. + size_t extension_bits = 12345, total_bits = 12345; + ASSERT_TRUE(Bundle::CanEncode(new_bundle, &extension_bits, &total_bits)); + EXPECT_NE(0u, extension_bits); + AuxOut aux_out; + ASSERT_TRUE(Bundle::Write(new_bundle, &writer, kLayerHeader, &aux_out)); + ASSERT_LE(aux_out.layers[kLayerHeader].total_bits, + kMaxOutBytes * kBitsPerByte); + + BitWriter::Allotment allotment( + &writer, + kMaxOutBytes * kBitsPerByte - aux_out.layers[kLayerHeader].total_bits); + // Ensure Read skips the additional fields + writer.Write(20, 0xA55A); // sentinel + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, kLayerHeader, nullptr); + + BitReader reader(writer.GetSpan()); + OldBundle old_bundle; + ASSERT_TRUE(Bundle::Read(&reader, &old_bundle)); + EXPECT_EQ(reader.TotalBitsConsumed(), + aux_out.layers[kLayerHeader].total_bits); + EXPECT_EQ(reader.ReadBits(20), 0xA55Au); + EXPECT_TRUE(reader.Close()); + + // Old fields are the same in both + EXPECT_EQ(new_bundle.extensions, old_bundle.extensions); + EXPECT_EQ(new_bundle.old_small, old_bundle.old_small); + EXPECT_EQ(new_bundle.old_f, old_bundle.old_f); + EXPECT_EQ(new_bundle.old_large, old_bundle.old_large); + // (Can't check new fields because old decoder doesn't know about them) +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/frame_header.cc b/third-party/libjxl/libjxl/lib/jxl/frame_header.cc new file mode 100644 index 0000000000..6b4b145112 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/frame_header.cc @@ -0,0 +1,500 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/frame_header.h" + +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/fields.h" + +namespace jxl { + +constexpr uint8_t YCbCrChromaSubsampling::kHShift[] = {0, 1, 1, 0}; +constexpr uint8_t YCbCrChromaSubsampling::kVShift[] = {0, 1, 0, 1}; + +static Status VisitBlendMode(Visitor* JXL_RESTRICT visitor, + BlendMode default_value, BlendMode* blend_mode) { + uint32_t encoded = static_cast(*blend_mode); + + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + Val(static_cast(BlendMode::kReplace)), + Val(static_cast(BlendMode::kAdd)), + Val(static_cast(BlendMode::kBlend)), BitsOffset(2, 3), + static_cast(default_value), &encoded)); + if (encoded > 4) { + return JXL_FAILURE("Invalid blend_mode"); + } + *blend_mode = static_cast(encoded); + return true; +} + +static Status VisitFrameType(Visitor* JXL_RESTRICT visitor, + FrameType default_value, FrameType* frame_type) { + uint32_t encoded = static_cast(*frame_type); + + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(static_cast(FrameType::kRegularFrame)), + Val(static_cast(FrameType::kDCFrame)), + Val(static_cast(FrameType::kReferenceOnly)), + Val(static_cast(FrameType::kSkipProgressive)), + static_cast(default_value), &encoded)); + *frame_type = static_cast(encoded); + return true; +} + +BlendingInfo::BlendingInfo() { Bundle::Init(this); } + +Status BlendingInfo::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR( + VisitBlendMode(visitor, BlendMode::kReplace, &mode)); + if (visitor->Conditional(nonserialized_num_extra_channels > 0 && + (mode == BlendMode::kBlend || + mode == BlendMode::kAlphaWeightedAdd))) { + // Up to 11 alpha channels for blending. 
+ JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + Val(0), Val(1), Val(2), BitsOffset(3, 3), 0, &alpha_channel)); + if (visitor->IsReading() && + alpha_channel >= nonserialized_num_extra_channels) { + return JXL_FAILURE("Invalid alpha channel for blending"); + } + } + if (visitor->Conditional((nonserialized_num_extra_channels > 0 && + (mode == BlendMode::kBlend || + mode == BlendMode::kAlphaWeightedAdd)) || + mode == BlendMode::kMul)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &clamp)); + } + // 'old' frame for blending. Only necessary if this is not a full frame, or + // blending is not kReplace. + if (visitor->Conditional(mode != BlendMode::kReplace || + nonserialized_is_partial_frame)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &source)); + } + return true; +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string BlendingInfo::DebugString() const { + std::ostringstream os; + os << (mode == BlendMode::kReplace ? "Replace" + : mode == BlendMode::kAdd ? "Add" + : mode == BlendMode::kBlend ? "Blend" + : mode == BlendMode::kAlphaWeightedAdd ? 
"AlphaWeightedAdd" + : "Mul"); + if (nonserialized_num_extra_channels > 0 && + (mode == BlendMode::kBlend || mode == BlendMode::kAlphaWeightedAdd)) { + os << ",alpha=" << alpha_channel << ",clamp=" << clamp; + } else if (mode == BlendMode::kMul) { + os << ",clamp=" << clamp; + } + if (mode != BlendMode::kReplace || nonserialized_is_partial_frame) { + os << ",source=" << source; + } + return os.str(); +} +#endif + +AnimationFrame::AnimationFrame(const CodecMetadata* metadata) + : nonserialized_metadata(metadata) { + Bundle::Init(this); +} +Status AnimationFrame::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->Conditional(nonserialized_metadata != nullptr && + nonserialized_metadata->m.have_animation)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), Val(1), Bits(8), Bits(32), 0, &duration)); + } + + if (visitor->Conditional( + nonserialized_metadata != nullptr && + nonserialized_metadata->m.animation.have_timecodes)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(32, 0, &timecode)); + } + return true; +} + +YCbCrChromaSubsampling::YCbCrChromaSubsampling() { Bundle::Init(this); } +Passes::Passes() { Bundle::Init(this); } +Status Passes::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(3), BitsOffset(3, 4), 1, &num_passes)); + JXL_ASSERT(num_passes <= kMaxNumPasses); // Cannot happen when reading + + if (visitor->Conditional(num_passes != 1)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + Val(0), Val(1), Val(2), BitsOffset(1, 3), 0, &num_downsample)); + JXL_ASSERT(num_downsample <= 4); // 1,2,4,8 + if (num_downsample > num_passes) { + return JXL_FAILURE("num_downsample %u > num_passes %u", num_downsample, + num_passes); + } + + for (uint32_t i = 0; i < num_passes - 1; i++) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &shift[i])); + } + shift[num_passes - 1] = 0; + + for (uint32_t i = 0; i < num_downsample; ++i) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, 
&downsample[i])); + if (i > 0 && downsample[i] >= downsample[i - 1]) { + return JXL_FAILURE("downsample sequence should be decreasing"); + } + } + for (uint32_t i = 0; i < num_downsample; ++i) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), Val(1), Val(2), Bits(3), 0, &last_pass[i])); + if (i > 0 && last_pass[i] <= last_pass[i - 1]) { + return JXL_FAILURE("last_pass sequence should be increasing"); + } + if (last_pass[i] >= num_passes) { + return JXL_FAILURE("last_pass %u >= num_passes %u", last_pass[i], + num_passes); + } + } + } + + return true; +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string Passes::DebugString() const { + std::ostringstream os; + os << "p=" << num_passes; + if (num_downsample) { + os << ",ds="; + for (uint32_t i = 0; i < num_downsample; ++i) { + os << last_pass[i] << ":" << downsample[i]; + if (i + 1 < num_downsample) os << ";"; + } + } + bool have_shifts = false; + for (uint32_t i = 0; i < num_passes; ++i) { + if (shift[i]) have_shifts = true; + } + if (have_shifts) { + os << ",shifts="; + for (uint32_t i = 0; i < num_passes; ++i) { + os << shift[i]; + if (i + 1 < num_passes) os << ";"; + } + } + return os.str(); +} +#endif + +FrameHeader::FrameHeader(const CodecMetadata* metadata) + : animation_frame(metadata), nonserialized_metadata(metadata) { + Bundle::Init(this); +} + +Status ReadFrameHeader(BitReader* JXL_RESTRICT reader, + FrameHeader* JXL_RESTRICT frame) { + return Bundle::Read(reader, frame); +} + +Status FrameHeader::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->AllDefault(*this, &all_default)) { + // Overwrite all serialized fields, but not any nonserialized_*. + visitor->SetDefault(this); + return true; + } + + JXL_QUIET_RETURN_IF_ERROR( + VisitFrameType(visitor, FrameType::kRegularFrame, &frame_type)); + if (visitor->IsReading() && nonserialized_is_preview && + frame_type != kRegularFrame) { + return JXL_FAILURE("Only regular frame could be a preview"); + } + + // FrameEncoding. 
+ bool is_modular = (encoding == FrameEncoding::kModular); + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &is_modular)); + encoding = (is_modular ? FrameEncoding::kModular : FrameEncoding::kVarDCT); + + // Flags + JXL_QUIET_RETURN_IF_ERROR(visitor->U64(0, &flags)); + + // Color transform + bool xyb_encoded = nonserialized_metadata == nullptr || + nonserialized_metadata->m.xyb_encoded; + + if (xyb_encoded) { + color_transform = ColorTransform::kXYB; + } else { + // Alternate if kYCbCr. + bool alternate = color_transform == ColorTransform::kYCbCr; + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alternate)); + color_transform = + (alternate ? ColorTransform::kYCbCr : ColorTransform::kNone); + } + + // Chroma subsampling for YCbCr, if no DC frame is used. + if (visitor->Conditional(color_transform == ColorTransform::kYCbCr && + ((flags & kUseDcFrame) == 0))) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&chroma_subsampling)); + } + + size_t num_extra_channels = + nonserialized_metadata != nullptr + ? 
nonserialized_metadata->m.extra_channel_info.size() + : 0; + + // Upsampling + if (visitor->Conditional((flags & kUseDcFrame) == 0)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &upsampling)); + if (nonserialized_metadata != nullptr && + visitor->Conditional(num_extra_channels != 0)) { + const std::vector& extra_channels = + nonserialized_metadata->m.extra_channel_info; + extra_channel_upsampling.resize(extra_channels.size(), 1); + for (size_t i = 0; i < extra_channels.size(); ++i) { + uint32_t dim_shift = + nonserialized_metadata->m.extra_channel_info[i].dim_shift; + uint32_t& ec_upsampling = extra_channel_upsampling[i]; + ec_upsampling >>= dim_shift; + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(4), Val(8), 1, &ec_upsampling)); + ec_upsampling <<= dim_shift; + if (ec_upsampling < upsampling) { + return JXL_FAILURE( + "EC upsampling (%u) < color upsampling (%u), which is invalid.", + ec_upsampling, upsampling); + } + if (ec_upsampling > 8) { + return JXL_FAILURE("EC upsampling too large (%u)", ec_upsampling); + } + } + } else { + extra_channel_upsampling.clear(); + } + } + + // Modular- or VarDCT-specific data. + if (visitor->Conditional(encoding == FrameEncoding::kModular)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 1, &group_size_shift)); + } + if (visitor->Conditional(encoding == FrameEncoding::kVarDCT && + color_transform == ColorTransform::kXYB)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 3, &x_qm_scale)); + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 2, &b_qm_scale)); + } else { + x_qm_scale = b_qm_scale = 2; // noop + } + + // Not useful for kPatchSource + if (visitor->Conditional(frame_type != FrameType::kReferenceOnly)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&passes)); + } + + if (visitor->Conditional(frame_type == FrameType::kDCFrame)) { + // Up to 4 pyramid levels - for up to 16384x downsampling. 
+ JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &dc_level)); + } + if (frame_type != FrameType::kDCFrame) { + dc_level = 0; + } + + bool is_partial_frame = false; + if (visitor->Conditional(frame_type != FrameType::kDCFrame)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &custom_size_or_origin)); + if (visitor->Conditional(custom_size_or_origin)) { + const U32Enc enc(Bits(8), BitsOffset(11, 256), BitsOffset(14, 2304), + BitsOffset(30, 18688)); + // Frame offset, only if kRegularFrame or kSkipProgressive. + if (visitor->Conditional(frame_type == FrameType::kRegularFrame || + frame_type == FrameType::kSkipProgressive)) { + uint32_t ux0 = PackSigned(frame_origin.x0); + uint32_t uy0 = PackSigned(frame_origin.y0); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &ux0)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &uy0)); + frame_origin.x0 = UnpackSigned(ux0); + frame_origin.y0 = UnpackSigned(uy0); + } + // Frame size + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.xsize)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(enc, 0, &frame_size.ysize)); + if (custom_size_or_origin && + (frame_size.xsize == 0 || frame_size.ysize == 0)) { + return JXL_FAILURE( + "Invalid crop dimensions for frame: zero width or height"); + } + int32_t image_xsize = default_xsize(); + int32_t image_ysize = default_ysize(); + if (frame_type == FrameType::kRegularFrame || + frame_type == FrameType::kSkipProgressive) { + is_partial_frame |= frame_origin.x0 > 0; + is_partial_frame |= frame_origin.y0 > 0; + is_partial_frame |= (static_cast(frame_size.xsize) + + frame_origin.x0) < image_xsize; + is_partial_frame |= (static_cast(frame_size.ysize) + + frame_origin.y0) < image_ysize; + } + } + } + + // Blending info, animation info and whether this is the last frame or not. 
+ if (visitor->Conditional(frame_type == FrameType::kRegularFrame || + frame_type == FrameType::kSkipProgressive)) { + blending_info.nonserialized_num_extra_channels = num_extra_channels; + blending_info.nonserialized_is_partial_frame = is_partial_frame; + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&blending_info)); + bool replace_all = (blending_info.mode == BlendMode::kReplace); + extra_channel_blending_info.resize(num_extra_channels); + for (size_t i = 0; i < num_extra_channels; i++) { + auto& ec_blending_info = extra_channel_blending_info[i]; + ec_blending_info.nonserialized_is_partial_frame = is_partial_frame; + ec_blending_info.nonserialized_num_extra_channels = num_extra_channels; + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&ec_blending_info)); + replace_all &= (ec_blending_info.mode == BlendMode::kReplace); + } + if (visitor->IsReading() && nonserialized_is_preview) { + if (!replace_all || custom_size_or_origin) { + return JXL_FAILURE("Preview is not compatible with blending"); + } + } + if (visitor->Conditional(nonserialized_metadata != nullptr && + nonserialized_metadata->m.have_animation)) { + animation_frame.nonserialized_metadata = nonserialized_metadata; + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation_frame)); + } + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &is_last)); + } + if (frame_type != FrameType::kRegularFrame) { + is_last = false; + } + + // ID of that can be used to refer to this frame. 0 for a non-zero-duration + // frame means that it will not be referenced. Not necessary for the last + // frame. + if (visitor->Conditional(frame_type != kDCFrame && !is_last)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), Val(1), Val(2), Val(3), 0, &save_as_reference)); + } + + // If this frame is not blended on another frame post-color-transform, it may + // be stored for being referenced either before or after the color transform. + // If it is blended post-color-transform, it must be blended after. 
It must + // also be blended after if this is a kRegular frame that does not cover the + // full frame, as samples outside the partial region are from a + // post-color-transform frame. + if (frame_type != FrameType::kDCFrame) { + if (visitor->Conditional(CanBeReferenced() && + blending_info.mode == BlendMode::kReplace && + !is_partial_frame && + (frame_type == FrameType::kRegularFrame || + frame_type == FrameType::kSkipProgressive))) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->Bool(false, &save_before_color_transform)); + } else if (visitor->Conditional(frame_type == FrameType::kReferenceOnly)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->Bool(true, &save_before_color_transform)); + if (!save_before_color_transform && + (frame_size.xsize < nonserialized_metadata->xsize() || + frame_size.ysize < nonserialized_metadata->ysize() || + frame_origin.x0 != 0 || frame_origin.y0 != 0)) { + return JXL_FAILURE( + "non-patch reference frame with invalid crop: %" PRIuS "x%" PRIuS + "%+d%+d", + static_cast(frame_size.xsize), + static_cast(frame_size.ysize), + static_cast(frame_origin.x0), + static_cast(frame_origin.y0)); + } + } + } else { + save_before_color_transform = true; + } + + JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name)); + + loop_filter.nonserialized_is_modular = is_modular; + JXL_RETURN_IF_ERROR(visitor->VisitNested(&loop_filter)); + + JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions)); + // Extensions: in chronological order of being added to the format. + return visitor->EndExtensions(); +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string FrameHeader::DebugString() const { + std::ostringstream os; + os << (encoding == FrameEncoding::kVarDCT ? "VarDCT" : "Modular"); + os << ","; + os << (frame_type == FrameType::kRegularFrame ? "Regular" + : frame_type == FrameType::kDCFrame ? "DC" + : frame_type == FrameType::kReferenceOnly ? 
"Reference" + : "SkipProgressive"); + if (frame_type == FrameType::kDCFrame) { + os << "(lv" << dc_level << ")"; + } + + if (flags) { + os << ","; + uint32_t remaining = flags; + +#define TEST_FLAG(name) \ + if (flags & Flags::k##name) { \ + remaining &= ~Flags::k##name; \ + os << #name; \ + if (remaining) os << "|"; \ + } + TEST_FLAG(Noise); + TEST_FLAG(Patches); + TEST_FLAG(Splines); + TEST_FLAG(UseDcFrame); + TEST_FLAG(SkipAdaptiveDCSmoothing); +#undef TEST_FLAG + } + + os << ","; + os << (color_transform == ColorTransform::kXYB ? "XYB" + : color_transform == ColorTransform::kYCbCr ? "YCbCr" + : "None"); + + if (encoding == FrameEncoding::kModular) { + os << ",shift=" << group_size_shift; + } else if (color_transform == ColorTransform::kXYB) { + os << ",qm=" << x_qm_scale << ";" << b_qm_scale; + } + if (frame_type != FrameType::kReferenceOnly) { + os << "," << passes.DebugString(); + } + if (custom_size_or_origin) { + os << ",xs=" << frame_size.xsize; + os << ",ys=" << frame_size.ysize; + if (frame_type == FrameType::kRegularFrame || + frame_type == FrameType::kSkipProgressive) { + os << ",x0=" << frame_origin.x0; + os << ",y0=" << frame_origin.y0; + } + } + if (upsampling > 1) os << ",up=" << upsampling; + if (loop_filter.gab) os << ",Gaborish"; + if (loop_filter.epf_iters > 0) os << ",epf=" << loop_filter.epf_iters; + if (animation_frame.duration > 0) os << ",dur=" << animation_frame.duration; + if (frame_type == FrameType::kRegularFrame || + frame_type == FrameType::kSkipProgressive) { + os << ","; + os << blending_info.DebugString(); + for (size_t i = 0; i < extra_channel_blending_info.size(); ++i) { + os << (i == 0 ? "[" : ";"); + os << extra_channel_blending_info[i].DebugString(); + if (i + 1 == extra_channel_blending_info.size()) os << "]"; + } + } + if (save_as_reference > 0) os << ",ref=" << save_as_reference; + os << "," << (save_before_color_transform ? 
"before" : "after") << "_ct"; + if (is_last) os << ",last"; + return os.str(); +} +#endif + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/frame_header.h b/third-party/libjxl/libjxl/lib/jxl/frame_header.h new file mode 100644 index 0000000000..5580bcd6fe --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/frame_header.h @@ -0,0 +1,503 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_FRAME_HEADER_H_ +#define LIB_JXL_FRAME_HEADER_H_ + +// Frame header with backward and forward-compatible extension capability and +// compressed integer fields. + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image_metadata.h" +#include "lib/jxl/loop_filter.h" + +namespace jxl { + +// TODO(eustas): move to proper place? +// Also used by extra channel names. +static inline Status VisitNameString(Visitor* JXL_RESTRICT visitor, + std::string* name) { + uint32_t name_length = static_cast(name->length()); + // Allows layer name lengths up to 1071 bytes + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Bits(4), BitsOffset(5, 16), + BitsOffset(10, 48), 0, &name_length)); + if (visitor->IsReading()) { + name->resize(name_length); + } + for (size_t i = 0; i < name_length; i++) { + uint32_t c = static_cast((*name)[i]); + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(8, 0, &c)); + (*name)[i] = static_cast(c); + } + return true; +} + +enum class FrameEncoding : uint32_t { + kVarDCT, + kModular, +}; + +enum class ColorTransform : uint32_t { + kXYB, // Values are encoded with XYB. May only be used if + // ImageBundle::xyb_encoded. 
+ kNone, // Values are encoded according to the attached color profile. May + // only be used if !ImageBundle::xyb_encoded. + kYCbCr, // Values are encoded according to the attached color profile, but + // transformed to YCbCr. May only be used if + // !ImageBundle::xyb_encoded. +}; + +inline std::array JpegOrder(ColorTransform ct, bool is_gray) { + if (is_gray) { + return {{0, 0, 0}}; + } + JXL_ASSERT(ct != ColorTransform::kXYB); + if (ct == ColorTransform::kYCbCr) { + return {{1, 0, 2}}; + } else { + return {{0, 1, 2}}; + } +} + +struct YCbCrChromaSubsampling : public Fields { + YCbCrChromaSubsampling(); + JXL_FIELDS_NAME(YCbCrChromaSubsampling) + size_t HShift(size_t c) const { return maxhs_ - kHShift[channel_mode_[c]]; } + size_t VShift(size_t c) const { return maxvs_ - kVShift[channel_mode_[c]]; } + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override { + // TODO(veluca): consider allowing 4x downsamples + for (size_t i = 0; i < 3; i++) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 0, &channel_mode_[i])); + } + Recompute(); + return true; + } + + uint8_t MaxHShift() const { return maxhs_; } + uint8_t MaxVShift() const { return maxvs_; } + + uint8_t RawHShift(size_t c) const { return kHShift[channel_mode_[c]]; } + uint8_t RawVShift(size_t c) const { return kVShift[channel_mode_[c]]; } + + // Uses JPEG channel order (Y, Cb, Cr). + Status Set(const uint8_t* hsample, const uint8_t* vsample) { + for (size_t c = 0; c < 3; c++) { + size_t cjpeg = c < 2 ? 
c ^ 1 : c; + size_t i = 0; + for (; i < 4; i++) { + if (1 << kHShift[i] == hsample[cjpeg] && + 1 << kVShift[i] == vsample[cjpeg]) { + channel_mode_[c] = i; + break; + } + } + if (i == 4) { + return JXL_FAILURE("Invalid subsample mode"); + } + } + Recompute(); + return true; + } + + bool Is444() const { + return HShift(0) == 0 && VShift(0) == 0 && // Cb + HShift(2) == 0 && VShift(2) == 0 && // Cr + HShift(1) == 0 && VShift(1) == 0; // Y + } + + bool Is420() const { + return HShift(0) == 1 && VShift(0) == 1 && // Cb + HShift(2) == 1 && VShift(2) == 1 && // Cr + HShift(1) == 0 && VShift(1) == 0; // Y + } + + bool Is422() const { + return HShift(0) == 1 && VShift(0) == 0 && // Cb + HShift(2) == 1 && VShift(2) == 0 && // Cr + HShift(1) == 0 && VShift(1) == 0; // Y + } + + bool Is440() const { + return HShift(0) == 0 && VShift(0) == 1 && // Cb + HShift(2) == 0 && VShift(2) == 1 && // Cr + HShift(1) == 0 && VShift(1) == 0; // Y + } + + std::string DebugString() const { + if (Is444()) return "444"; + if (Is420()) return "420"; + if (Is422()) return "422"; + if (Is440()) return "440"; + return "cs" + std::to_string(channel_mode_[0]) + + std::to_string(channel_mode_[1]) + std::to_string(channel_mode_[2]); + } + + private: + void Recompute() { + maxhs_ = 0; + maxvs_ = 0; + for (size_t i = 0; i < 3; i++) { + maxhs_ = std::max(maxhs_, kHShift[channel_mode_[i]]); + maxvs_ = std::max(maxvs_, kVShift[channel_mode_[i]]); + } + } + static const uint8_t kHShift[4]; + static const uint8_t kVShift[4]; + uint32_t channel_mode_[3]; + uint8_t maxhs_; + uint8_t maxvs_; +}; + +// Indicates how to combine the current frame with a previously-saved one. Can +// be independently controlled for color and extra channels. Formulas are +// indicative and treat alpha as if it is in range 0.0-1.0. In descriptions +// below, alpha channel is the extra channel of type alpha used for blending +// according to the blend_channel, or fully opaque if there is no alpha channel. 
+// The blending specified here is used for performing blending *after* color +// transforms - in linear sRGB if blending a XYB-encoded frame on another +// XYB-encoded frame, in sRGB if blending a frame with kColorSpace == kSRGB, or +// in the original colorspace otherwise. Blending in XYB or YCbCr is done by +// using patches. +enum class BlendMode { + // The new values (in the crop) replace the old ones: sample = new + kReplace = 0, + // The new values (in the crop) get added to the old ones: sample = old + new + kAdd = 1, + // The new values (in the crop) replace the old ones if alpha>0: + // For the alpha channel that is used as source: + // alpha = old + new * (1 - old) + // For other channels if !alpha_associated: + // sample = ((1 - new_alpha) * old * old_alpha + new_alpha * new) / alpha + // For other channels if alpha_associated: + // sample = (1 - new_alpha) * old + new + // The alpha formula applies to the alpha used for the division in the other + // channels formula, and applies to the alpha channel itself if its + // blend_channel value matches itself. + kBlend = 2, + // The new values (in the crop) are added to the old ones if alpha>0: + // For the alpha channel that is used as source: + // sample = sample = old + new * (1 - old) + // For other channels: sample = old + alpha * new + kAlphaWeightedAdd = 3, + // The new values (in the crop) get multiplied by the old ones: + // sample = old * new + // The range of the new value matters for multiplication purposes, and its + // nominal range of 0..1 is computed the same way as this is done for the + // alpha values in kBlend and kAlphaWeightedAdd. + // If using kMul as a blend mode for color channels, no color transform is + // performed on the current frame. 
+ kMul = 4, +}; + +struct BlendingInfo : public Fields { + BlendingInfo(); + JXL_FIELDS_NAME(BlendingInfo) + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + BlendMode mode; + // Which extra channel to use as alpha channel for blending, only encoded + // for blend modes that involve alpha and if there are more than 1 extra + // channels. + uint32_t alpha_channel; + // Clamp alpha or channel values to 0-1 range. + bool clamp; + // Frame ID to copy from (0-3). Only encoded if blend_mode is not kReplace. + uint32_t source; + + std::string DebugString() const; + + size_t nonserialized_num_extra_channels = 0; + bool nonserialized_is_partial_frame = false; +}; + +// Origin of the current frame. Not present for frames of type +// kOnlyPatches. +struct FrameOrigin { + int32_t x0, y0; // can be negative. +}; + +// Size of the current frame. +struct FrameSize { + uint32_t xsize, ysize; +}; + +// AnimationFrame defines duration of animation frames. +struct AnimationFrame : public Fields { + explicit AnimationFrame(const CodecMetadata* metadata); + JXL_FIELDS_NAME(AnimationFrame) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + // How long to wait [in ticks, see Animation{}] after rendering. + // May be 0 if the current frame serves as a foundation for another frame. + uint32_t duration; + + uint32_t timecode; // 0xHHMMSSFF + + // Must be set to the one ImageMetadata acting as the full codestream header, + // with correct xyb_encoded, list of extra channels, etc... + const CodecMetadata* nonserialized_metadata = nullptr; +}; + +// For decoding to lower resolutions. Only used for kRegular frames. 
+struct Passes : public Fields { + Passes(); + JXL_FIELDS_NAME(Passes) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + void GetDownsamplingBracket(size_t pass, int& minShift, int& maxShift) const { + maxShift = 2; + minShift = 3; + for (size_t i = 0;; i++) { + for (uint32_t j = 0; j < num_downsample; ++j) { + if (i == last_pass[j]) { + if (downsample[j] == 8) minShift = 3; + if (downsample[j] == 4) minShift = 2; + if (downsample[j] == 2) minShift = 1; + if (downsample[j] == 1) minShift = 0; + } + } + if (i == num_passes - 1) minShift = 0; + if (i == pass) return; + maxShift = minShift - 1; + } + } + + uint32_t GetDownsamplingTargetForCompletedPasses(uint32_t num_p) const { + if (num_p >= num_passes) return 1; + uint32_t retval = 8; + for (uint32_t i = 0; i < num_downsample; ++i) { + if (num_p > last_pass[i]) { + retval = std::min(retval, downsample[i]); + } + } + return retval; + } + + std::string DebugString() const; + + uint32_t num_passes; // <= kMaxNumPasses + uint32_t num_downsample; // <= num_passes + + // Array of num_downsample pairs. downsample=1/last_pass=num_passes-1 and + // downsample=8/last_pass=0 need not be specified; they are implicit. + uint32_t downsample[kMaxNumPasses]; + uint32_t last_pass[kMaxNumPasses]; + // Array of shift values for each pass. It is implicitly assumed to be 0 for + // the last pass. + uint32_t shift[kMaxNumPasses]; +}; + +enum FrameType { + // A "regular" frame: might be a crop, and will be blended on a previous + // frame, if any, and displayed or blended in future frames. + kRegularFrame = 0, + // A DC frame: this frame is downsampled and will be *only* used as the DC of + // a future frame and, possibly, for previews. Cannot be cropped, blended, or + // referenced by patches or blending modes. Frames that *use* a DC frame + // cannot have non-default sizes either. + kDCFrame = 1, + // A PatchesSource frame: this frame will be only used as a source frame for + // taking patches. 
Can be cropped, but cannot have non-(0, 0) x0 and y0. + kReferenceOnly = 2, + // Same as kRegularFrame, but not used for progressive rendering. This also + // implies no early display of DC. + kSkipProgressive = 3, +}; + +// Image/frame := one of more of these, where the last has is_last = true. +// Starts at a byte-aligned address "a"; the next pass starts at "a + size". +struct FrameHeader : public Fields { + // Optional postprocessing steps. These flags are the source of truth; + // Override must set/clear them rather than change their meaning. Values + // chosen such that typical flags == 0 (encoded in only two bits). + enum Flags { + // Often but not always off => low bit value: + + // Inject noise into decoded output. + kNoise = 1, + + // Overlay patches. + kPatches = 2, + + // 4, 8 = reserved for future sometimes-off + + // Overlay splines. + kSplines = 16, + + kUseDcFrame = 32, // Implies kSkipAdaptiveDCSmoothing. + + // 64 = reserved for future often-off + + // Almost always on => negated: + + kSkipAdaptiveDCSmoothing = 128, + }; + + explicit FrameHeader(const CodecMetadata* metadata); + JXL_FIELDS_NAME(FrameHeader) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + // Sets/clears `flag` based upon `condition`. + void UpdateFlag(const bool condition, const uint64_t flag) { + if (condition) { + flags |= flag; + } else { + flags &= ~flag; + } + } + + // Returns true if this frame is supposed to be saved for future usage by + // other frames. + bool CanBeReferenced() const { + // DC frames cannot be referenced. The last frame cannot be referenced. A + // duration 0 frame makes little sense if it is not referenced. A + // non-duration 0 frame may or may not be referenced. + return !is_last && frame_type != FrameType::kDCFrame && + (animation_frame.duration == 0 || save_as_reference != 0); + } + + mutable bool all_default; + + // Always present + FrameEncoding encoding; + // Some versions of UBSAN complain in VisitFrameType if not initialized. 
+ FrameType frame_type = FrameType::kRegularFrame; + + uint64_t flags; + + ColorTransform color_transform; + YCbCrChromaSubsampling chroma_subsampling; + + uint32_t group_size_shift; // only if encoding == kModular; + + uint32_t x_qm_scale; // only if VarDCT and color_transform == kXYB + uint32_t b_qm_scale; // only if VarDCT and color_transform == kXYB + + std::string name; + + // Skipped for kReferenceOnly. + Passes passes; + + // Skipped for kDCFrame + bool custom_size_or_origin; + FrameSize frame_size; + + // upsampling factors for color and extra channels. + // Upsampling is always performed before applying any inverse color transform. + // Skipped (1) if kUseDCFrame + uint32_t upsampling; + std::vector extra_channel_upsampling; + + // Only for kRegular frames. + FrameOrigin frame_origin; + + BlendingInfo blending_info; + std::vector extra_channel_blending_info; + + // Animation info for this frame. + AnimationFrame animation_frame; + + // This is the last frame. + bool is_last; + + // ID to refer to this frame with. 0-3, not present if kDCFrame. + // 0 has a special meaning for kRegular frames of nonzero duration: it defines + // a frame that will not be referenced in the future. + uint32_t save_as_reference; + + // Whether to save this frame before or after the color transform. A frame + // that is saved before the color tansform can only be used for blending + // through patches. On the contrary, a frame that is saved after the color + // transform can only be used for blending through blending modes. + // Irrelevant for extra channel blending. Can only be true if + // blending_info.mode == kReplace and this is not a partial kRegularFrame; if + // this is a DC frame, it is always true. + bool save_before_color_transform; + + uint32_t dc_level; // 1-4 if kDCFrame (0 otherwise). + + // Must be set to the one ImageMetadata acting as the full codestream header, + // with correct xyb_encoded, list of extra channels, etc... 
+ const CodecMetadata* nonserialized_metadata = nullptr; + + // NOTE: This is ignored by AllDefault. + LoopFilter loop_filter; + + bool nonserialized_is_preview = false; + + size_t default_xsize() const { + if (!nonserialized_metadata) return 0; + if (nonserialized_is_preview) { + return nonserialized_metadata->m.preview_size.xsize(); + } + return nonserialized_metadata->xsize(); + } + + size_t default_ysize() const { + if (!nonserialized_metadata) return 0; + if (nonserialized_is_preview) { + return nonserialized_metadata->m.preview_size.ysize(); + } + return nonserialized_metadata->ysize(); + } + + FrameDimensions ToFrameDimensions() const { + size_t xsize = default_xsize(); + size_t ysize = default_ysize(); + + xsize = frame_size.xsize ? frame_size.xsize : xsize; + ysize = frame_size.ysize ? frame_size.ysize : ysize; + + if (dc_level != 0) { + xsize = DivCeil(xsize, 1 << (3 * dc_level)); + ysize = DivCeil(ysize, 1 << (3 * dc_level)); + } + + FrameDimensions frame_dim; + frame_dim.Set(xsize, ysize, group_size_shift, + chroma_subsampling.MaxHShift(), + chroma_subsampling.MaxVShift(), + encoding == FrameEncoding::kModular, upsampling); + return frame_dim; + } + + // True if a color transform should be applied to this frame. + bool needs_color_transform() const { + return !save_before_color_transform || + frame_type == FrameType::kRegularFrame || + frame_type == FrameType::kSkipProgressive; + } + + std::string DebugString() const; + + uint64_t extensions; +}; + +Status ReadFrameHeader(BitReader* JXL_RESTRICT reader, + FrameHeader* JXL_RESTRICT frame); + +// Shared by enc/dec. 5F and 13 are by far the most common for d1/2/4/8, 0 +// ensures low overhead for small images. 
+static constexpr U32Enc kOrderEnc = + U32Enc(Val(0x5F), Val(0x13), Val(0), Bits(kNumOrders)); + +} // namespace jxl + +#endif // LIB_JXL_FRAME_HEADER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/gamma_correct_test.cc b/third-party/libjxl/libjxl/lib/jxl/gamma_correct_test.cc new file mode 100644 index 0000000000..131ec4fa83 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/gamma_correct_test.cc @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#include + +#include "lib/jxl/enc_gamma_correct.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(GammaCorrectTest, TestLinearToSrgbEdgeCases) { + EXPECT_EQ(0, LinearToSrgb8Direct(0.0)); + EXPECT_NEAR(0, LinearToSrgb8Direct(1E-6f), 2E-5); + EXPECT_EQ(0, LinearToSrgb8Direct(-1E-6f)); + EXPECT_EQ(0, LinearToSrgb8Direct(-1E6)); + EXPECT_NEAR(1, LinearToSrgb8Direct(1 - 1E-6f), 1E-5); + EXPECT_EQ(1, LinearToSrgb8Direct(1 + 1E-6f)); + EXPECT_EQ(1, LinearToSrgb8Direct(1E6)); +} + +TEST(GammaCorrectTest, TestRoundTrip) { + // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter) + for (double linear = 0.0; linear <= 1.0; linear += 1E-7) { + const double srgb = LinearToSrgb8Direct(linear); + const double linear2 = Srgb8ToLinearDirect(srgb); + ASSERT_LT(std::abs(linear - linear2), 2E-13) + << "linear = " << linear << ", linear2 = " << linear2; + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur.cc b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.cc new file mode 100644 index 0000000000..d9bc297d45 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.cc @@ -0,0 +1,619 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/gauss_blur.h" + +#include + +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/gauss_blur.cc" +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/matrix_ops.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Broadcast; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::NegMulSub; +#if HWY_TARGET != HWY_SCALAR +using hwy::HWY_NAMESPACE::ShiftLeftLanes; +#endif +using hwy::HWY_NAMESPACE::Vec; + +void FastGaussian1D(const hwy::AlignedUniquePtr& rg, + const float* JXL_RESTRICT in, intptr_t width, + float* JXL_RESTRICT out) { + // Although the current output depends on the previous output, we can unroll + // up to 4x by precomputing up to fourth powers of the constants. Beyond that, + // numerical precision might become a problem. Macro because this is tested + // in #if alongside HWY_TARGET. 
+#define JXL_GAUSS_MAX_LANES 4 + using D = HWY_CAPPED(float, JXL_GAUSS_MAX_LANES); + using V = Vec; + const D d; + const V mul_in_1 = Load(d, rg->mul_in + 0 * 4); + const V mul_in_3 = Load(d, rg->mul_in + 1 * 4); + const V mul_in_5 = Load(d, rg->mul_in + 2 * 4); + const V mul_prev_1 = Load(d, rg->mul_prev + 0 * 4); + const V mul_prev_3 = Load(d, rg->mul_prev + 1 * 4); + const V mul_prev_5 = Load(d, rg->mul_prev + 2 * 4); + const V mul_prev2_1 = Load(d, rg->mul_prev2 + 0 * 4); + const V mul_prev2_3 = Load(d, rg->mul_prev2 + 1 * 4); + const V mul_prev2_5 = Load(d, rg->mul_prev2 + 2 * 4); + V prev_1 = Zero(d); + V prev_3 = Zero(d); + V prev_5 = Zero(d); + V prev2_1 = Zero(d); + V prev2_3 = Zero(d); + V prev2_5 = Zero(d); + + const intptr_t N = rg->radius; + + intptr_t n = -N + 1; + // Left side with bounds checks and only write output after n >= 0. + const intptr_t first_aligned = RoundUpTo(N + 1, Lanes(d)); + for (; n < std::min(first_aligned, width); ++n) { + const intptr_t left = n - N - 1; + const intptr_t right = n + N - 1; + const float left_val = left >= 0 ? in[left] : 0.0f; + const float right_val = right < width ? in[right] : 0.0f; + const V sum = Set(d, left_val + right_val); + + // (Only processing a single lane here, no need to broadcast) + V out_1 = Mul(sum, mul_in_1); + V out_3 = Mul(sum, mul_in_3); + V out_5 = Mul(sum, mul_in_5); + + out_1 = MulAdd(mul_prev2_1, prev2_1, out_1); + out_3 = MulAdd(mul_prev2_3, prev2_3, out_3); + out_5 = MulAdd(mul_prev2_5, prev2_5, out_5); + prev2_1 = prev_1; + prev2_3 = prev_3; + prev2_5 = prev_5; + + out_1 = MulAdd(mul_prev_1, prev_1, out_1); + out_3 = MulAdd(mul_prev_3, prev_3, out_3); + out_5 = MulAdd(mul_prev_5, prev_5, out_5); + prev_1 = out_1; + prev_3 = out_3; + prev_5 = out_5; + + if (n >= 0) { + out[n] = GetLane(Add(out_1, Add(out_3, out_5))); + } + } + + // The above loop is effectively scalar but it is convenient to use the same + // prev/prev2 variables, so broadcast to each lane before the unrolled loop. 
+#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES > 1 + prev2_1 = Broadcast<0>(prev2_1); + prev2_3 = Broadcast<0>(prev2_3); + prev2_5 = Broadcast<0>(prev2_5); + prev_1 = Broadcast<0>(prev_1); + prev_3 = Broadcast<0>(prev_3); + prev_5 = Broadcast<0>(prev_5); +#endif + + // Unrolled, no bounds checking needed. + for (; n < width - N + 1 - (JXL_GAUSS_MAX_LANES - 1); n += Lanes(d)) { + const V sum = Add(LoadU(d, in + n - N - 1), LoadU(d, in + n + N - 1)); + + // To get a vector of output(s), we multiply broadcasted vectors (of each + // input plus the two previous outputs) and add them all together. + // Incremental broadcasting and shifting is expected to be cheaper than + // horizontal adds or transposing 4x4 values because they run on a different + // port, concurrently with the FMA. + const V in0 = Broadcast<0>(sum); + V out_1 = Mul(in0, mul_in_1); + V out_3 = Mul(in0, mul_in_3); + V out_5 = Mul(in0, mul_in_5); + +#if HWY_TARGET != HWY_SCALAR && JXL_GAUSS_MAX_LANES >= 2 + const V in1 = Broadcast<1>(sum); + out_1 = MulAdd(ShiftLeftLanes<1>(mul_in_1), in1, out_1); + out_3 = MulAdd(ShiftLeftLanes<1>(mul_in_3), in1, out_3); + out_5 = MulAdd(ShiftLeftLanes<1>(mul_in_5), in1, out_5); + +#if JXL_GAUSS_MAX_LANES >= 4 + const V in2 = Broadcast<2>(sum); + out_1 = MulAdd(ShiftLeftLanes<2>(mul_in_1), in2, out_1); + out_3 = MulAdd(ShiftLeftLanes<2>(mul_in_3), in2, out_3); + out_5 = MulAdd(ShiftLeftLanes<2>(mul_in_5), in2, out_5); + + const V in3 = Broadcast<3>(sum); + out_1 = MulAdd(ShiftLeftLanes<3>(mul_in_1), in3, out_1); + out_3 = MulAdd(ShiftLeftLanes<3>(mul_in_3), in3, out_3); + out_5 = MulAdd(ShiftLeftLanes<3>(mul_in_5), in3, out_5); +#endif +#endif + + out_1 = MulAdd(mul_prev2_1, prev2_1, out_1); + out_3 = MulAdd(mul_prev2_3, prev2_3, out_3); + out_5 = MulAdd(mul_prev2_5, prev2_5, out_5); + + out_1 = MulAdd(mul_prev_1, prev_1, out_1); + out_3 = MulAdd(mul_prev_3, prev_3, out_3); + out_5 = MulAdd(mul_prev_5, prev_5, out_5); +#if HWY_TARGET == HWY_SCALAR || 
JXL_GAUSS_MAX_LANES == 1 + prev2_1 = prev_1; + prev2_3 = prev_3; + prev2_5 = prev_5; + prev_1 = out_1; + prev_3 = out_3; + prev_5 = out_5; +#else + prev2_1 = Broadcast(out_1); + prev2_3 = Broadcast(out_3); + prev2_5 = Broadcast(out_5); + prev_1 = Broadcast(out_1); + prev_3 = Broadcast(out_3); + prev_5 = Broadcast(out_5); +#endif + + Store(Add(out_1, Add(out_3, out_5)), d, out + n); + } + + // Remainder handling with bounds checks + for (; n < width; ++n) { + const intptr_t left = n - N - 1; + const intptr_t right = n + N - 1; + const float left_val = left >= 0 ? in[left] : 0.0f; + const float right_val = right < width ? in[right] : 0.0f; + const V sum = Set(d, left_val + right_val); + + // (Only processing a single lane here, no need to broadcast) + V out_1 = Mul(sum, mul_in_1); + V out_3 = Mul(sum, mul_in_3); + V out_5 = Mul(sum, mul_in_5); + + out_1 = MulAdd(mul_prev2_1, prev2_1, out_1); + out_3 = MulAdd(mul_prev2_3, prev2_3, out_3); + out_5 = MulAdd(mul_prev2_5, prev2_5, out_5); + prev2_1 = prev_1; + prev2_3 = prev_3; + prev2_5 = prev_5; + + out_1 = MulAdd(mul_prev_1, prev_1, out_1); + out_3 = MulAdd(mul_prev_3, prev_3, out_3); + out_5 = MulAdd(mul_prev_5, prev_5, out_5); + prev_1 = out_1; + prev_3 = out_3; + prev_5 = out_5; + + out[n] = GetLane(Add(out_1, Add(out_3, out_5))); + } +} + +// Ring buffer is for n, n-1, n-2; round up to 4 for faster modulo. +constexpr size_t kMod = 4; + +// Avoids an unnecessary store during warmup. +struct OutputNone { + template + void operator()(const V& /*unused*/, float* JXL_RESTRICT /*pos*/, + ptrdiff_t /*offset*/) const {} +}; + +// Common case: write output vectors in all VerticalBlock except warmup. +struct OutputStore { + template + void operator()(const V& out, float* JXL_RESTRICT pos, + ptrdiff_t offset) const { + // Stream helps for large images but is slower for images that fit in cache. 
+ Store(out, HWY_FULL(float)(), pos + offset); + } +}; + +// At top/bottom borders, we don't have two inputs to load, so avoid addition. +// pos may even point to all zeros if the row is outside the input image. +class SingleInput { + public: + explicit SingleInput(const float* pos) : pos_(pos) {} + Vec operator()(const size_t offset) const { + return Load(HWY_FULL(float)(), pos_ + offset); + } + const float* pos_; +}; + +// In the middle of the image, we need to load from a row above and below, and +// return the sum. +class TwoInputs { + public: + TwoInputs(const float* pos1, const float* pos2) : pos1_(pos1), pos2_(pos2) {} + Vec operator()(const size_t offset) const { + const auto in1 = Load(HWY_FULL(float)(), pos1_ + offset); + const auto in2 = Load(HWY_FULL(float)(), pos2_ + offset); + return Add(in1, in2); + } + + private: + const float* pos1_; + const float* pos2_; +}; + +// Block := kVectors consecutive full vectors (one cache line except on the +// right boundary, where we can only rely on having one vector). Unrolling to +// the cache line size improves cache utilization. 
+template +void VerticalBlock(const V& d1_1, const V& d1_3, const V& d1_5, const V& n2_1, + const V& n2_3, const V& n2_5, const Input& input, + size_t& ctr, float* ring_buffer, const Output output, + float* JXL_RESTRICT out_pos) { + const HWY_FULL(float) d; + constexpr size_t kVN = MaxLanes(d); + // More cache-friendly to process an entirely cache line at a time + constexpr size_t kLanes = kVectors * kVN; + + float* JXL_RESTRICT y_1 = ring_buffer + 0 * kLanes * kMod; + float* JXL_RESTRICT y_3 = ring_buffer + 1 * kLanes * kMod; + float* JXL_RESTRICT y_5 = ring_buffer + 2 * kLanes * kMod; + + const size_t n_0 = (++ctr) % kMod; + const size_t n_1 = (ctr - 1) % kMod; + const size_t n_2 = (ctr - 2) % kMod; + + for (size_t idx_vec = 0; idx_vec < kVectors; ++idx_vec) { + const V sum = input(idx_vec * kVN); + + const V y_n1_1 = Load(d, y_1 + kLanes * n_1 + idx_vec * kVN); + const V y_n1_3 = Load(d, y_3 + kLanes * n_1 + idx_vec * kVN); + const V y_n1_5 = Load(d, y_5 + kLanes * n_1 + idx_vec * kVN); + const V y_n2_1 = Load(d, y_1 + kLanes * n_2 + idx_vec * kVN); + const V y_n2_3 = Load(d, y_3 + kLanes * n_2 + idx_vec * kVN); + const V y_n2_5 = Load(d, y_5 + kLanes * n_2 + idx_vec * kVN); + // (35) + const V y1 = MulAdd(n2_1, sum, NegMulSub(d1_1, y_n1_1, y_n2_1)); + const V y3 = MulAdd(n2_3, sum, NegMulSub(d1_3, y_n1_3, y_n2_3)); + const V y5 = MulAdd(n2_5, sum, NegMulSub(d1_5, y_n1_5, y_n2_5)); + Store(y1, d, y_1 + kLanes * n_0 + idx_vec * kVN); + Store(y3, d, y_3 + kLanes * n_0 + idx_vec * kVN); + Store(y5, d, y_5 + kLanes * n_0 + idx_vec * kVN); + output(Add(y1, Add(y3, y5)), out_pos, idx_vec * kVN); + } + // NOTE: flushing cache line out_pos hurts performance - less so with + // clflushopt than clflush but still a significant slowdown. +} + +// Reads/writes one block (kVectors full vectors) in each row. 
+template +void VerticalStrip(const hwy::AlignedUniquePtr& rg, + const ImageF& in, const size_t x, ImageF* JXL_RESTRICT out) { + // We're iterating vertically, so use multiple full-length vectors (each lane + // is one column of row n). + using D = HWY_FULL(float); + using V = Vec; + const D d; + constexpr size_t kVN = MaxLanes(d); + // More cache-friendly to process an entirely cache line at a time + constexpr size_t kLanes = kVectors * kVN; +#if HWY_TARGET == HWY_SCALAR + const V d1_1 = Set(d, rg->d1[0 * 4]); + const V d1_3 = Set(d, rg->d1[1 * 4]); + const V d1_5 = Set(d, rg->d1[2 * 4]); + const V n2_1 = Set(d, rg->n2[0 * 4]); + const V n2_3 = Set(d, rg->n2[1 * 4]); + const V n2_5 = Set(d, rg->n2[2 * 4]); +#else + const V d1_1 = LoadDup128(d, rg->d1 + 0 * 4); + const V d1_3 = LoadDup128(d, rg->d1 + 1 * 4); + const V d1_5 = LoadDup128(d, rg->d1 + 2 * 4); + const V n2_1 = LoadDup128(d, rg->n2 + 0 * 4); + const V n2_3 = LoadDup128(d, rg->n2 + 1 * 4); + const V n2_5 = LoadDup128(d, rg->n2 + 2 * 4); +#endif + + const size_t N = rg->radius; + const size_t ysize = in.ysize(); + + size_t ctr = 0; + HWY_ALIGN float ring_buffer[3 * kLanes * kMod] = {0}; + HWY_ALIGN static constexpr float zero[kLanes] = {0}; + + // Warmup: top is out of bounds (zero padded), bottom is usually in-bounds. + ssize_t n = -static_cast(N) + 1; + for (; n < 0; ++n) { + // bottom is always non-negative since n is initialized in -N + 1. + const size_t bottom = n + N - 1; + VerticalBlock( + d1_1, d1_3, d1_5, n2_1, n2_3, n2_5, + SingleInput(bottom < ysize ? in.ConstRow(bottom) + x : zero), ctr, + ring_buffer, OutputNone(), nullptr); + } + JXL_DASSERT(n >= 0); + + // Start producing output; top is still out of bounds. + for (; static_cast(n) < std::min(N + 1, ysize); ++n) { + const size_t bottom = n + N - 1; + VerticalBlock( + d1_1, d1_3, d1_5, n2_1, n2_3, n2_5, + SingleInput(bottom < ysize ? 
in.ConstRow(bottom) + x : zero), ctr, + ring_buffer, OutputStore(), out->Row(n) + x); + } + + // Interior outputs with prefetching and without bounds checks. + constexpr size_t kPrefetchRows = 8; + for (; n < static_cast(ysize - N + 1 - kPrefetchRows); ++n) { + const size_t top = n - N - 1; + const size_t bottom = n + N - 1; + VerticalBlock( + d1_1, d1_3, d1_5, n2_1, n2_3, n2_5, + TwoInputs(in.ConstRow(top) + x, in.ConstRow(bottom) + x), ctr, + ring_buffer, OutputStore(), out->Row(n) + x); + hwy::Prefetch(in.ConstRow(top + kPrefetchRows) + x); + hwy::Prefetch(in.ConstRow(bottom + kPrefetchRows) + x); + } + + // Bottom border without prefetching and with bounds checks. + for (; static_cast(n) < ysize; ++n) { + const size_t top = n - N - 1; + const size_t bottom = n + N - 1; + VerticalBlock( + d1_1, d1_3, d1_5, n2_1, n2_3, n2_5, + TwoInputs(in.ConstRow(top) + x, + bottom < ysize ? in.ConstRow(bottom) + x : zero), + ctr, ring_buffer, OutputStore(), out->Row(n) + x); + } +} + +// Apply 1D vertical scan to multiple columns (one per vector lane). +// Not yet parallelized. +void FastGaussianVertical(const hwy::AlignedUniquePtr& rg, + const ImageF& in, ThreadPool* /*pool*/, + ImageF* JXL_RESTRICT out) { + JXL_CHECK(SameSize(in, *out)); + + constexpr size_t kCacheLineLanes = 64 / sizeof(float); + constexpr size_t kVN = MaxLanes(HWY_FULL(float)()); + constexpr size_t kCacheLineVectors = + (kVN < kCacheLineLanes) ? (kCacheLineLanes / kVN) : 4; + constexpr size_t kFastPace = kCacheLineVectors * kVN; + + size_t x = 0; + for (; x + kFastPace <= in.xsize(); x += kFastPace) { + VerticalStrip(rg, in, x, out); + } + for (; x < in.xsize(); x += kVN) { + VerticalStrip<1>(rg, in, x, out); + } +} + +// TODO(veluca): consider replacing with FastGaussian. 
+ImageF ConvolveXSampleAndTranspose(const ImageF& in, + const std::vector& kernel, + const size_t res) { + JXL_ASSERT(kernel.size() % 2 == 1); + JXL_ASSERT(in.xsize() % res == 0); + const size_t offset = res / 2; + const size_t out_xsize = in.xsize() / res; + ImageF out(in.ysize(), out_xsize); + const int r = kernel.size() / 2; + HWY_FULL(float) df; + std::vector row_tmp(in.xsize() + 2 * r + Lanes(df)); + float* const JXL_RESTRICT rowp = &row_tmp[r]; + std::vector padded_k = kernel; + padded_k.resize(padded_k.size() + Lanes(df)); + const float* const kernelp = &padded_k[r]; + for (size_t y = 0; y < in.ysize(); ++y) { + ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r); + size_t x = offset, ox = 0; + for (; x < static_cast(r) && x < in.xsize(); x += res, ++ox) { + float sum = 0.0f; + for (int i = -r; i <= r; ++i) { + sum += rowp[std::max( + 0, std::min(static_cast(x) + i, in.xsize()))] * + kernelp[i]; + } + out.Row(ox)[y] = sum; + } + for (; x + r < in.xsize(); x += res, ++ox) { + auto sum = Zero(df); + for (int i = -r; i <= r; i += Lanes(df)) { + sum = MulAdd(LoadU(df, rowp + x + i), LoadU(df, kernelp + i), sum); + } + out.Row(ox)[y] = GetLane(SumOfLanes(df, sum)); + } + for (; x < in.xsize(); x += res, ++ox) { + float sum = 0.0f; + for (int i = -r; i <= r; ++i) { + sum += rowp[std::max( + 0, std::min(static_cast(x) + i, in.xsize()))] * + kernelp[i]; + } + out.Row(ox)[y] = sum; + } + } + return out; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(FastGaussian1D); +HWY_EXPORT(ConvolveXSampleAndTranspose); +void FastGaussian1D(const hwy::AlignedUniquePtr& rg, + const float* JXL_RESTRICT in, intptr_t width, + float* JXL_RESTRICT out) { + return HWY_DYNAMIC_DISPATCH(FastGaussian1D)(rg, in, width, out); +} + +HWY_EXPORT(FastGaussianVertical); // Local function. 
+ +void ExtrapolateBorders(const float* const JXL_RESTRICT row_in, + float* const JXL_RESTRICT row_out, const int xsize, + const int radius) { + const int lastcol = xsize - 1; + for (int x = 1; x <= radius; ++x) { + row_out[-x] = row_in[std::min(x, xsize - 1)]; + } + memcpy(row_out, row_in, xsize * sizeof(row_out[0])); + for (int x = 1; x <= radius; ++x) { + row_out[lastcol + x] = row_in[std::max(0, lastcol - x)]; + } +} + +ImageF ConvolveXSampleAndTranspose(const ImageF& in, + const std::vector& kernel, + const size_t res) { + return HWY_DYNAMIC_DISPATCH(ConvolveXSampleAndTranspose)(in, kernel, res); +} + +Image3F ConvolveXSampleAndTranspose(const Image3F& in, + const std::vector& kernel, + const size_t res) { + return Image3F(ConvolveXSampleAndTranspose(in.Plane(0), kernel, res), + ConvolveXSampleAndTranspose(in.Plane(1), kernel, res), + ConvolveXSampleAndTranspose(in.Plane(2), kernel, res)); +} + +ImageF ConvolveAndSample(const ImageF& in, const std::vector& kernel, + const size_t res) { + ImageF tmp = ConvolveXSampleAndTranspose(in, kernel, res); + return ConvolveXSampleAndTranspose(tmp, kernel, res); +} + +// Implements "Recursive Implementation of the Gaussian Filter Using Truncated +// Cosine Functions" by Charalampidis [2016]. 
+hwy::AlignedUniquePtr CreateRecursiveGaussian(double sigma) { + auto rg = hwy::MakeUniqueAligned(); + constexpr double kPi = 3.141592653589793238; + + const double radius = roundf(3.2795 * sigma + 0.2546); // (57), "N" + + // Table I, first row + const double pi_div_2r = kPi / (2.0 * radius); + const double omega[3] = {pi_div_2r, 3.0 * pi_div_2r, 5.0 * pi_div_2r}; + + // (37), k={1,3,5} + const double p_1 = +1.0 / std::tan(0.5 * omega[0]); + const double p_3 = -1.0 / std::tan(0.5 * omega[1]); + const double p_5 = +1.0 / std::tan(0.5 * omega[2]); + + // (44), k={1,3,5} + const double r_1 = +p_1 * p_1 / std::sin(omega[0]); + const double r_3 = -p_3 * p_3 / std::sin(omega[1]); + const double r_5 = +p_5 * p_5 / std::sin(omega[2]); + + // (50), k={1,3,5} + const double neg_half_sigma2 = -0.5 * sigma * sigma; + const double recip_radius = 1.0 / radius; + double rho[3]; + for (size_t i = 0; i < 3; ++i) { + rho[i] = std::exp(neg_half_sigma2 * omega[i] * omega[i]) * recip_radius; + } + + // second part of (52), k1,k2 = 1,3; 3,5; 5,1 + const double D_13 = p_1 * r_3 - r_1 * p_3; + const double D_35 = p_3 * r_5 - r_3 * p_5; + const double D_51 = p_5 * r_1 - r_5 * p_1; + + // (52), k=5 + const double recip_d13 = 1.0 / D_13; + const double zeta_15 = D_35 * recip_d13; + const double zeta_35 = D_51 * recip_d13; + + double A[9] = {p_1, p_3, p_5, // + r_1, r_3, r_5, // (56) + zeta_15, zeta_35, 1}; + JXL_CHECK(Inv3x3Matrix(A)); + const double gamma[3] = {1, radius * radius - sigma * sigma, // (55) + zeta_15 * rho[0] + zeta_35 * rho[1] + rho[2]}; + double beta[3]; + Mul3x3Vector(A, gamma, beta); // (53) + + // Sanity check: correctly solved for beta (IIR filter weights are normalized) + const double sum = beta[0] * p_1 + beta[1] * p_3 + beta[2] * p_5; // (39) + JXL_ASSERT(std::abs(sum - 1) < 1E-12); + (void)sum; + + rg->radius = static_cast(radius); + + double n2[3]; + double d1[3]; + for (size_t i = 0; i < 3; ++i) { + n2[i] = -beta[i] * std::cos(omega[i] * (radius + 1.0)); // (33) + 
d1[i] = -2.0 * std::cos(omega[i]); // (33) + + for (size_t lane = 0; lane < 4; ++lane) { + rg->n2[4 * i + lane] = static_cast(n2[i]); + rg->d1[4 * i + lane] = static_cast(d1[i]); + } + + const double d_2 = d1[i] * d1[i]; + + // Obtained by expanding (35) for four consecutive outputs via sympy: + // n, d, p, pp = symbols('n d p pp') + // i0, i1, i2, i3 = symbols('i0 i1 i2 i3') + // o0, o1, o2, o3 = symbols('o0 o1 o2 o3') + // o0 = n*i0 - d*p - pp + // o1 = n*i1 - d*o0 - p + // o2 = n*i2 - d*o1 - o0 + // o3 = n*i3 - d*o2 - o1 + // Then expand(o3) and gather terms for p(prev), pp(prev2) etc. + rg->mul_prev[4 * i + 0] = -d1[i]; + rg->mul_prev[4 * i + 1] = d_2 - 1.0; + rg->mul_prev[4 * i + 2] = -d_2 * d1[i] + 2.0 * d1[i]; + rg->mul_prev[4 * i + 3] = d_2 * d_2 - 3.0 * d_2 + 1.0; + rg->mul_prev2[4 * i + 0] = -1.0; + rg->mul_prev2[4 * i + 1] = d1[i]; + rg->mul_prev2[4 * i + 2] = -d_2 + 1.0; + rg->mul_prev2[4 * i + 3] = d_2 * d1[i] - 2.0 * d1[i]; + rg->mul_in[4 * i + 0] = n2[i]; + rg->mul_in[4 * i + 1] = -d1[i] * n2[i]; + rg->mul_in[4 * i + 2] = d_2 * n2[i] - n2[i]; + rg->mul_in[4 * i + 3] = -d_2 * d1[i] * n2[i] + 2.0 * d1[i] * n2[i]; + } + return rg; +} + +namespace { + +// Apply 1D horizontal scan to each row. 
+void FastGaussianHorizontal(const hwy::AlignedUniquePtr& rg, + const ImageF& in, ThreadPool* pool, + ImageF* JXL_RESTRICT out) { + JXL_CHECK(SameSize(in, *out)); + + const intptr_t xsize = in.xsize(); + JXL_CHECK(RunOnPool( + pool, 0, in.ysize(), ThreadPool::NoInit, + [&](const uint32_t task, size_t /*thread*/) { + const size_t y = task; + const float* row_in = in.ConstRow(y); + float* JXL_RESTRICT row_out = out->Row(y); + FastGaussian1D(rg, row_in, xsize, row_out); + }, + "FastGaussianHorizontal")); +} + +} // namespace + +void FastGaussian(const hwy::AlignedUniquePtr& rg, + const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp, + ImageF* JXL_RESTRICT out) { + FastGaussianHorizontal(rg, in, pool, temp); + HWY_DYNAMIC_DISPATCH(FastGaussianVertical)(rg, *temp, pool, out); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur.h b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.h new file mode 100644 index 0000000000..fb4741f03a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur.h @@ -0,0 +1,94 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_GAUSS_BLUR_H_ +#define LIB_JXL_GAUSS_BLUR_H_ + +#include + +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/image.h" + +namespace jxl { + +template +std::vector GaussianKernel(int radius, T sigma) { + JXL_ASSERT(sigma > 0.0); + std::vector kernel(2 * radius + 1); + const T scaler = -1.0 / (2 * sigma * sigma); + double sum = 0.0; + for (int i = -radius; i <= radius; ++i) { + const T val = std::exp(scaler * i * i); + kernel[i + radius] = val; + sum += val; + } + for (size_t i = 0; i < kernel.size(); ++i) { + kernel[i] /= sum; + } + return kernel; +} + +// All convolution functions below apply mirroring of the input on the borders +// in the following way: +// +// input: [a0 a1 a2 ... aN] +// mirrored input: [aR ... a1 | a0 a1 a2 .... aN | aN-1 ... aN-R] +// +// where R is the radius of the kernel (i.e. kernel size is 2*R+1). + +// REQUIRES: in.xsize() and in.ysize() are integer multiples of res. +ImageF ConvolveAndSample(const ImageF& in, const std::vector& kernel, + const size_t res); + +// Private, used by test. +void ExtrapolateBorders(const float* const JXL_RESTRICT row_in, + float* const JXL_RESTRICT row_out, const int xsize, + const int radius); + +// Only for use by CreateRecursiveGaussian and FastGaussian*. +#pragma pack(push, 1) +struct RecursiveGaussian { + // For k={1,3,5} in that order, each broadcasted 4x for LoadDup128. Used only + // for vertical passes. + float n2[3 * 4]; + float d1[3 * 4]; + + // We unroll horizontal passes 4x - one output per lane. These are each lane's + // multiplier for the previous output (relative to the first of the four + // outputs). Indexing: 4 * 0..2 (for {1,3,5}) + 0..3 for the lane index. + float mul_prev[3 * 4]; + // Ditto for the second to last output. + float mul_prev2[3 * 4]; + + // We multiply a vector of inputs 0..3 by a vector shifted from this array. 
+ // in=0 uses all 4 (nonzero) terms; for in=3, the lower three lanes are 0. + float mul_in[3 * 4]; + + size_t radius; +}; +#pragma pack(pop) + +// Precomputation for FastGaussian*; users may use the same pointer/storage in +// subsequent calls to FastGaussian* with the same sigma. +hwy::AlignedUniquePtr CreateRecursiveGaussian(double sigma); + +// 1D Gaussian with zero-pad boundary handling and runtime independent of sigma. +void FastGaussian1D(const hwy::AlignedUniquePtr& rg, + const float* JXL_RESTRICT in, intptr_t width, + float* JXL_RESTRICT out); + +// 2D Gaussian with zero-pad boundary handling and runtime independent of sigma. +void FastGaussian(const hwy::AlignedUniquePtr& rg, + const ImageF& in, ThreadPool* pool, ImageF* JXL_RESTRICT temp, + ImageF* JXL_RESTRICT out); + +} // namespace jxl + +#endif // LIB_JXL_GAUSS_BLUR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_gbench.cc new file mode 100644 index 0000000000..b1bb64abc5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_gbench.cc @@ -0,0 +1,126 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include "benchmark/benchmark.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/gauss_blur.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { +namespace { + +JXL_MAYBE_UNUSED ImageF Convolve(const ImageF& in, + const std::vector& kernel) { + return ConvolveAndSample(in, kernel, 1); +} + +void BM_GaussBlur1d(benchmark::State& state) { + // Uncomment to disable SIMD and force and scalar implementation + // hwy::DisableTargets(~HWY_SCALAR); + // Uncomment to run AVX2 + // hwy::DisableTargets(HWY_AVX3); + + const size_t length = state.range(); + const double sigma = 7.0; // (from Butteraugli application) + ImageF in(length, 1); + const float expected = length; + FillImage(expected, &in); + + ImageF temp(length, 1); + ImageF out(length, 1); + const auto rg = CreateRecursiveGaussian(sigma); + for (auto _ : state) { + FastGaussian1D(rg, in.Row(0), length, out.Row(0)); + // Prevent optimizing out + JXL_ASSERT(std::abs(out.ConstRow(0)[length / 2] - expected) / expected < + 9E-5); + } + state.SetItemsProcessed(length * state.iterations()); +} + +void BM_GaussBlur2d(benchmark::State& state) { + // See GaussBlur1d for SIMD changes. + + const size_t xsize = state.range(); + const size_t ysize = xsize; + const double sigma = 7.0; // (from Butteraugli application) + ImageF in(xsize, ysize); + const float expected = xsize + ysize; + FillImage(expected, &in); + + ImageF temp(xsize, ysize); + ImageF out(xsize, ysize); + ThreadPool* null_pool = nullptr; + const auto rg = CreateRecursiveGaussian(sigma); + for (auto _ : state) { + FastGaussian(rg, in, null_pool, &temp, &out); + // Prevent optimizing out + JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) / + expected < + 9E-5); + } + state.SetItemsProcessed(xsize * ysize * state.iterations()); +} + +void BM_GaussBlurFir(benchmark::State& state) { + // See GaussBlur1d for SIMD changes. 
+ + const size_t xsize = state.range(); + const size_t ysize = xsize; + const double sigma = 7.0; // (from Butteraugli application) + ImageF in(xsize, ysize); + const float expected = xsize + ysize; + FillImage(expected, &in); + + ImageF temp(xsize, ysize); + ImageF out(xsize, ysize); + const std::vector kernel = + GaussianKernel(static_cast(4 * sigma), static_cast(sigma)); + for (auto _ : state) { + // Prevent optimizing out + JXL_ASSERT(std::abs(Convolve(in, kernel).ConstRow(ysize / 2)[xsize / 2] - + expected) / + expected < + 9E-5); + } + state.SetItemsProcessed(xsize * ysize * state.iterations()); +} + +void BM_GaussBlurSep7(benchmark::State& state) { + // See GaussBlur1d for SIMD changes. + + const size_t xsize = state.range(); + const size_t ysize = xsize; + ImageF in(xsize, ysize); + const float expected = xsize + ysize; + FillImage(expected, &in); + + ImageF temp(xsize, ysize); + ImageF out(xsize, ysize); + ThreadPool* null_pool = nullptr; + // Gaussian with sigma 1 + const WeightsSeparable7 weights = {{HWY_REP4(0.383103f), HWY_REP4(0.241843f), + HWY_REP4(0.060626f), HWY_REP4(0.00598f)}, + {HWY_REP4(0.383103f), HWY_REP4(0.241843f), + HWY_REP4(0.060626f), HWY_REP4(0.00598f)}}; + for (auto _ : state) { + Separable7(in, Rect(in), weights, null_pool, &out); + // Prevent optimizing out + JXL_ASSERT(std::abs(out.ConstRow(ysize / 2)[xsize / 2] - expected) / + expected < + 9E-5); + } + state.SetItemsProcessed(xsize * ysize * state.iterations()); +} + +BENCHMARK(BM_GaussBlur1d)->Range(1 << 8, 1 << 14); +BENCHMARK(BM_GaussBlur2d)->Range(1 << 7, 1 << 10); +BENCHMARK(BM_GaussBlurFir)->Range(1 << 7, 1 << 10); +BENCHMARK(BM_GaussBlurSep7)->Range(1 << 7, 1 << 10); + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/gauss_blur_test.cc b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_test.cc new file mode 100644 index 0000000000..097c1aa8df --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/gauss_blur_test.cc @@ -0,0 +1,453 @@ +// Copyright 
(c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/gauss_blur.h" + +#include +#include +#include + +#include "lib/extras/time.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/convolve.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { + +bool NearEdge(const int64_t width, const int64_t peak) { + // When around 3*sigma from the edge, there is negligible truncation. + return peak < 10 || peak > width - 10; +} + +// Follow the curve downwards by scanning right from `peak` and verifying +// identical values at the same offset to the left. +void VerifySymmetric(const int64_t width, const int64_t peak, + const float* out) { + const double tolerance = NearEdge(width, peak) ? 0.015 : 6E-7; + for (int64_t i = 1;; ++i) { + // Stop if we passed either end of the array + if (peak - i < 0 || peak + i >= width) break; + EXPECT_GT(out[peak + i - 1] + tolerance, out[peak + i]); // descending + EXPECT_NEAR(out[peak - i], out[peak + i], tolerance); // symmetric + } +} + +void TestImpulseResponse(size_t width, size_t peak) { + const auto rg3 = CreateRecursiveGaussian(3.0); + const auto rg4 = CreateRecursiveGaussian(4.0); + const auto rg5 = CreateRecursiveGaussian(5.0); + + // Extra padding for 4x unrolling + auto in = hwy::AllocateAligned(width + 3); + memset(in.get(), 0, sizeof(float) * (width + 3)); + in[peak] = 1.0f; + + auto out3 = hwy::AllocateAligned(width + 3); + auto out4 = hwy::AllocateAligned(width + 3); + auto out5 = hwy::AllocateAligned(width + 3); + FastGaussian1D(rg3, in.get(), width, out3.get()); + FastGaussian1D(rg4, out3.get(), width, out4.get()); + FastGaussian1D(rg5, in.get(), width, out5.get()); + + VerifySymmetric(width, peak, out3.get()); + VerifySymmetric(width, peak, out4.get()); + VerifySymmetric(width, peak, out5.get()); + + // Wider kernel 
has flatter peak + EXPECT_LT(out5[peak] + 0.05, out3[peak]); + + // Gauss3 o Gauss4 ~= Gauss5 + const double tolerance = NearEdge(width, peak) ? 0.04 : 0.01; + for (size_t i = 0; i < width; ++i) { + EXPECT_NEAR(out4[i], out5[i], tolerance); + } +} + +void TestImpulseResponseForWidth(size_t width) { + for (size_t i = 0; i < width; ++i) { + TestImpulseResponse(width, i); + } +} + +TEST(GaussBlurTest, ImpulseResponse) { + TestImpulseResponseForWidth(10); // tiny even + TestImpulseResponseForWidth(15); // small odd + TestImpulseResponseForWidth(32); // power of two + TestImpulseResponseForWidth(31); // power of two - 1 + TestImpulseResponseForWidth(33); // power of two + 1 +} + +ImageF Convolve(const ImageF& in, const std::vector& kernel) { + return ConvolveAndSample(in, kernel, 1); +} + +// Higher-precision version for accuracy test. +ImageF ConvolveAndTransposeF64(const ImageF& in, + const std::vector& kernel) { + JXL_ASSERT(kernel.size() % 2 == 1); + ImageF out(in.ysize(), in.xsize()); + const int r = kernel.size() / 2; + std::vector row_tmp(in.xsize() + 2 * r); + float* const JXL_RESTRICT rowp = &row_tmp[r]; + const double* const kernelp = &kernel[r]; + for (size_t y = 0; y < in.ysize(); ++y) { + ExtrapolateBorders(in.Row(y), rowp, in.xsize(), r); + for (size_t x = 0, ox = 0; x < in.xsize(); ++x, ++ox) { + double sum = 0.0; + for (int i = -r; i <= r; ++i) { + sum += rowp[std::max( + 0, std::min(static_cast(x) + i, in.xsize()))] * + kernelp[i]; + } + out.Row(ox)[y] = static_cast(sum); + } + } + return out; +} + +ImageF ConvolveF64(const ImageF& in, const std::vector& kernel) { + ImageF tmp = ConvolveAndTransposeF64(in, kernel); + return ConvolveAndTransposeF64(tmp, kernel); +} + +void TestDirac2D(size_t xsize, size_t ysize, double sigma) { + ImageF in(xsize, ysize); + ZeroFillImage(&in); + // We anyway ignore the border below, so might as well choose the middle. 
+ in.Row(ysize / 2)[xsize / 2] = 1.0f; + + ImageF temp(xsize, ysize); + ImageF out(xsize, ysize); + const auto rg = CreateRecursiveGaussian(sigma); + ThreadPool* null_pool = nullptr; + FastGaussian(rg, in, null_pool, &temp, &out); + + const std::vector kernel = + GaussianKernel(static_cast(4 * sigma), static_cast(sigma)); + const ImageF expected = Convolve(in, kernel); + + const double max_l1 = sigma < 1.5 ? 5E-3 : 6E-4; + const size_t border = 2 * sigma; + + JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, 1E-8, _, border)); +} + +TEST(GaussBlurTest, Test2D) { + const std::vector dimensions{6, 15, 17, 64, 50, 49}; + for (int xsize : dimensions) { + for (int ysize : dimensions) { + for (double sigma : {1.0, 2.5, 3.6, 7.0}) { + TestDirac2D(static_cast(xsize), static_cast(ysize), + sigma); + } + } + } +} + +// Slow (44 sec). To run, remove the disabled prefix. +TEST(GaussBlurTest, DISABLED_SlowTestDirac1D) { + const double sigma = 7.0; + const auto rg = CreateRecursiveGaussian(sigma); + + // IPOL accuracy test uses 10^-15 tolerance, this is 2*10^-11. 
+ const size_t radius = static_cast(7 * sigma); + const std::vector kernel = GaussianKernel(radius, sigma); + + const size_t length = 16384; + ImageF inputs(length, 1); + ZeroFillImage(&inputs); + + auto outputs = hwy::AllocateAligned(length); + + // One per center position + auto sum_abs_err = hwy::AllocateAligned(length); + std::fill(sum_abs_err.get(), sum_abs_err.get() + length, 0.0); + + for (size_t center = radius; center < length - radius; ++center) { + inputs.Row(0)[center - 1] = 0.0f; // reset last peak, entire array now 0 + inputs.Row(0)[center] = 1.0f; + FastGaussian1D(rg, inputs.Row(0), length, outputs.get()); + + const ImageF outputs_fir = ConvolveF64(inputs, kernel); + + for (size_t i = 0; i < length; ++i) { + const float abs_err = std::abs(outputs[i] - outputs_fir.Row(0)[i]); + sum_abs_err[i] += static_cast(abs_err); + } + } + + const double max_abs_err = + *std::max_element(sum_abs_err.get(), sum_abs_err.get() + length); + printf("Max abs err: %.8e\n", max_abs_err); +} + +void TestRandom(size_t xsize, size_t ysize, float min, float max, double sigma, + double max_l1, double max_rel) { + printf("%4" PRIuS " x %4" PRIuS " %4.1f %4.1f sigma %.1f\n", xsize, ysize, + min, max, sigma); + ImageF in(xsize, ysize); + RandomFillImage(&in, min, max, 65537 + xsize * 129 + ysize); + // FastGaussian/Convolve handle borders differently, so keep those pixels 0. 
+ const size_t border = 4 * sigma; + SetBorder(border, 0.0f, &in); + + ImageF temp(xsize, ysize); + ImageF out(xsize, ysize); + const auto rg = CreateRecursiveGaussian(sigma); + ThreadPool* null_pool = nullptr; + FastGaussian(rg, in, null_pool, &temp, &out); + + const std::vector kernel = + GaussianKernel(static_cast(4 * sigma), static_cast(sigma)); + const ImageF expected = Convolve(in, kernel); + + JXL_ASSERT_OK(VerifyRelativeError(expected, out, max_l1, max_rel, _, border)); +} + +void TestRandomForSizes(float min, float max, double sigma) { + double max_l1 = 6E-3; + double max_rel = 3E-3; + TestRandom(128, 1, min, max, sigma, max_l1, max_rel); + TestRandom(1, 128, min, max, sigma, max_l1, max_rel); + TestRandom(30, 201, min, max, sigma, max_l1 * 1.6, max_rel * 1.2); + TestRandom(201, 30, min, max, sigma, max_l1 * 1.6, max_rel * 1.2); + TestRandom(201, 201, min, max, sigma, max_l1 * 2.0, max_rel * 1.2); +} + +TEST(GaussBlurTest, TestRandom) { + // small non-negative + TestRandomForSizes(0.0f, 10.0f, 3.0f); + TestRandomForSizes(0.0f, 10.0f, 7.0f); + + // small negative + TestRandomForSizes(-4.0f, -1.0f, 3.0f); + TestRandomForSizes(-4.0f, -1.0f, 7.0f); + + // mixed positive/negative + TestRandomForSizes(-6.0f, 6.0f, 3.0f); + TestRandomForSizes(-6.0f, 6.0f, 7.0f); +} + +TEST(GaussBlurTest, TestSign) { + const size_t xsize = 500; + const size_t ysize = 606; + ImageF in(xsize, ysize); + + ZeroFillImage(&in); + const float center[33 * 33] = { + -0.128445f, -0.098473f, -0.121883f, -0.093601f, 0.095665f, -0.271332f, + -0.705475f, -1.324005f, -2.020741f, -1.329464f, 1.834064f, 4.787300f, + 5.834560f, 5.272720f, 3.967960f, 3.547935f, 3.432732f, 3.383015f, + 3.239326f, 3.290806f, 3.298954f, 3.397808f, 3.359730f, 3.533844f, + 3.511856f, 3.436787f, 3.428310f, 3.460209f, 3.550011f, 3.590942f, + 3.593109f, 3.560005f, 3.443165f, 0.089741f, 0.179230f, -0.032997f, + -0.182610f, 0.005669f, -0.244759f, -0.395123f, -0.514961f, -1.003529f, + -1.798656f, -2.377975f, 0.222191f, 
3.957664f, 5.946804f, 5.543129f, + 4.290096f, 3.621010f, 3.407257f, 3.392494f, 3.345367f, 3.391903f, + 3.441605f, 3.429260f, 3.444969f, 3.507130f, 3.518612f, 3.443111f, + 3.475948f, 3.536148f, 3.470333f, 3.628311f, 3.600243f, 3.292892f, + -0.226730f, -0.573616f, -0.762165f, -0.398739f, -0.189842f, -0.275921f, + -0.446739f, -0.550037f, -0.461033f, -0.724792f, -1.448349f, -1.814064f, + -0.491032f, 2.817703f, 5.213242f, 5.675629f, 4.864548f, 3.876324f, + 3.535587f, 3.530312f, 3.413765f, 3.386261f, 3.404854f, 3.383472f, + 3.420830f, 3.326496f, 3.257877f, 3.362152f, 3.489609f, 3.619587f, + 3.555805f, 3.423164f, 3.309708f, -0.483940f, -0.502926f, -0.592983f, + -0.492527f, -0.413616f, -0.482555f, -0.475506f, -0.447990f, -0.338120f, + -0.189072f, -0.376427f, -0.910828f, -1.878044f, -1.937927f, 1.423218f, + 4.871609f, 5.767548f, 5.103741f, 3.983868f, 3.633003f, 3.458263f, + 3.507309f, 3.247021f, 3.220612f, 3.326061f, 3.352814f, 3.291061f, + 3.322739f, 3.444302f, 3.506207f, 3.556839f, 3.529575f, 3.457024f, + -0.408161f, -0.431343f, -0.454369f, -0.356419f, -0.380924f, -0.399452f, + -0.439476f, -0.412189f, -0.306816f, -0.008213f, -0.325813f, -0.537842f, + -0.984100f, -1.805332f, -2.028198f, 0.773205f, 4.423046f, 5.604839f, + 5.231617f, 4.080299f, 3.603008f, 3.498741f, 3.517010f, 3.333897f, + 3.381336f, 3.342617f, 3.369686f, 3.434155f, 3.490452f, 3.607029f, + 3.555298f, 3.702297f, 3.618679f, -0.503609f, -0.578564f, -0.419014f, + -0.239883f, 0.269836f, 0.022984f, -0.455067f, -0.621777f, -0.304176f, + -0.163792f, -0.490250f, -0.466637f, -0.391792f, -0.657940f, -1.498035f, + -1.895836f, 0.036537f, 3.462456f, 5.586445f, 5.658791f, 4.434784f, + 3.423435f, 3.318848f, 3.202328f, 3.532764f, 3.436687f, 3.354881f, + 3.356941f, 3.382645f, 3.503902f, 3.512867f, 3.632366f, 3.537312f, + -0.274734f, -0.658829f, -0.726532f, -0.281254f, 0.053196f, -0.064991f, + -0.608517f, -0.720966f, -0.070602f, -0.111320f, -0.440956f, -0.492180f, + -0.488762f, -0.569283f, -1.012741f, -1.582779f, -2.101479f, 
-1.392380f, + 2.451153f, 5.555855f, 6.096313f, 5.230045f, 4.068172f, 3.404274f, + 3.392586f, 3.326065f, 3.156670f, 3.284828f, 3.347012f, 3.319252f, + 3.352310f, 3.610790f, 3.499847f, -0.150600f, -0.314445f, -0.093575f, + -0.057384f, 0.053688f, -0.189255f, -0.263515f, -0.318653f, 0.053246f, + 0.080627f, -0.119553f, -0.152454f, -0.305420f, -0.404869f, -0.385944f, + -0.689949f, -1.204914f, -1.985748f, -1.711361f, 1.260658f, 4.626896f, + 5.888351f, 5.450989f, 4.070587f, 3.539200f, 3.383492f, 3.296318f, + 3.267334f, 3.436028f, 3.463005f, 3.502625f, 3.522282f, 3.403763f, + -0.348049f, -0.302303f, -0.137016f, -0.041737f, -0.164001f, -0.358849f, + -0.469627f, -0.428291f, -0.375797f, -0.246346f, -0.118950f, -0.084229f, + -0.205681f, -0.241199f, -0.391796f, -0.323151f, -0.241211f, -0.834137f, + -1.684219f, -1.972137f, 0.448399f, 4.019985f, 5.648144f, 5.647846f, + 4.295094f, 3.641884f, 3.374790f, 3.197342f, 3.425545f, 3.507481f, + 3.478065f, 3.430889f, 3.341900f, -1.016304f, -0.959221f, -0.909466f, + -0.810715f, -0.590729f, -0.594467f, -0.646721f, -0.629364f, -0.528561f, + -0.551819f, -0.301086f, -0.149101f, -0.060146f, -0.162220f, -0.326210f, + -0.156548f, -0.036293f, -0.426098f, -1.145470f, -1.628998f, -2.003052f, + -1.142891f, 2.885162f, 5.652863f, 5.718426f, 4.911140f, 3.234222f, + 3.473373f, 3.577183f, 3.271603f, 3.410435f, 3.505489f, 3.434032f, + -0.508911f, -0.438797f, -0.437450f, -0.627426f, -0.511745f, -0.304874f, + -0.274246f, -0.261841f, -0.228466f, -0.342491f, -0.528206f, -0.490082f, + -0.516350f, -0.361694f, -0.398514f, -0.276020f, -0.210369f, -0.355938f, + -0.402622f, -0.538864f, -1.249573f, -2.100105f, -0.996178f, 1.886410f, + 4.929745f, 5.630871f, 5.444199f, 4.042740f, 3.739189f, 3.691399f, + 3.391956f, 3.469696f, 3.431232f, 0.204849f, 0.205433f, -0.131927f, + -0.367908f, -0.374378f, -0.126820f, -0.186951f, -0.228565f, -0.081776f, + -0.143143f, -0.379230f, -0.598701f, -0.458019f, -0.295586f, -0.407730f, + -0.245853f, -0.043140f, 0.024242f, -0.038998f, 
-0.044151f, -0.425991f, + -1.240753f, -1.943146f, -2.174755f, 0.523415f, 4.376751f, 5.956558f, + 5.850082f, 4.403152f, 3.517399f, 3.560753f, 3.554836f, 3.471985f, + -0.508503f, -0.109783f, 0.057747f, 0.190079f, -0.257153f, -0.591980f, + -0.666771f, -0.525391f, -0.293060f, -0.489731f, -0.304855f, -0.259644f, + -0.367825f, -0.346977f, -0.292889f, -0.215652f, -0.120705f, -0.176010f, + -0.422905f, -0.114647f, -0.289749f, -0.374203f, -0.606754f, -1.127949f, + -1.994583f, -0.588058f, 3.415840f, 5.603470f, 5.811581f, 4.959423f, + 3.721760f, 3.710499f, 3.785461f, -0.554588f, -0.565517f, -0.434578f, + -0.012482f, -0.284660f, -0.699795f, -0.957535f, -0.755135f, -0.382034f, + -0.321552f, -0.287571f, -0.279537f, -0.314972f, -0.256287f, -0.372818f, + -0.316017f, -0.287975f, -0.365639f, -0.512589f, -0.420692f, -0.436485f, + -0.295353f, -0.451958f, -0.755459f, -1.272358f, -2.301353f, -1.776161f, + 1.572483f, 4.826286f, 5.741898f, 5.162853f, 4.028049f, 3.686325f, + -0.495590f, -0.664413f, -0.760044f, -0.152634f, -0.286480f, -0.340462f, + 0.076477f, 0.187706f, -0.068787f, -0.293491f, -0.361145f, -0.292515f, + -0.140671f, -0.190723f, -0.333302f, -0.368168f, -0.192581f, -0.154499f, + -0.236544f, -0.124405f, -0.208321f, -0.465607f, -0.883080f, -1.104813f, + -1.210567f, -1.415665f, -1.924683f, -1.634758f, 0.601017f, 4.276672f, + 5.501350f, 5.331257f, 3.809288f, -0.727722f, -0.533619f, -0.511524f, + -0.470688f, -0.610710f, -0.575130f, -0.311115f, -0.090420f, -0.297676f, + -0.646118f, -0.742805f, -0.485050f, -0.330910f, -0.275417f, -0.357037f, + -0.425598f, -0.481876f, -0.488941f, -0.393551f, -0.051105f, -0.090755f, + -0.328674f, -0.536369f, -0.533684f, -0.336960f, -0.689194f, -1.187195f, + -1.860954f, -2.290253f, -0.424774f, 3.050060f, 5.083332f, 5.291920f, + -0.343605f, -0.190975f, -0.303692f, -0.456512f, -0.681820f, -0.690693f, + -0.416729f, -0.286446f, -0.442055f, -0.709148f, -0.569160f, -0.382423f, + -0.402321f, -0.383362f, -0.366413f, -0.290718f, -0.110069f, -0.220280f, + 
-0.279018f, -0.255424f, -0.262081f, -0.487556f, -0.444492f, -0.250500f, + -0.119583f, -0.291557f, -0.537781f, -1.104073f, -1.737091f, -1.697441f, + -0.323456f, 2.042049f, 4.605103f, -0.310631f, -0.279568f, -0.012695f, + -0.160130f, -0.358746f, -0.421101f, -0.559677f, -0.474136f, -0.416565f, + -0.561817f, -0.534672f, -0.519157f, -0.767197f, -0.605831f, -0.186523f, + 0.219872f, 0.264984f, -0.193432f, -0.363182f, -0.467472f, -0.462009f, + -0.571053f, -0.522476f, -0.315903f, -0.237427f, -0.147320f, -0.100201f, + -0.237568f, -0.763435f, -1.242043f, -2.135159f, -1.409485f, 1.236370f, + -0.474247f, -0.517906f, -0.410217f, -0.542244f, -0.795986f, -0.590004f, + -0.388863f, -0.462921f, -0.810627f, -0.778637f, -0.512486f, -0.718025f, + -0.710854f, -0.482513f, -0.318233f, -0.194962f, -0.220116f, -0.421673f, + -0.534233f, -0.403339f, -0.389332f, -0.407303f, -0.437355f, -0.469730f, + -0.359600f, -0.352745f, -0.466755f, -0.414585f, -0.430756f, -0.656822f, + -1.237038f, -2.046097f, -1.574898f, -0.593815f, -0.582165f, -0.336098f, + -0.372612f, -0.554386f, -0.410603f, -0.428276f, -0.647644f, -0.640720f, + -0.582207f, -0.414112f, -0.435547f, -0.435505f, -0.332561f, -0.248116f, + -0.340221f, -0.277855f, -0.352699f, -0.377319f, -0.230850f, -0.313267f, + -0.446270f, -0.346237f, -0.420422f, -0.530781f, -0.400341f, -0.463661f, + -0.209091f, -0.056705f, -0.011772f, -0.169388f, -0.736275f, -1.463017f, + -0.752701f, -0.668865f, -0.329765f, -0.299347f, -0.245667f, -0.286999f, + -0.520420f, -0.675438f, -0.255753f, 0.141357f, -0.079639f, -0.419476f, + -0.374069f, -0.046253f, 0.116116f, -0.145847f, -0.380371f, -0.563412f, + -0.638634f, -0.310116f, -0.260914f, -0.508404f, -0.465508f, -0.527824f, + -0.370979f, -0.305595f, -0.244694f, -0.254490f, 0.009968f, -0.050201f, + -0.331219f, -0.614960f, -0.788208f, -0.483242f, -0.367516f, -0.186951f, + -0.180031f, 0.129711f, -0.127811f, -0.384750f, -0.499542f, -0.418613f, + -0.121635f, 0.203197f, -0.167290f, -0.397270f, -0.355461f, -0.218746f, + -0.376785f, 
-0.521698f, -0.721581f, -0.845741f, -0.535439f, -0.220882f, + -0.309067f, -0.555248f, -0.690342f, -0.664948f, -0.390102f, 0.020355f, + -0.130447f, -0.173252f, -0.170059f, -0.633663f, -0.956001f, -0.621696f, + -0.388302f, -0.342262f, -0.244370f, -0.386948f, -0.401421f, -0.172979f, + -0.206163f, -0.450058f, -0.525789f, -0.549274f, -0.349251f, -0.474613f, + -0.667976f, -0.435600f, -0.175369f, -0.196877f, -0.202976f, -0.242481f, + -0.258369f, -0.189133f, -0.395397f, -0.765499f, -0.944016f, -0.850967f, + -0.631561f, -0.152493f, -0.046432f, -0.262066f, -0.195919f, 0.048218f, + 0.084972f, 0.039902f, 0.000618f, -0.404430f, -0.447456f, -0.418076f, + -0.631935f, -0.717415f, -0.502888f, -0.530514f, -0.747826f, -0.704041f, + -0.674969f, -0.516853f, -0.418446f, -0.327740f, -0.308815f, -0.481636f, + -0.440083f, -0.481720f, -0.341053f, -0.283897f, -0.324368f, -0.352829f, + -0.434349f, -0.545589f, -0.533104f, -0.472755f, -0.570496f, -0.557735f, + -0.708176f, -0.493332f, -0.194416f, -0.186249f, -0.256710f, -0.271835f, + -0.304752f, -0.431267f, -0.422398f, -0.646725f, -0.680801f, -0.249031f, + -0.058567f, -0.213890f, -0.383949f, -0.540291f, -0.549877f, -0.225567f, + -0.037174f, -0.499874f, -0.641010f, -0.628044f, -0.390549f, -0.311497f, + -0.542313f, -0.569565f, -0.473408f, -0.331245f, -0.357197f, -0.285599f, + -0.200157f, -0.201866f, -0.124428f, -0.346016f, -0.392311f, -0.264496f, + -0.285370f, -0.436974f, -0.523483f, -0.410461f, -0.267925f, -0.055016f, + -0.382458f, -0.319771f, -0.049927f, 0.124329f, 0.266102f, -0.106606f, + -0.773647f, -0.973053f, -0.708206f, -0.486137f, -0.319923f, -0.493900f, + -0.490860f, -0.324986f, -0.147346f, -0.146088f, -0.161758f, -0.084396f, + -0.379494f, 0.041626f, -0.113361f, -0.277767f, 0.083366f, 0.126476f, + 0.139057f, 0.038040f, 0.038162f, -0.242126f, -0.411736f, -0.370049f, + -0.455357f, -0.039257f, 0.264442f, -0.271492f, -0.425346f, -0.514847f, + -0.448650f, -0.580399f, -0.652603f, -0.774803f, -0.692524f, -0.579578f, + -0.465206f, -0.386265f, 
-0.458012f, -0.446594f, -0.284893f, -0.345448f, + -0.350876f, -0.440350f, -0.360378f, -0.270428f, 0.237213f, -0.063602f, + -0.364529f, -0.179867f, 0.078197f, 0.117947f, -0.093410f, -0.359119f, + -0.480961f, -0.540638f, -0.436287f, -0.598576f, -0.253735f, -0.060093f, + -0.549145f, -0.808327f, -0.698593f, -0.595764f, -0.582508f, -0.497353f, + -0.480892f, -0.584240f, -0.665791f, -0.690903f, -0.743446f, -0.796677f, + -0.782391f, -0.649010f, -0.628139f, -0.880848f, -0.829361f, -0.373272f, + -0.223667f, 0.174572f, -0.348743f, -0.798901f, -0.692307f, -0.607609f, + -0.401455f, -0.480919f, -0.450798f, -0.435413f, -0.322338f, -0.228382f, + -0.450466f, -0.504440f, -0.477402f, -0.662224f, -0.583397f, -0.217445f, + -0.157459f, -0.079584f, -0.226168f, -0.488720f, -0.669624f, -0.666878f, + -0.565311f, -0.549625f, -0.364601f, -0.497627f, -0.736897f, -0.763023f, + -0.741020f, -0.404503f, 0.184814f, -0.075315f, -0.281513f, -0.532906f, + -0.405800f, -0.313438f, -0.536652f, -0.403381f, 0.011967f, 0.103310f, + -0.269848f, -0.508656f, -0.445923f, -0.644859f, -0.617870f, -0.500927f, + -0.371559f, -0.125580f, 0.028625f, -0.154713f, -0.442024f, -0.492764f, + -0.199371f, 0.236305f, 0.225925f, 0.075577f, -0.285812f, -0.437145f, + -0.374260f, -0.156693f, -0.129635f, -0.243206f, -0.123058f, 0.162148f, + -0.313152f, -0.337982f, -0.358421f, 0.040070f, 0.038925f, -0.333313f, + -0.351662f, 0.023014f, 0.091362f, -0.282890f, -0.373253f, -0.389050f, + -0.532707f, -0.423347f, -0.349968f, -0.287045f, -0.202442f, -0.308430f, + -0.222801f, -0.106323f, -0.056358f, 0.027222f, 0.390732f, 0.033558f, + -0.160088f, -0.382217f, -0.535282f, -0.515900f, -0.022736f, 0.165665f, + -0.111408f, -0.233784f, -0.312357f, -0.541885f, -0.480022f, -0.482513f, + -0.246254f, 0.132244f, 0.090134f, 0.234634f, -0.089249f, -0.460854f, + -0.515457f, -0.450874f, -0.311031f, -0.387680f, -0.360554f, -0.179241f, + -0.283817f, -0.475815f, -0.246399f, -0.388958f, -0.551140f, -0.496239f, + -0.559879f, -0.379761f, -0.254288f, -0.395111f, 
-0.613018f, -0.459427f, + -0.263580f, -0.268929f, 0.080826f, 0.115616f, -0.097324f, -0.325310f, + -0.480450f, -0.313286f, -0.310371f, -0.517361f, -0.288288f, -0.112679f, + -0.173241f, -0.221664f, -0.039452f, -0.107578f, -0.089630f, -0.483768f, + -0.571087f, -0.497108f, -0.321533f, -0.375492f, -0.540363f, -0.406815f, + -0.388512f, -0.514561f, -0.540192f, -0.402412f, -0.232246f, -0.304749f, + -0.383724f, -0.679596f, -0.685463f, -0.694538f, -0.642937f, -0.425789f, + 0.103271f, -0.194862f, -0.487999f, -0.717281f, -0.681850f, -0.709286f, + -0.615398f, -0.554245f, -0.254681f, -0.049950f, -0.002914f, -0.095383f, + -0.370911f, -0.564224f, -0.242714f}; + const size_t xtest = xsize / 2; + const size_t ytest = ysize / 2; + + for (intptr_t dy = -16; dy <= 16; ++dy) { + float* row = in.Row(ytest + dy); + for (intptr_t dx = -16; dx <= 16; ++dx) + row[xtest + dx] = center[(dy + 16) * 33 + (dx + 16)]; + } + + const double sigma = 7.155933; + + ImageF temp(xsize, ysize); + ImageF out_rg(xsize, ysize); + const auto rg = CreateRecursiveGaussian(sigma); + ThreadPool* null_pool = nullptr; + FastGaussian(rg, in, null_pool, &temp, &out_rg); + + ImageF out_old; + { + const std::vector kernel = + GaussianKernel(static_cast(4 * sigma), static_cast(sigma)); + printf("old kernel size %" PRIuS "\n", kernel.size()); + out_old = Convolve(in, kernel); + } + + printf("rg %.4f old %.4f\n", out_rg.Row(ytest)[xtest], + out_old.Row(ytest)[xtest]); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/gradient_test.cc b/third-party/libjxl/libjxl/lib/jxl/gradient_test.cc new file mode 100644 index 0000000000..282fe89f0a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/gradient_test.cc @@ -0,0 +1,207 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/common.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_file.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { + +struct AuxOut; + +namespace { + +// Returns distance of point p to line p0..p1, the result is signed and is not +// normalized. +double PointLineDist(double x0, double y0, double x1, double y1, double x, + double y) { + return (y1 - y0) * x - (x1 - x0) * y + x1 * y0 - y1 * x0; +} + +// Generates a test image with a gradient from one color to another. +// Angle in degrees, colors can be given in hex as 0xRRGGBB. The angle is the +// angle in which the change direction happens. 
+Image3F GenerateTestGradient(uint32_t color0, uint32_t color1, double angle, + size_t xsize, size_t ysize) { + Image3F image(xsize, ysize); + + double x0 = xsize / 2; + double y0 = ysize / 2; + double x1 = x0 + std::sin(angle / 360.0 * 2.0 * kPi); + double y1 = y0 + std::cos(angle / 360.0 * 2.0 * kPi); + + double maxdist = + std::max(fabs(PointLineDist(x0, y0, x1, y1, 0, 0)), + fabs(PointLineDist(x0, y0, x1, y1, xsize, 0))); + + for (size_t c = 0; c < 3; ++c) { + float c0 = ((color0 >> (8 * (2 - c))) & 255); + float c1 = ((color1 >> (8 * (2 - c))) & 255); + for (size_t y = 0; y < ysize; ++y) { + float* row = image.PlaneRow(c, y); + for (size_t x = 0; x < xsize; ++x) { + double dist = PointLineDist(x0, y0, x1, y1, x, y); + double v = ((dist / maxdist) + 1.0) / 2.0; + float color = c0 * (1.0 - v) + c1 * v; + row[x] = color; + } + } + } + + return image; +} + +// Computes the max of the horizontal and vertical second derivative for each +// pixel, where second derivative means absolute value of difference of left +// delta and right delta (top/bottom for vertical direction). +// The radius over which the derivative is computed is only 1 pixel and it only +// checks two angles (hor and ver), but this approximation works well enough. 
+static ImageF Gradient2(const ImageF& image) { + size_t xsize = image.xsize(); + size_t ysize = image.ysize(); + ImageF image2(image.xsize(), image.ysize()); + for (size_t y = 1; y + 1 < ysize; y++) { + const auto* JXL_RESTRICT row0 = image.Row(y - 1); + const auto* JXL_RESTRICT row1 = image.Row(y); + const auto* JXL_RESTRICT row2 = image.Row(y + 1); + auto* row_out = image2.Row(y); + for (size_t x = 1; x + 1 < xsize; x++) { + float ddx = (row1[x] - row1[x - 1]) - (row1[x + 1] - row1[x]); + float ddy = (row1[x] - row0[x]) - (row2[x] - row1[x]); + row_out[x] = std::max(fabsf(ddx), fabsf(ddy)); + } + } + // Copy to the borders + if (ysize > 2) { + auto* JXL_RESTRICT row0 = image2.Row(0); + const auto* JXL_RESTRICT row1 = image2.Row(1); + const auto* JXL_RESTRICT row2 = image2.Row(ysize - 2); + auto* JXL_RESTRICT row3 = image2.Row(ysize - 1); + for (size_t x = 1; x + 1 < xsize; x++) { + row0[x] = row1[x]; + row3[x] = row2[x]; + } + } else { + const auto* row0_in = image.Row(0); + const auto* row1_in = image.Row(ysize - 1); + auto* row0_out = image2.Row(0); + auto* row1_out = image2.Row(ysize - 1); + for (size_t x = 1; x + 1 < xsize; x++) { + // Image too narrow, take first derivative instead + row0_out[x] = row1_out[x] = fabsf(row0_in[x] - row1_in[x]); + } + } + if (xsize > 2) { + for (size_t y = 0; y < ysize; y++) { + auto* row = image2.Row(y); + row[0] = row[1]; + row[xsize - 1] = row[xsize - 2]; + } + } else { + for (size_t y = 0; y < ysize; y++) { + const auto* JXL_RESTRICT row_in = image.Row(y); + auto* row_out = image2.Row(y); + // Image too narrow, take first derivative instead + row_out[0] = row_out[xsize - 1] = fabsf(row_in[0] - row_in[xsize - 1]); + } + } + return image2; +} + +static Image3F Gradient2(const Image3F& image) { + return Image3F(Gradient2(image.Plane(0)), Gradient2(image.Plane(1)), + Gradient2(image.Plane(2))); +} + +/* +Tests if roundtrip with jxl on a gradient image doesn't cause banding. +Only tests if use_gradient is true. 
Set to false for debugging to see the +distance values. +Angle in degrees, colors can be given in hex as 0xRRGGBB. +*/ +void TestGradient(ThreadPool* pool, uint32_t color0, uint32_t color1, + size_t xsize, size_t ysize, float angle, bool fast_mode, + float butteraugli_distance, bool use_gradient = true) { + CompressParams cparams; + cparams.butteraugli_distance = butteraugli_distance; + if (fast_mode) { + cparams.speed_tier = SpeedTier::kSquirrel; + } + Image3F gradient = GenerateTestGradient(color0, color1, angle, xsize, ysize); + + CodecInOut io; + io.metadata.m.SetUintSamples(8); + io.metadata.m.color_encoding = ColorEncoding::SRGB(); + io.SetFromImage(std::move(gradient), io.metadata.m.color_encoding); + + CodecInOut io2; + + PaddedBytes compressed; + AuxOut* aux_out = nullptr; + PassesEncoderState enc_state; + EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), + aux_out, pool)); + EXPECT_TRUE( + test::DecodeFile({}, Span(compressed), &io2, pool)); + EXPECT_TRUE( + io2.Main().TransformTo(io2.metadata.m.color_encoding, GetJxlCms(), pool)); + + if (use_gradient) { + // Test that the gradient map worked. For that, we take a second derivative + // of the image with Gradient2 to measure how linear the change is in x and + // y direction. For a well handled gradient, we expect max values around + // 0.1, while if there is noticeable banding, which means the gradient map + // failed, the values are around 0.5-1.0 (regardless of + // butteraugli_distance). + Image3F gradient2 = Gradient2(*io2.Main().color()); + + std::array image_max; + Image3Max(gradient2, &image_max); + + // TODO(jyrki): These values used to work with 0.2, 0.2, 0.2. 
+ EXPECT_LE(image_max[0], 3.15); + EXPECT_LE(image_max[1], 1.72); + EXPECT_LE(image_max[2], 5.05); + } +} + +static constexpr bool fast_mode = true; + +TEST(GradientTest, SteepGradient) { + test::ThreadPoolForTests pool(8); + // Relatively steep gradients, colors from the sky of stp.png + TestGradient(&pool, 0xd99d58, 0x889ab1, 512, 512, 90, fast_mode, 3.0); +} + +TEST(GradientTest, SubtleGradient) { + test::ThreadPoolForTests pool(8); + // Very subtle gradient + TestGradient(&pool, 0xb89b7b, 0xa89b8d, 512, 512, 90, fast_mode, 4.0); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/headers.cc b/third-party/libjxl/libjxl/lib/jxl/headers.cc new file mode 100644 index 0000000000..dc53726385 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/headers.cc @@ -0,0 +1,194 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/headers.h" + +#include "lib/jxl/common.h" +#include "lib/jxl/fields.h" + +namespace jxl { +namespace { + +struct Rational { + constexpr explicit Rational(uint32_t num, uint32_t den) + : num(num), den(den) {} + + // Returns floor(multiplicand * rational). 
+ constexpr uint32_t MulTruncate(uint32_t multiplicand) const { + return uint64_t(multiplicand) * num / den; + } + + uint32_t num; + uint32_t den; +}; + +Rational FixedAspectRatios(uint32_t ratio) { + JXL_ASSERT(0 != ratio && ratio < 8); + // Other candidates: 5/4, 7/5, 14/9, 16/10, 5/3, 21/9, 12/5 + constexpr Rational kRatios[7] = {Rational(1, 1), // square + Rational(12, 10), // + Rational(4, 3), // camera + Rational(3, 2), // mobile camera + Rational(16, 9), // camera/display + Rational(5, 4), // + Rational(2, 1)}; // + return kRatios[ratio - 1]; +} + +uint32_t FindAspectRatio(uint32_t xsize, uint32_t ysize) { + for (uint32_t r = 1; r < 8; ++r) { + if (xsize == FixedAspectRatios(r).MulTruncate(ysize)) { + return r; + } + } + return 0; // Must send xsize instead +} + +} // namespace + +size_t SizeHeader::xsize() const { + if (ratio_ != 0) { + return FixedAspectRatios(ratio_).MulTruncate( + static_cast(ysize())); + } + return small_ ? ((xsize_div8_minus_1_ + 1) * 8) : xsize_; +} + +Status SizeHeader::Set(size_t xsize64, size_t ysize64) { + if (xsize64 > 0xFFFFFFFFull || ysize64 > 0xFFFFFFFFull) { + return JXL_FAILURE("Image too large"); + } + const uint32_t xsize32 = static_cast(xsize64); + const uint32_t ysize32 = static_cast(ysize64); + if (xsize64 == 0 || ysize64 == 0) return JXL_FAILURE("Empty image"); + ratio_ = FindAspectRatio(xsize32, ysize32); + small_ = ysize64 <= 256 && (ysize64 % kBlockDim) == 0 && + (ratio_ != 0 || (xsize64 <= 256 && (xsize64 % kBlockDim) == 0)); + if (small_) { + ysize_div8_minus_1_ = ysize32 / 8 - 1; + } else { + ysize_ = ysize32; + } + + if (ratio_ == 0) { + if (small_) { + xsize_div8_minus_1_ = xsize32 / 8 - 1; + } else { + xsize_ = xsize32; + } + } + JXL_ASSERT(xsize() == xsize64); + JXL_ASSERT(ysize() == ysize64); + return true; +} + +Status PreviewHeader::Set(size_t xsize64, size_t ysize64) { + const uint32_t xsize32 = static_cast(xsize64); + const uint32_t ysize32 = static_cast(ysize64); + if (xsize64 == 0 || ysize64 == 0) 
return JXL_FAILURE("Empty preview"); + div8_ = (xsize64 % kBlockDim) == 0 && (ysize64 % kBlockDim) == 0; + if (div8_) { + ysize_div8_ = ysize32 / 8; + } else { + ysize_ = ysize32; + } + + ratio_ = FindAspectRatio(xsize32, ysize32); + if (ratio_ == 0) { + if (div8_) { + xsize_div8_ = xsize32 / 8; + } else { + xsize_ = xsize32; + } + } + JXL_ASSERT(xsize() == xsize64); + JXL_ASSERT(ysize() == ysize64); + return true; +} + +size_t PreviewHeader::xsize() const { + if (ratio_ != 0) { + return FixedAspectRatios(ratio_).MulTruncate( + static_cast(ysize())); + } + return div8_ ? (xsize_div8_ * 8) : xsize_; +} + +SizeHeader::SizeHeader() { Bundle::Init(this); } +Status SizeHeader::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &small_)); + + if (visitor->Conditional(small_)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &ysize_div8_minus_1_)); + } + if (visitor->Conditional(!small_)) { + // (Could still be small, but non-multiple of 8.) + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1), + BitsOffset(18, 1), BitsOffset(30, 1), + 1, &ysize_)); + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_)); + if (visitor->Conditional(ratio_ == 0 && small_)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, 0, &xsize_div8_minus_1_)); + } + if (visitor->Conditional(ratio_ == 0 && !small_)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(9, 1), BitsOffset(13, 1), + BitsOffset(18, 1), BitsOffset(30, 1), + 1, &xsize_)); + } + + return true; +} + +PreviewHeader::PreviewHeader() { Bundle::Init(this); } +Status PreviewHeader::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &div8_)); + + if (visitor->Conditional(div8_)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1), + BitsOffset(9, 33), 1, &ysize_div8_)); + } + if (visitor->Conditional(!div8_)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65), + 
BitsOffset(10, 321), + BitsOffset(12, 1345), 1, &ysize_)); + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &ratio_)); + if (visitor->Conditional(ratio_ == 0 && div8_)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), Val(32), BitsOffset(5, 1), + BitsOffset(9, 33), 1, &xsize_div8_)); + } + if (visitor->Conditional(ratio_ == 0 && !div8_)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(BitsOffset(6, 1), BitsOffset(8, 65), + BitsOffset(10, 321), + BitsOffset(12, 1345), 1, &xsize_)); + } + + return true; +} + +AnimationHeader::AnimationHeader() { Bundle::Init(this); } +Status AnimationHeader::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(100), Val(1000), BitsOffset(10, 1), + BitsOffset(30, 1), 1, &tps_numerator)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Val(1001), BitsOffset(8, 1), + BitsOffset(10, 1), 1, + &tps_denominator)); + + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), Bits(3), Bits(16), Bits(32), 0, &num_loops)); + + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_timecodes)); + return true; +} + +Status ReadSizeHeader(BitReader* JXL_RESTRICT reader, + SizeHeader* JXL_RESTRICT size) { + return Bundle::Read(reader, size); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/headers.h b/third-party/libjxl/libjxl/lib/jxl/headers.h new file mode 100644 index 0000000000..3cce84dabc --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/headers.h @@ -0,0 +1,97 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_HEADERS_H_ +#define LIB_JXL_HEADERS_H_ + +// Codestream headers. + +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/field_encodings.h" + +namespace jxl { + +// Reserved by ISO/IEC 10918-1. 
LF causes files opened in text mode to be +// rejected because the marker changes to 0x0D instead. The 0xFF prefix also +// ensures there were no 7-bit transmission limitations. +static constexpr uint8_t kCodestreamMarker = 0x0A; + +// Compact representation of image dimensions (best case: 9 bits) so decoders +// can preallocate early. +class SizeHeader : public Fields { + public: + SizeHeader(); + JXL_FIELDS_NAME(SizeHeader) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + Status Set(size_t xsize, size_t ysize); + + size_t xsize() const; + size_t ysize() const { + return small_ ? ((ysize_div8_minus_1_ + 1) * 8) : ysize_; + } + + private: + bool small_; // xsize and ysize <= 256 and divisible by 8. + + uint32_t ysize_div8_minus_1_; + uint32_t ysize_; + + uint32_t ratio_; + uint32_t xsize_div8_minus_1_; + uint32_t xsize_; +}; + +// (Similar to SizeHeader but different encoding because previews are smaller) +class PreviewHeader : public Fields { + public: + PreviewHeader(); + JXL_FIELDS_NAME(PreviewHeader) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + Status Set(size_t xsize, size_t ysize); + + size_t xsize() const; + size_t ysize() const { return div8_ ? (ysize_div8_ * 8) : ysize_; } + + private: + bool div8_; // xsize and ysize divisible by 8. + + uint32_t ysize_div8_; + uint32_t ysize_; + + uint32_t ratio_; + uint32_t xsize_div8_; + uint32_t xsize_; +}; + +struct AnimationHeader : public Fields { + AnimationHeader(); + JXL_FIELDS_NAME(AnimationHeader) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + // Ticks per second (expressed as rational number to support NTSC) + uint32_t tps_numerator; + uint32_t tps_denominator; + + uint32_t num_loops; // 0 means to repeat infinitely. 
+ + bool have_timecodes; +}; + +Status ReadSizeHeader(BitReader* JXL_RESTRICT reader, + SizeHeader* JXL_RESTRICT size); + +} // namespace jxl + +#endif // LIB_JXL_HEADERS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/huffman_table.cc b/third-party/libjxl/libjxl/lib/jxl/huffman_table.cc new file mode 100644 index 0000000000..9ae7865af6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/huffman_table.cc @@ -0,0 +1,161 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/huffman_table.h" + +#include /* for memcpy */ +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/dec_huffman.h" + +namespace jxl { + +/* Returns reverse(reverse(key, len) + 1, len), where reverse(key, len) is the + bit-wise reversal of the len least significant bits of key. */ +static inline int GetNextKey(int key, int len) { + int step = 1u << (len - 1); + while (key & step) { + step >>= 1; + } + return (key & (step - 1)) + step; +} + +/* Stores code in table[0], table[step], table[2*step], ..., table[end] */ +/* Assumes that end is an integer multiple of step */ +static inline void ReplicateValue(HuffmanCode* table, int step, int end, + HuffmanCode code) { + do { + end -= step; + table[end] = code; + } while (end > 0); +} + +/* Returns the table width of the next 2nd level table. 
count is the histogram + of bit lengths for the remaining symbols, len is the code length of the next + processed symbol */ +static inline size_t NextTableBitSize(const uint16_t* const count, size_t len, + int root_bits) { + size_t left = 1u << (len - root_bits); + while (len < PREFIX_MAX_BITS) { + if (left <= count[len]) break; + left -= count[len]; + ++len; + left <<= 1; + } + return len - root_bits; +} + +uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits, + const uint8_t* const code_lengths, + size_t code_lengths_size, uint16_t* count) { + HuffmanCode code; /* current table entry */ + HuffmanCode* table; /* next available space in table */ + size_t len; /* current code length */ + size_t symbol; /* symbol index in original or sorted table */ + int key; /* reversed prefix code */ + int step; /* step size to replicate values in current table */ + int low; /* low bits for current root entry */ + int mask; /* mask for low bits */ + size_t table_bits; /* key length of current table */ + int table_size; /* size of current table */ + int total_size; /* sum of root table size and 2nd level table sizes */ + /* offsets in sorted table for each length */ + uint16_t offset[PREFIX_MAX_BITS + 1]; + size_t max_length = 1; + + if (code_lengths_size > 1u << PREFIX_MAX_BITS) return 0; + + /* symbols sorted by code length */ + std::vector sorted_storage(code_lengths_size); + uint16_t* sorted = sorted_storage.data(); + + /* generate offsets into sorted symbol table by code length */ + { + uint16_t sum = 0; + for (len = 1; len <= PREFIX_MAX_BITS; len++) { + offset[len] = sum; + if (count[len]) { + sum = static_cast(sum + count[len]); + max_length = len; + } + } + } + + /* sort symbols by length, by symbol order within each length */ + for (symbol = 0; symbol < code_lengths_size; symbol++) { + if (code_lengths[symbol] != 0) { + sorted[offset[code_lengths[symbol]]++] = symbol; + } + } + + table = root_table; + table_bits = root_bits; + table_size = 1u << table_bits; + 
total_size = table_size; + + /* special case code with only one value */ + if (offset[PREFIX_MAX_BITS] == 1) { + code.bits = 0; + code.value = static_cast(sorted[0]); + for (key = 0; key < total_size; ++key) { + table[key] = code; + } + return total_size; + } + + /* fill in root table */ + /* let's reduce the table size to a smaller size if possible, and */ + /* create the repetitions by memcpy if possible in the coming loop */ + if (table_bits > max_length) { + table_bits = max_length; + table_size = 1u << table_bits; + } + key = 0; + symbol = 0; + code.bits = 1; + step = 2; + do { + for (; count[code.bits] != 0; --count[code.bits]) { + code.value = static_cast(sorted[symbol++]); + ReplicateValue(&table[key], step, table_size, code); + key = GetNextKey(key, code.bits); + } + step <<= 1; + } while (++code.bits <= table_bits); + + /* if root_bits != table_bits we only created one fraction of the */ + /* table, and we need to replicate it now. */ + while (total_size != table_size) { + memcpy(&table[table_size], &table[0], table_size * sizeof(table[0])); + table_size <<= 1; + } + + /* fill in 2nd level tables and add pointers to root table */ + mask = total_size - 1; + low = -1; + for (len = root_bits + 1, step = 2; len <= max_length; ++len, step <<= 1) { + for (; count[len] != 0; --count[len]) { + if ((key & mask) != low) { + table += table_size; + table_bits = NextTableBitSize(count, len, root_bits); + table_size = 1u << table_bits; + total_size += table_size; + low = key & mask; + root_table[low].bits = static_cast(table_bits + root_bits); + root_table[low].value = + static_cast((table - root_table) - low); + } + code.bits = static_cast(len - root_bits); + code.value = static_cast(sorted[symbol++]); + ReplicateValue(&table[key >> root_bits], step, table_size, code); + key = GetNextKey(key, len); + } + } + + return total_size; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/huffman_table.h b/third-party/libjxl/libjxl/lib/jxl/huffman_table.h 
new file mode 100644 index 0000000000..11cdb2fc45 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/huffman_table.h @@ -0,0 +1,28 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_HUFFMAN_TABLE_H_ +#define LIB_JXL_HUFFMAN_TABLE_H_ + +#include +#include + +namespace jxl { + +struct HuffmanCode { + uint8_t bits; /* number of bits used for this symbol */ + uint16_t value; /* symbol value or table offset */ +}; + +/* Builds Huffman lookup table assuming code lengths are in symbol order. */ +/* Returns 0 in case of error (invalid tree or memory error), otherwise + populated size of table. */ +uint32_t BuildHuffmanTable(HuffmanCode* root_table, int root_bits, + const uint8_t* code_lengths, + size_t code_lengths_size, uint16_t* count); + +} // namespace jxl + +#endif // LIB_JXL_HUFFMAN_TABLE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/iaca_test.cc b/third-party/libjxl/libjxl/lib/jxl/iaca_test.cc new file mode 100644 index 0000000000..e25d9316d5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/iaca_test.cc @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/base/iaca.h" + +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(IacaTest, MarkersDefaultToDisabledAndDoNotCrash) { + BeginIACA(); + EndIACA(); +} + +TEST(IacaTest, ScopeDefaultToDisabledAndDoNotCrash) { ScopeIACA iaca; } + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec.cc b/third-party/libjxl/libjxl/lib/jxl/icc_codec.cc new file mode 100644 index 0000000000..f367461c0f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec.cc @@ -0,0 +1,389 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/icc_codec.h" + +#include + +#include +#include +#include + +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/icc_codec_common.h" + +namespace jxl { +namespace { + +// Shuffles or interleaves bytes, for example with width 2, turns "ABCDabcd" +// into "AaBbCcDd". Transposes a matrix of ceil(size / width) columns and +// width rows. There are size elements, size may be < width * height, if so the +// last elements of the rightmost column are missing, the missing spots are +// transposed along with the filled spots, and the result has the missing +// elements at the end of the bottom row. The input is the input matrix in +// scanline order but with missing elements skipped (which may occur in multiple +// locations), the output is the result matrix in scanline order (with +// no need to skip missing elements as they are past the end of the data). +void Shuffle(uint8_t* data, size_t size, size_t width) { + size_t height = (size + width - 1) / width; // amount of rows of output + PaddedBytes result(size); + // i = output index, j input index + size_t s = 0, j = 0; + for (size_t i = 0; i < size; i++) { + result[i] = data[j]; + j += height; + if (j >= size) j = ++s; + } + + for (size_t i = 0; i < size; i++) { + data[i] = result[i]; + } +} + +// TODO(eustas): should be 20, or even 18, once DecodeVarInt is improved; +// currently DecodeVarInt does not signal the errors, and marks +// 11 bytes as used even if only 10 are used (and 9 is enough for +// 63-bit values). +constexpr const size_t kPreambleSize = 22; // enough for reading 2 VarInts + +} // namespace + +// Mimics the beginning of UnpredictICC for quick validity check. +// At least kPreambleSize bytes of data should be valid at invocation time.
+Status CheckPreamble(const PaddedBytes& data, size_t enc_size, + size_t output_limit) { + const uint8_t* enc = data.data(); + size_t size = data.size(); + size_t pos = 0; + uint64_t osize = DecodeVarInt(enc, size, &pos); + JXL_RETURN_IF_ERROR(CheckIs32Bit(osize)); + if (pos >= size) return JXL_FAILURE("Out of bounds"); + uint64_t csize = DecodeVarInt(enc, size, &pos); + JXL_RETURN_IF_ERROR(CheckIs32Bit(csize)); + JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, csize, size)); + // We expect that UnpredictICC inflates input, not the other way round. + if (osize + 65536 < enc_size) return JXL_FAILURE("Malformed ICC"); + if (output_limit && osize > output_limit) { + return JXL_FAILURE("Decoded ICC is too large"); + } + return true; +} + +// Decodes the result of PredictICC back to a valid ICC profile. +Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result) { + if (!result->empty()) return JXL_FAILURE("result must be empty initially"); + size_t pos = 0; + // TODO(lode): technically speaking we need to check that the entire varint + // decoding never goes out of bounds, not just the first byte. This requires + // a DecodeVarInt function that returns an error code. It is safe to use + // DecodeVarInt with out of bounds values, it silently returns, but the + // specification requires an error. Idem for all DecodeVarInt below. + if (pos >= size) return JXL_FAILURE("Out of bounds"); + uint64_t osize = DecodeVarInt(enc, size, &pos); // Output size + JXL_RETURN_IF_ERROR(CheckIs32Bit(osize)); + if (pos >= size) return JXL_FAILURE("Out of bounds"); + uint64_t csize = DecodeVarInt(enc, size, &pos); // Commands size + // Every command is translated to at least one byte.
+ JXL_RETURN_IF_ERROR(CheckIs32Bit(csize)); + size_t cpos = pos; // pos in commands stream + JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, csize, size)); + size_t commands_end = cpos + csize; + pos = commands_end; // pos in data stream + + // Header + PaddedBytes header = ICCInitialHeaderPrediction(); + EncodeUint32(0, osize, &header); + for (size_t i = 0; i <= kICCHeaderSize; i++) { + if (result->size() == osize) { + if (cpos != commands_end) return JXL_FAILURE("Not all commands used"); + if (pos != size) return JXL_FAILURE("Not all data used"); + return true; // Valid end + } + if (i == kICCHeaderSize) break; // Done + ICCPredictHeader(result->data(), result->size(), header.data(), i); + if (pos >= size) return JXL_FAILURE("Out of bounds"); + result->push_back(enc[pos++] + header[i]); + } + if (cpos >= commands_end) return JXL_FAILURE("Out of bounds"); + + // Tag list + uint64_t numtags = DecodeVarInt(enc, size, &cpos); + + if (numtags != 0) { + numtags--; + JXL_RETURN_IF_ERROR(CheckIs32Bit(numtags)); + AppendUint32(numtags, result); + uint64_t prevtagstart = kICCHeaderSize + numtags * 12; + uint64_t prevtagsize = 0; + for (;;) { + if (result->size() > osize) return JXL_FAILURE("Invalid result size"); + if (cpos > commands_end) return JXL_FAILURE("Out of bounds"); + if (cpos == commands_end) break; // Valid end + uint8_t command = enc[cpos++]; + uint8_t tagcode = command & 63; + Tag tag; + if (tagcode == 0) { + break; + } else if (tagcode == kCommandTagUnknown) { + JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 4, size)); + tag = DecodeKeyword(enc, size, pos); + pos += 4; + } else if (tagcode == kCommandTagTRC) { + tag = kRtrcTag; + } else if (tagcode == kCommandTagXYZ) { + tag = kRxyzTag; + } else { + if (tagcode - kCommandTagStringFirst >= kNumTagStrings) { + return JXL_FAILURE("Unknown tagcode"); + } + tag = *kTagStrings[tagcode - kCommandTagStringFirst]; + } + AppendKeyword(tag, result); + + uint64_t tagstart; + uint64_t tagsize = prevtagsize; + if (tag == kRxyzTag || 
tag == kGxyzTag || tag == kBxyzTag || + tag == kKxyzTag || tag == kWtptTag || tag == kBkptTag || + tag == kLumiTag) { + tagsize = 20; + } + + if (command & kFlagBitOffset) { + if (cpos >= commands_end) return JXL_FAILURE("Out of bounds"); + tagstart = DecodeVarInt(enc, size, &cpos); + } else { + JXL_RETURN_IF_ERROR(CheckIs32Bit(prevtagstart)); + tagstart = prevtagstart + prevtagsize; + } + JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart)); + AppendUint32(tagstart, result); + if (command & kFlagBitSize) { + if (cpos >= commands_end) return JXL_FAILURE("Out of bounds"); + tagsize = DecodeVarInt(enc, size, &cpos); + } + JXL_RETURN_IF_ERROR(CheckIs32Bit(tagsize)); + AppendUint32(tagsize, result); + prevtagstart = tagstart; + prevtagsize = tagsize; + + if (tagcode == kCommandTagTRC) { + AppendKeyword(kGtrcTag, result); + AppendUint32(tagstart, result); + AppendUint32(tagsize, result); + AppendKeyword(kBtrcTag, result); + AppendUint32(tagstart, result); + AppendUint32(tagsize, result); + } + + if (tagcode == kCommandTagXYZ) { + JXL_RETURN_IF_ERROR(CheckIs32Bit(tagstart + tagsize * 2)); + AppendKeyword(kGxyzTag, result); + AppendUint32(tagstart + tagsize, result); + AppendUint32(tagsize, result); + AppendKeyword(kBxyzTag, result); + AppendUint32(tagstart + tagsize * 2, result); + AppendUint32(tagsize, result); + } + } + } + + // Main Content + for (;;) { + if (result->size() > osize) return JXL_FAILURE("Invalid result size"); + if (cpos > commands_end) return JXL_FAILURE("Out of bounds"); + if (cpos == commands_end) break; // Valid end + uint8_t command = enc[cpos++]; + if (command == kCommandInsert) { + if (cpos >= commands_end) return JXL_FAILURE("Out of bounds"); + uint64_t num = DecodeVarInt(enc, size, &cpos); + JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size)); + for (size_t i = 0; i < num; i++) { + result->push_back(enc[pos++]); + } + } else if (command == kCommandShuffle2 || command == kCommandShuffle4) { + if (cpos >= commands_end) return JXL_FAILURE("Out of 
bounds"); + uint64_t num = DecodeVarInt(enc, size, &cpos); + JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size)); + PaddedBytes shuffled(num); + for (size_t i = 0; i < num; i++) { + shuffled[i] = enc[pos + i]; + } + if (command == kCommandShuffle2) { + Shuffle(shuffled.data(), num, 2); + } else if (command == kCommandShuffle4) { + Shuffle(shuffled.data(), num, 4); + } + for (size_t i = 0; i < num; i++) { + result->push_back(shuffled[i]); + pos++; + } + } else if (command == kCommandPredict) { + JXL_RETURN_IF_ERROR(CheckOutOfBounds(cpos, 2, commands_end)); + uint8_t flags = enc[cpos++]; + + size_t width = (flags & 3) + 1; + if (width == 3) return JXL_FAILURE("Invalid width"); + + int order = (flags & 12) >> 2; + if (order == 3) return JXL_FAILURE("Invalid order"); + + uint64_t stride = width; + if (flags & 16) { + if (cpos >= commands_end) return JXL_FAILURE("Out of bounds"); + stride = DecodeVarInt(enc, size, &cpos); + if (stride < width) { + return JXL_FAILURE("Invalid stride"); + } + } + // If stride * 4 >= result->size(), return failure. The check + // "size == 0 || ((size - 1) >> 2) < stride" corresponds to + // "stride * 4 >= size", but does not suffer from integer overflow. + // This check is more strict than necessary but follows the specification + // and the encoder should ensure this is followed. 
+ if (result->empty() || ((result->size() - 1u) >> 2u) < stride) { + return JXL_FAILURE("Invalid stride"); + } + + if (cpos >= commands_end) return JXL_FAILURE("Out of bounds"); + uint64_t num = DecodeVarInt(enc, size, &cpos); // in bytes + JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, num, size)); + + PaddedBytes shuffled(num); + for (size_t i = 0; i < num; i++) { + shuffled[i] = enc[pos + i]; + } + if (width > 1) Shuffle(shuffled.data(), num, width); + + size_t start = result->size(); + for (size_t i = 0; i < num; i++) { + uint8_t predicted = LinearPredictICCValue(result->data(), start, i, + stride, width, order); + result->push_back(predicted + shuffled[i]); + } + pos += num; + } else if (command == kCommandXYZ) { + AppendKeyword(kXyz_Tag, result); + for (int i = 0; i < 4; i++) result->push_back(0); + JXL_RETURN_IF_ERROR(CheckOutOfBounds(pos, 12, size)); + for (size_t i = 0; i < 12; i++) { + result->push_back(enc[pos++]); + } + } else if (command >= kCommandTypeStartFirst && + command < kCommandTypeStartFirst + kNumTypeStrings) { + AppendKeyword(*kTypeStrings[command - kCommandTypeStartFirst], result); + for (size_t i = 0; i < 4; i++) { + result->push_back(0); + } + } else { + return JXL_FAILURE("Unknown command"); + } + } + + if (pos != size) return JXL_FAILURE("Not all data used"); + if (result->size() != osize) return JXL_FAILURE("Invalid result size"); + + return true; +} + +Status ICCReader::Init(BitReader* reader, size_t output_limit) { + JXL_RETURN_IF_ERROR(CheckEOI(reader)); + used_bits_base_ = reader->TotalBitsConsumed(); + if (bits_to_skip_ == 0) { + enc_size_ = U64Coder::Read(reader); + if (enc_size_ > 268435456) { + // Avoid too large memory allocation for invalid file. 
+ return JXL_FAILURE("Too large encoded profile"); + } + JXL_RETURN_IF_ERROR( + DecodeHistograms(reader, kNumICCContexts, &code_, &context_map_)); + ans_reader_ = ANSSymbolReader(&code_, reader); + i_ = 0; + decompressed_.resize(std::min(i_ + 0x400, enc_size_)); + for (; i_ < std::min(2, enc_size_); i_++) { + decompressed_[i_] = ans_reader_.ReadHybridUint( + ICCANSContext(i_, i_ > 0 ? decompressed_[i_ - 1] : 0, + i_ > 1 ? decompressed_[i_ - 2] : 0), + reader, context_map_); + } + if (enc_size_ > kPreambleSize) { + for (; i_ < kPreambleSize; i_++) { + decompressed_[i_] = ans_reader_.ReadHybridUint( + ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]), + reader, context_map_); + } + JXL_RETURN_IF_ERROR(CheckEOI(reader)); + JXL_RETURN_IF_ERROR( + CheckPreamble(decompressed_, enc_size_, output_limit)); + } + bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_; + } else { + reader->SkipBits(bits_to_skip_); + } + return true; +} + +Status ICCReader::Process(BitReader* reader, PaddedBytes* icc) { + ANSSymbolReader::Checkpoint checkpoint; + size_t saved_i = 0; + auto save = [&]() { + ans_reader_.Save(&checkpoint); + bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_; + saved_i = i_; + }; + save(); + auto check_and_restore = [&]() { + Status status = CheckEOI(reader); + if (!status) { + // not enough bytes. 
+ ans_reader_.Restore(checkpoint); + i_ = saved_i; + return status; + } + return Status(true); + }; + for (; i_ < enc_size_; i_++) { + if (i_ % ANSSymbolReader::kMaxCheckpointInterval == 0 && i_ > 0) { + JXL_RETURN_IF_ERROR(check_and_restore()); + save(); + if ((i_ > 0) && (((i_ & 0xFFFF) == 0))) { + float used_bytes = + (reader->TotalBitsConsumed() - used_bits_base_) / 8.0f; + if (i_ > used_bytes * 256) return JXL_FAILURE("Corrupted stream"); + } + decompressed_.resize(std::min(i_ + 0x400, enc_size_)); + } + JXL_DASSERT(i_ >= 2); + decompressed_[i_] = ans_reader_.ReadHybridUint( + ICCANSContext(i_, decompressed_[i_ - 1], decompressed_[i_ - 2]), reader, + context_map_); + } + JXL_RETURN_IF_ERROR(check_and_restore()); + bits_to_skip_ = reader->TotalBitsConsumed() - used_bits_base_; + if (!ans_reader_.CheckANSFinalState()) { + return JXL_FAILURE("Corrupted ICC profile"); + } + + icc->clear(); + return UnpredictICC(decompressed_.data(), decompressed_.size(), icc); +} + +Status ICCReader::CheckEOI(BitReader* reader) { + if (reader->AllReadsWithinBounds()) return true; + return JXL_STATUS(StatusCode::kNotEnoughBytes, + "Not enough bytes for reading ICC profile"); +} + +Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc, + size_t output_limit) { + ICCReader icc_reader; + JXL_RETURN_IF_ERROR(icc_reader.Init(reader, output_limit)); + JXL_RETURN_IF_ERROR(icc_reader.Process(reader, icc)); + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec.h b/third-party/libjxl/libjxl/lib/jxl/icc_codec.h new file mode 100644 index 0000000000..a6c7477c60 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec.h @@ -0,0 +1,57 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ICC_CODEC_H_ +#define LIB_JXL_ICC_CODEC_H_ + +// Compressed representation of ICC profiles. 
+ +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" + +namespace jxl { + +struct ICCReader { + Status Init(BitReader* reader, size_t output_limit); + Status Process(BitReader* reader, PaddedBytes* icc); + void Reset() { + bits_to_skip_ = 0; + decompressed_.clear(); + } + + private: + Status CheckEOI(BitReader* reader); + size_t i_ = 0; + size_t bits_to_skip_ = 0; + size_t used_bits_base_ = 0; + uint64_t enc_size_ = 0; + std::vector context_map_; + ANSCode code_; + ANSSymbolReader ans_reader_; + PaddedBytes decompressed_; +}; + +// `icc` may be empty afterwards - if so, call CreateProfile. Does not append, +// clears any original data that was in icc. +// If `output_limit` is not 0, then returns error if resulting profile would be +// longer than `output_limit` +Status ReadICC(BitReader* JXL_RESTRICT reader, PaddedBytes* JXL_RESTRICT icc, + size_t output_limit = 0); + +// Exposed only for testing +Status PredictICC(const uint8_t* icc, size_t size, PaddedBytes* result); + +// Exposed only for testing +Status UnpredictICC(const uint8_t* enc, size_t size, PaddedBytes* result); + +} // namespace jxl + +#endif // LIB_JXL_ICC_CODEC_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.cc b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.cc new file mode 100644 index 0000000000..212387e78f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.cc @@ -0,0 +1,190 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/icc_codec_common.h" + +#include + +#include +#include +#include + +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/common.h" +#include "lib/jxl/fields.h" + +namespace jxl { +namespace { +static uint8_t ByteKind1(uint8_t b) { + if ('a' <= b && b <= 'z') return 0; + if ('A' <= b && b <= 'Z') return 0; + if ('0' <= b && b <= '9') return 1; + if (b == '.' || b == ',') return 1; + if (b == 0) return 2; + if (b == 1) return 3; + if (b < 16) return 4; + if (b == 255) return 6; + if (b > 240) return 5; + return 7; +} + +static uint8_t ByteKind2(uint8_t b) { + if ('a' <= b && b <= 'z') return 0; + if ('A' <= b && b <= 'Z') return 0; + if ('0' <= b && b <= '9') return 1; + if (b == '.' || b == ',') return 1; + if (b < 16) return 2; + if (b > 240) return 3; + return 4; +} + +template +T PredictValue(T p1, T p2, T p3, int order) { + if (order == 0) return p1; + if (order == 1) return 2 * p1 - p2; + if (order == 2) return 3 * p1 - 3 * p2 + p3; + return 0; +} +} // namespace + +uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos) { + return pos + 4 > size ? 
0 : LoadBE32(data + pos); +} + +void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data) { + if (pos + 4 > data->size()) return; + StoreBE32(value, data->data() + pos); +} + +void AppendUint32(uint32_t value, PaddedBytes* data) { + data->resize(data->size() + 4); + EncodeUint32(data->size() - 4, value, data); +} + +typedef std::array Tag; + +Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos) { + if (pos + 4 > size) return {{' ', ' ', ' ', ' '}}; + return {{data[pos], data[pos + 1], data[pos + 2], data[pos + 3]}}; +} + +void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos) { + if (keyword.size() != 4 || pos + 3 >= size) return; + for (size_t i = 0; i < 4; ++i) data[pos + i] = keyword[i]; +} + +void AppendKeyword(const Tag& keyword, PaddedBytes* data) { + JXL_ASSERT(keyword.size() == 4); + data->append(keyword); +} + +// Checks if a + b > size, taking possible integer overflow into account. +Status CheckOutOfBounds(size_t a, size_t b, size_t size) { + size_t pos = a + b; + if (pos > size) return JXL_FAILURE("Out of bounds"); + if (pos < a) return JXL_FAILURE("Out of bounds"); // overflow happened + return true; +} + +Status CheckIs32Bit(uint64_t v) { + static constexpr const uint64_t kUpper32 = ~static_cast(0xFFFFFFFF); + if ((v & kUpper32) != 0) return JXL_FAILURE("32-bit value expected"); + return true; +} + +PaddedBytes ICCInitialHeaderPrediction() { + PaddedBytes result(kICCHeaderSize); + for (size_t i = 0; i < kICCHeaderSize; i++) { + result[i] = 0; + } + result[8] = 4; + EncodeKeyword(kMntrTag, result.data(), result.size(), 12); + EncodeKeyword(kRgb_Tag, result.data(), result.size(), 16); + EncodeKeyword(kXyz_Tag, result.data(), result.size(), 20); + EncodeKeyword(kAcspTag, result.data(), result.size(), 36); + result[68] = 0; + result[69] = 0; + result[70] = 246; + result[71] = 214; + result[72] = 0; + result[73] = 1; + result[74] = 0; + result[75] = 0; + result[76] = 0; + result[77] = 0; + result[78] = 211; + 
result[79] = 45; + return result; +} + +void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header, + size_t pos) { + if (pos == 8 && size >= 8) { + header[80] = icc[4]; + header[81] = icc[5]; + header[82] = icc[6]; + header[83] = icc[7]; + } + if (pos == 41 && size >= 41) { + if (icc[40] == 'A') { + header[41] = 'P'; + header[42] = 'P'; + header[43] = 'L'; + } + if (icc[40] == 'M') { + header[41] = 'S'; + header[42] = 'F'; + header[43] = 'T'; + } + } + if (pos == 42 && size >= 42) { + if (icc[40] == 'S' && icc[41] == 'G') { + header[42] = 'I'; + header[43] = ' '; + } + if (icc[40] == 'S' && icc[41] == 'U') { + header[42] = 'N'; + header[43] = 'W'; + } + } +} + +// Predicts a value with linear prediction of given order (0-2), for integers +// with width bytes and given stride in bytes between values. +// The start position is at start + i, and the relevant modulus of i describes +// which byte of the multi-byte integer is being handled. +// The value start + i must be at least stride * 4. +uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i, + size_t stride, size_t width, int order) { + size_t pos = start + i; + if (width == 1) { + uint8_t p1 = data[pos - stride]; + uint8_t p2 = data[pos - stride * 2]; + uint8_t p3 = data[pos - stride * 3]; + return PredictValue(p1, p2, p3, order); + } else if (width == 2) { + size_t p = start + (i & ~1); + uint16_t p1 = (data[p - stride * 1] << 8) + data[p - stride * 1 + 1]; + uint16_t p2 = (data[p - stride * 2] << 8) + data[p - stride * 2 + 1]; + uint16_t p3 = (data[p - stride * 3] << 8) + data[p - stride * 3 + 1]; + uint16_t pred = PredictValue(p1, p2, p3, order); + return (i & 1) ? 
(pred & 255) : ((pred >> 8) & 255); + } else { + size_t p = start + (i & ~3); + uint32_t p1 = DecodeUint32(data, pos, p - stride); + uint32_t p2 = DecodeUint32(data, pos, p - stride * 2); + uint32_t p3 = DecodeUint32(data, pos, p - stride * 3); + uint32_t pred = PredictValue(p1, p2, p3, order); + unsigned shiftbytes = 3 - (i & 3); + return (pred >> (shiftbytes * 8)) & 255; + } +} + +size_t ICCANSContext(size_t i, size_t b1, size_t b2) { + if (i <= 128) return 0; + return 1 + ByteKind1(b1) + ByteKind2(b2) * 8; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.h b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.h new file mode 100644 index 0000000000..e91e908669 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec_common.h @@ -0,0 +1,106 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_ICC_CODEC_COMMON_H_ +#define LIB_JXL_ICC_CODEC_COMMON_H_ + +// Compressed representation of ICC profiles. 
+ +#include +#include + +#include + +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +static constexpr size_t kICCHeaderSize = 128; + +typedef std::array Tag; + +static const Tag kAcspTag = {{'a', 'c', 's', 'p'}}; +static const Tag kBkptTag = {{'b', 'k', 'p', 't'}}; +static const Tag kBtrcTag = {{'b', 'T', 'R', 'C'}}; +static const Tag kBxyzTag = {{'b', 'X', 'Y', 'Z'}}; +static const Tag kChadTag = {{'c', 'h', 'a', 'd'}}; +static const Tag kChrmTag = {{'c', 'h', 'r', 'm'}}; +static const Tag kCprtTag = {{'c', 'p', 'r', 't'}}; +static const Tag kCurvTag = {{'c', 'u', 'r', 'v'}}; +static const Tag kDescTag = {{'d', 'e', 's', 'c'}}; +static const Tag kDmddTag = {{'d', 'm', 'd', 'd'}}; +static const Tag kDmndTag = {{'d', 'm', 'n', 'd'}}; +static const Tag kGbd_Tag = {{'g', 'b', 'd', ' '}}; +static const Tag kGtrcTag = {{'g', 'T', 'R', 'C'}}; +static const Tag kGxyzTag = {{'g', 'X', 'Y', 'Z'}}; +static const Tag kKtrcTag = {{'k', 'T', 'R', 'C'}}; +static const Tag kKxyzTag = {{'k', 'X', 'Y', 'Z'}}; +static const Tag kLumiTag = {{'l', 'u', 'm', 'i'}}; +static const Tag kMab_Tag = {{'m', 'A', 'B', ' '}}; +static const Tag kMba_Tag = {{'m', 'B', 'A', ' '}}; +static const Tag kMlucTag = {{'m', 'l', 'u', 'c'}}; +static const Tag kMntrTag = {{'m', 'n', 't', 'r'}}; +static const Tag kParaTag = {{'p', 'a', 'r', 'a'}}; +static const Tag kRgb_Tag = {{'R', 'G', 'B', ' '}}; +static const Tag kRtrcTag = {{'r', 'T', 'R', 'C'}}; +static const Tag kRxyzTag = {{'r', 'X', 'Y', 'Z'}}; +static const Tag kSf32Tag = {{'s', 'f', '3', '2'}}; +static const Tag kTextTag = {{'t', 'e', 'x', 't'}}; +static const Tag kVcgtTag = {{'v', 'c', 'g', 't'}}; +static const Tag kWtptTag = {{'w', 't', 'p', 't'}}; +static const Tag kXyz_Tag = {{'X', 'Y', 'Z', ' '}}; + +// Tag names focused on RGB and GRAY monitor profiles +static constexpr size_t kNumTagStrings = 17; +static constexpr const Tag* kTagStrings[kNumTagStrings] = { + &kCprtTag, &kWtptTag, &kBkptTag, 
&kRxyzTag, &kGxyzTag, &kBxyzTag, + &kKxyzTag, &kRtrcTag, &kGtrcTag, &kBtrcTag, &kKtrcTag, &kChadTag, + &kDescTag, &kChrmTag, &kDmndTag, &kDmddTag, &kLumiTag}; + +static constexpr size_t kCommandTagUnknown = 1; +static constexpr size_t kCommandTagTRC = 2; +static constexpr size_t kCommandTagXYZ = 3; +static constexpr size_t kCommandTagStringFirst = 4; + +// Tag types focused on RGB and GRAY monitor profiles +static constexpr size_t kNumTypeStrings = 8; +static constexpr const Tag* kTypeStrings[kNumTypeStrings] = { + &kXyz_Tag, &kDescTag, &kTextTag, &kMlucTag, + &kParaTag, &kCurvTag, &kSf32Tag, &kGbd_Tag}; + +static constexpr size_t kCommandInsert = 1; +static constexpr size_t kCommandShuffle2 = 2; +static constexpr size_t kCommandShuffle4 = 3; +static constexpr size_t kCommandPredict = 4; +static constexpr size_t kCommandXYZ = 10; +static constexpr size_t kCommandTypeStartFirst = 16; + +static constexpr size_t kFlagBitOffset = 64; +static constexpr size_t kFlagBitSize = 128; + +static constexpr size_t kNumICCContexts = 41; + +uint32_t DecodeUint32(const uint8_t* data, size_t size, size_t pos); +void EncodeUint32(size_t pos, uint32_t value, PaddedBytes* data); +void AppendUint32(uint32_t value, PaddedBytes* data); +Tag DecodeKeyword(const uint8_t* data, size_t size, size_t pos); +void EncodeKeyword(const Tag& keyword, uint8_t* data, size_t size, size_t pos); +void AppendKeyword(const Tag& keyword, PaddedBytes* data); + +// Checks if a + b > size, taking possible integer overflow into account. 
+Status CheckOutOfBounds(size_t a, size_t b, size_t size); +Status CheckIs32Bit(uint64_t v); + +PaddedBytes ICCInitialHeaderPrediction(); +void ICCPredictHeader(const uint8_t* icc, size_t size, uint8_t* header, + size_t pos); +uint8_t LinearPredictICCValue(const uint8_t* data, size_t start, size_t i, + size_t stride, size_t width, int order); +size_t ICCANSContext(size_t i, size_t b1, size_t b2); + +} // namespace jxl + +#endif // LIB_JXL_ICC_CODEC_COMMON_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/icc_codec_test.cc b/third-party/libjxl/libjxl/lib/jxl/icc_codec_test.cc new file mode 100644 index 0000000000..af02094e99 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/icc_codec_test.cc @@ -0,0 +1,207 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/icc_codec.h" + +#include + +#include "lib/jxl/base/span.h" +#include "lib/jxl/enc_icc_codec.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +void TestProfile(const PaddedBytes& icc) { + BitWriter writer; + ASSERT_TRUE(WriteICC(icc, &writer, 0, nullptr)); + writer.ZeroPadToByte(); + PaddedBytes dec; + BitReader reader(writer.GetSpan()); + ASSERT_TRUE(ReadICC(&reader, &dec)); + ASSERT_TRUE(reader.Close()); + EXPECT_EQ(icc.size(), dec.size()); + if (icc.size() == dec.size()) { + for (size_t i = 0; i < icc.size(); i++) { + EXPECT_EQ(icc[i], dec[i]); + if (icc[i] != dec[i]) break; // One output is enough + } + } +} + +void TestProfile(const std::string& icc) { + PaddedBytes bytes(icc.size()); + for (size_t i = 0; i < icc.size(); i++) { + bytes[i] = icc[i]; + } + TestProfile(bytes); +} + +// Valid profile from one of the images output by the decoder. 
+static const unsigned char kTestProfile[] = { + 0x00, 0x00, 0x03, 0x80, 0x6c, 0x63, 0x6d, 0x73, 0x04, 0x30, 0x00, 0x00, + 0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20, + 0x07, 0xe3, 0x00, 0x04, 0x00, 0x1d, 0x00, 0x0f, 0x00, 0x32, 0x00, 0x2e, + 0x61, 0x63, 0x73, 0x70, 0x41, 0x50, 0x50, 0x4c, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0xf6, 0xd6, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x6c, 0x63, 0x6d, 0x73, + 0x5f, 0x07, 0x0d, 0x3e, 0x4d, 0x32, 0xf2, 0x6e, 0x5d, 0x77, 0x26, 0xcc, + 0x23, 0xb0, 0x6a, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, + 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x42, + 0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x64, 0x00, 0x00, 0x01, 0x00, + 0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x02, 0x64, 0x00, 0x00, 0x00, 0x14, + 0x63, 0x68, 0x61, 0x64, 0x00, 0x00, 0x02, 0x78, 0x00, 0x00, 0x00, 0x2c, + 0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xa4, 0x00, 0x00, 0x00, 0x14, + 0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xb8, 0x00, 0x00, 0x00, 0x14, + 0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x02, 0xcc, 0x00, 0x00, 0x00, 0x14, + 0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20, + 0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20, + 0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x02, 0xe0, 0x00, 0x00, 0x00, 0x20, + 0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x64, 0x6d, 0x6e, 0x64, 0x00, 0x00, 0x03, 0x24, 0x00, 0x00, 0x00, 0x28, + 0x64, 0x6d, 0x64, 0x64, 0x00, 0x00, 0x03, 0x4c, 0x00, 0x00, 0x00, 0x32, + 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x26, + 0x00, 0x00, 0x00, 0x1c, 
0x00, 0x52, 0x00, 0x47, 0x00, 0x42, 0x00, 0x5f, + 0x00, 0x44, 0x00, 0x36, 0x00, 0x35, 0x00, 0x5f, 0x00, 0x53, 0x00, 0x52, + 0x00, 0x47, 0x00, 0x5f, 0x00, 0x52, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x5f, + 0x00, 0x37, 0x00, 0x30, 0x00, 0x39, 0x00, 0x00, 0x6d, 0x6c, 0x75, 0x63, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, + 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x1c, + 0x00, 0x43, 0x00, 0x6f, 0x00, 0x70, 0x00, 0x79, 0x00, 0x72, 0x00, 0x69, + 0x00, 0x67, 0x00, 0x68, 0x00, 0x74, 0x00, 0x20, 0x00, 0x32, 0x00, 0x30, + 0x00, 0x31, 0x00, 0x38, 0x00, 0x20, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f, + 0x00, 0x67, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x20, 0x00, 0x4c, 0x00, 0x4c, + 0x00, 0x43, 0x00, 0x2c, 0x00, 0x20, 0x00, 0x43, 0x00, 0x43, 0x00, 0x2d, + 0x00, 0x42, 0x00, 0x59, 0x00, 0x2d, 0x00, 0x53, 0x00, 0x41, 0x00, 0x20, + 0x00, 0x33, 0x00, 0x2e, 0x00, 0x30, 0x00, 0x20, 0x00, 0x55, 0x00, 0x6e, + 0x00, 0x70, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x74, 0x00, 0x65, 0x00, 0x64, + 0x00, 0x20, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e, + 0x00, 0x73, 0x00, 0x65, 0x00, 0x28, 0x00, 0x68, 0x00, 0x74, 0x00, 0x74, + 0x00, 0x70, 0x00, 0x73, 0x00, 0x3a, 0x00, 0x2f, 0x00, 0x2f, 0x00, 0x63, + 0x00, 0x72, 0x00, 0x65, 0x00, 0x61, 0x00, 0x74, 0x00, 0x69, 0x00, 0x76, + 0x00, 0x65, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x6d, 0x00, 0x6d, 0x00, 0x6f, + 0x00, 0x6e, 0x00, 0x73, 0x00, 0x2e, 0x00, 0x6f, 0x00, 0x72, 0x00, 0x67, + 0x00, 0x2f, 0x00, 0x6c, 0x00, 0x69, 0x00, 0x63, 0x00, 0x65, 0x00, 0x6e, + 0x00, 0x73, 0x00, 0x65, 0x00, 0x73, 0x00, 0x2f, 0x00, 0x62, 0x00, 0x79, + 0x00, 0x2d, 0x00, 0x73, 0x00, 0x61, 0x00, 0x2f, 0x00, 0x33, 0x00, 0x2e, + 0x00, 0x30, 0x00, 0x2f, 0x00, 0x6c, 0x00, 0x65, 0x00, 0x67, 0x00, 0x61, + 0x00, 0x6c, 0x00, 0x63, 0x00, 0x6f, 0x00, 0x64, 0x00, 0x65, 0x00, 0x29, + 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x73, 0x66, 0x33, 0x32, + 0x00, 0x00, 0x00, 0x00, 
0x00, 0x01, 0x0c, 0x42, 0x00, 0x00, 0x05, 0xde, + 0xff, 0xff, 0xf3, 0x25, 0x00, 0x00, 0x07, 0x93, 0x00, 0x00, 0xfd, 0x90, + 0xff, 0xff, 0xfb, 0xa1, 0xff, 0xff, 0xfd, 0xa2, 0x00, 0x00, 0x03, 0xdc, + 0x00, 0x00, 0xc0, 0x6e, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x6f, 0xa0, 0x00, 0x00, 0x38, 0xf5, 0x00, 0x00, 0x03, 0x90, + 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x9f, + 0x00, 0x00, 0x0f, 0x84, 0x00, 0x00, 0xb6, 0xc4, 0x58, 0x59, 0x5a, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x97, 0x00, 0x00, 0xb7, 0x87, + 0x00, 0x00, 0x18, 0xd9, 0x70, 0x61, 0x72, 0x61, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x38, 0xe4, 0x00, 0x00, 0xe8, 0xf0, + 0x00, 0x00, 0x17, 0x10, 0x00, 0x00, 0x38, 0xe4, 0x00, 0x00, 0x14, 0xbc, + 0x63, 0x68, 0x72, 0x6d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, + 0x00, 0x00, 0xa3, 0xd7, 0x00, 0x00, 0x54, 0x7c, 0x00, 0x00, 0x4c, 0xcd, + 0x00, 0x00, 0x99, 0x9a, 0x00, 0x00, 0x26, 0x67, 0x00, 0x00, 0x0f, 0x5c, + 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, 0x00, 0x00, 0x00, 0x0c, + 0x00, 0x00, 0x00, 0x1c, 0x00, 0x47, 0x00, 0x6f, 0x00, 0x6f, 0x00, 0x67, + 0x00, 0x6c, 0x00, 0x65, 0x6d, 0x6c, 0x75, 0x63, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x65, 0x6e, 0x55, 0x53, + 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x49, 0x00, 0x6d, + 0x00, 0x61, 0x00, 0x67, 0x00, 0x65, 0x00, 0x20, 0x00, 0x63, 0x00, 0x6f, + 0x00, 0x64, 0x00, 0x65, 0x00, 0x63, 0x00, 0x00, +}; + +} // namespace + +TEST(IccCodecTest, Icc) { + // Empty string cannot be tested, encoder checks against writing it. 
+ TestProfile("a"); + TestProfile("ab"); + TestProfile("aaaa"); + + { + // Exactly the ICC header size + PaddedBytes profile(128); + for (size_t i = 0; i < 128; i++) { + profile[i] = 0; + } + TestProfile(profile); + } + + { + PaddedBytes profile; + profile.append(kTestProfile, kTestProfile + sizeof(kTestProfile)); + TestProfile(profile); + } + + // Test substrings of full profile + { + PaddedBytes profile; + for (size_t i = 0; i <= 256; i++) { + profile.push_back(kTestProfile[i]); + TestProfile(profile); + } + } +} + +// kTestProfile after encoding with the ICC codec +static const unsigned char kEncodedTestProfile[] = { + 0x1f, 0x8b, 0x1, 0x13, 0x10, 0x0, 0x0, 0x0, 0x20, 0x4c, 0xcc, 0x3, + 0xe7, 0xa0, 0xa5, 0xa2, 0x90, 0xa4, 0x27, 0xe8, 0x79, 0x1d, 0xe3, 0x26, + 0x57, 0x54, 0xef, 0x0, 0xe8, 0x97, 0x2, 0xce, 0xa1, 0xd7, 0x85, 0x16, + 0xb4, 0x29, 0x94, 0x58, 0xf2, 0x56, 0xc0, 0x76, 0xea, 0x23, 0xec, 0x7c, + 0x73, 0x51, 0x41, 0x40, 0x23, 0x21, 0x95, 0x4, 0x75, 0x12, 0xc9, 0xcc, + 0x16, 0xbd, 0xb6, 0x99, 0xad, 0xf8, 0x75, 0x35, 0xb6, 0x42, 0xae, 0xae, + 0xae, 0x86, 0x56, 0xf8, 0xcc, 0x16, 0x30, 0xb3, 0x45, 0xad, 0xd, 0x40, + 0xd6, 0xd1, 0xd6, 0x99, 0x40, 0xbe, 0xe2, 0xdc, 0x31, 0x7, 0xa6, 0xb9, + 0x27, 0x92, 0x38, 0x0, 0x3, 0x5e, 0x2c, 0xbe, 0xe6, 0xfb, 0x19, 0xbf, + 0xf3, 0x6d, 0xbc, 0x4d, 0x64, 0xe5, 0xba, 0x76, 0xde, 0x31, 0x65, 0x66, + 0x14, 0xa6, 0x3a, 0xc5, 0x8f, 0xb1, 0xb4, 0xba, 0x1f, 0xb1, 0xb8, 0xd4, + 0x75, 0xba, 0x18, 0x86, 0x95, 0x3c, 0x26, 0xf6, 0x25, 0x62, 0x53, 0xfd, + 0x9c, 0x94, 0x76, 0xf6, 0x95, 0x2c, 0xb1, 0xfd, 0xdc, 0xc0, 0xe4, 0x3f, + 0xb3, 0xff, 0x67, 0xde, 0xd5, 0x94, 0xcc, 0xb0, 0x83, 0x2f, 0x28, 0x93, + 0x92, 0x3, 0xa1, 0x41, 0x64, 0x60, 0x62, 0x70, 0x80, 0x87, 0xaf, 0xe7, + 0x60, 0x4a, 0x20, 0x23, 0xb3, 0x11, 0x7, 0x38, 0x38, 0xd4, 0xa, 0x66, + 0xb5, 0x93, 0x41, 0x90, 0x19, 0x17, 0x18, 0x60, 0xa5, 0xb, 0x7a, 0x24, + 0xaa, 0x20, 0x81, 0xac, 0xa9, 0xa1, 0x70, 0xa6, 0x12, 0x8a, 0x4a, 0xa3, + 0xa0, 0xf9, 0x9a, 0x97, 0xe7, 0xa8, 0xac, 0x8, 0xa8, 
0xc4, 0x2a, 0x86, + 0xa7, 0x69, 0x1e, 0x67, 0xe6, 0xbe, 0xa4, 0xd3, 0xff, 0x91, 0x61, 0xf6, + 0x8a, 0xe6, 0xb5, 0xb3, 0x61, 0x9f, 0x19, 0x17, 0x98, 0x27, 0x6b, 0xe9, + 0x8, 0x98, 0xe1, 0x21, 0x4a, 0x9, 0xb5, 0xd7, 0xca, 0xfa, 0x94, 0xd0, + 0x69, 0x1a, 0xeb, 0x52, 0x1, 0x4e, 0xf5, 0xf6, 0xdf, 0x7f, 0xe7, 0x29, + 0x70, 0xee, 0x4, 0xda, 0x2f, 0xa4, 0xff, 0xfe, 0xbb, 0x6f, 0xa8, 0xff, + 0xfe, 0xdb, 0xaf, 0x8, 0xf6, 0x72, 0xa1, 0x40, 0x5d, 0xf0, 0x2d, 0x8, + 0x82, 0x5b, 0x87, 0xbd, 0x10, 0x8, 0xe9, 0x7, 0xee, 0x4b, 0x80, 0xda, + 0x4a, 0x4, 0xc5, 0x5e, 0xa0, 0xb7, 0x1e, 0x60, 0xb0, 0x59, 0x76, 0x60, + 0xb, 0x2e, 0x19, 0x8a, 0x2e, 0x1c, 0xe6, 0x6, 0x20, 0xb8, 0x64, 0x18, + 0x2a, 0xcf, 0x51, 0x94, 0xd4, 0xee, 0xc3, 0xfe, 0x39, 0x74, 0xd4, 0x2b, + 0x48, 0xc9, 0x83, 0x4c, 0x9b, 0xd0, 0x4c, 0x35, 0x10, 0xe3, 0x9, 0xf7, + 0x72, 0xf0, 0x7a, 0xe, 0xbf, 0x7d, 0x36, 0x2e, 0x19, 0x7e, 0x3f, 0xc, + 0xf7, 0x93, 0xe7, 0xf4, 0x1d, 0x32, 0xc6, 0xb0, 0x89, 0xad, 0xe0, 0x28, + 0xc1, 0xa7, 0x59, 0xe3, 0x0, +}; + +// Tests that the decoded kEncodedTestProfile matches kTestProfile. +TEST(IccCodecTest, EncodedIccProfile) { + jxl::BitReader reader(jxl::Span(kEncodedTestProfile, + sizeof(kEncodedTestProfile))); + jxl::PaddedBytes dec; + ASSERT_TRUE(ReadICC(&reader, &dec)); + ASSERT_TRUE(reader.Close()); + EXPECT_EQ(sizeof(kTestProfile), dec.size()); + if (sizeof(kTestProfile) == dec.size()) { + for (size_t i = 0; i < dec.size(); i++) { + EXPECT_EQ(kTestProfile[i], dec[i]); + if (kTestProfile[i] != dec[i]) break; // One output is enough + } + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/image.cc b/third-party/libjxl/libjxl/lib/jxl/image.cc new file mode 100644 index 0000000000..088ff7699a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image.cc @@ -0,0 +1,204 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/image.h" + +#include // swap + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/image.cc" +#include +#include + +#include "lib/jxl/common.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/sanitizers.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { + +namespace HWY_NAMESPACE { +size_t GetVectorSize() { return HWY_LANES(uint8_t); } +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE + +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +namespace { + +HWY_EXPORT(GetVectorSize); // Local function. + +// Returns distance [bytes] between the start of two consecutive rows, a +// multiple of vector/cache line size but NOT CacheAligned::kAlias - see below. +size_t BytesPerRow(const size_t xsize, const size_t sizeof_t) { + const size_t vec_size = VectorSize(); + size_t valid_bytes = xsize * sizeof_t; + + // Allow unaligned accesses starting at the last valid value - this may raise + // msan errors unless the user calls InitializePaddingForUnalignedAccesses. + // Skip for the scalar case because no extra lanes will be loaded. + if (vec_size != 0) { + valid_bytes += vec_size - sizeof_t; + } + + // Round up to vector and cache line size. + const size_t align = std::max(vec_size, CacheAligned::kAlignment); + size_t bytes_per_row = RoundUpTo(valid_bytes, align); + + // During the lengthy window before writes are committed to memory, CPUs + // guard against read after write hazards by checking the address, but + // only the lower 11 bits. We avoid a false dependency between writes to + // consecutive rows by ensuring their sizes are not multiples of 2 KiB. + // Avoid2K prevents the same problem for the planes of an Image3. 
+ if (bytes_per_row % CacheAligned::kAlias == 0) { + bytes_per_row += align; + } + + JXL_ASSERT(bytes_per_row % align == 0); + return bytes_per_row; +} + +} // namespace + +size_t VectorSize() { + static size_t bytes = HWY_DYNAMIC_DISPATCH(GetVectorSize)(); + return bytes; +} + +PlaneBase::PlaneBase(const size_t xsize, const size_t ysize, + const size_t sizeof_t) + : xsize_(static_cast(xsize)), + ysize_(static_cast(ysize)), + orig_xsize_(static_cast(xsize)), + orig_ysize_(static_cast(ysize)) { + JXL_CHECK(xsize == xsize_); + JXL_CHECK(ysize == ysize_); + + JXL_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8); + + bytes_per_row_ = 0; + // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate + // if nonzero, because "zero" bytes still have padding/bookkeeping overhead. + if (xsize != 0 && ysize != 0) { + bytes_per_row_ = BytesPerRow(xsize, sizeof_t); + bytes_ = AllocateArray(bytes_per_row_ * ysize); + JXL_CHECK(bytes_.get()); + InitializePadding(sizeof_t, Padding::kRoundUp); + } +} + +void PlaneBase::InitializePadding(const size_t sizeof_t, Padding padding) { +#if defined(MEMORY_SANITIZER) || HWY_IDE + if (xsize_ == 0 || ysize_ == 0) return; + + const size_t vec_size = VectorSize(); + if (vec_size == 0) return; // Scalar mode: no padding needed + + const size_t valid_size = xsize_ * sizeof_t; + const size_t initialize_size = padding == Padding::kRoundUp + ? RoundUpTo(valid_size, vec_size) + : valid_size + vec_size - sizeof_t; + if (valid_size == initialize_size) return; + + for (size_t y = 0; y < ysize_; ++y) { + uint8_t* JXL_RESTRICT row = static_cast(VoidRow(y)); +#if defined(__clang__) && \ + ((!defined(__apple_build_version__) && __clang_major__ <= 6) || \ + (defined(__apple_build_version__) && \ + __apple_build_version__ <= 10001145)) + // There's a bug in msan in clang-6 when handling AVX2 operations. 
This + // workaround allows tests to pass on msan, although it is slower and + // prevents msan warnings from uninitialized images. + std::fill(row, msan::kSanitizerSentinelByte, initialize_size); +#else + memset(row + valid_size, msan::kSanitizerSentinelByte, + initialize_size - valid_size); +#endif // clang6 + } +#endif // MEMORY_SANITIZER +} + +void PlaneBase::Swap(PlaneBase& other) { + std::swap(xsize_, other.xsize_); + std::swap(ysize_, other.ysize_); + std::swap(orig_xsize_, other.orig_xsize_); + std::swap(orig_ysize_, other.orig_ysize_); + std::swap(bytes_per_row_, other.bytes_per_row_); + std::swap(bytes_, other.bytes_); +} + +void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in, + size_t block_dim) { + const size_t xsize_orig = in->xsize(); + const size_t ysize_orig = in->ysize(); + const size_t xsize = RoundUpTo(xsize_orig, block_dim); + const size_t ysize = RoundUpTo(ysize_orig, block_dim); + // Expands image size to the originally-allocated size. + in->ShrinkTo(xsize, ysize); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < ysize_orig; y++) { + float* JXL_RESTRICT row = in->PlaneRow(c, y); + for (size_t x = xsize_orig; x < xsize; x++) { + row[x] = row[xsize_orig - 1]; + } + } + const float* JXL_RESTRICT row_src = in->ConstPlaneRow(c, ysize_orig - 1); + for (size_t y = ysize_orig; y < ysize; y++) { + memcpy(in->PlaneRow(c, y), row_src, xsize * sizeof(float)); + } + } +} + +static void DownsampleImage(const ImageF& input, size_t factor, + ImageF* output) { + JXL_ASSERT(factor != 1); + output->ShrinkTo(DivCeil(input.xsize(), factor), + DivCeil(input.ysize(), factor)); + size_t in_stride = input.PixelsPerRow(); + for (size_t y = 0; y < output->ysize(); y++) { + float* row_out = output->Row(y); + const float* row_in = input.Row(factor * y); + for (size_t x = 0; x < output->xsize(); x++) { + size_t cnt = 0; + float sum = 0; + for (size_t iy = 0; iy < factor && iy + factor * y < input.ysize(); + iy++) { + for (size_t ix = 0; ix < factor && 
ix + factor * x < input.xsize(); + ix++) { + sum += row_in[iy * in_stride + x * factor + ix]; + cnt++; + } + } + row_out[x] = sum / cnt; + } + } +} + +void DownsampleImage(ImageF* image, size_t factor) { + // Allocate extra space to avoid a reallocation when padding. + ImageF downsampled(DivCeil(image->xsize(), factor) + kBlockDim, + DivCeil(image->ysize(), factor) + kBlockDim); + DownsampleImage(*image, factor, &downsampled); + *image = std::move(downsampled); +} + +void DownsampleImage(Image3F* opsin, size_t factor) { + JXL_ASSERT(factor != 1); + // Allocate extra space to avoid a reallocation when padding. + Image3F downsampled(DivCeil(opsin->xsize(), factor) + kBlockDim, + DivCeil(opsin->ysize(), factor) + kBlockDim); + downsampled.ShrinkTo(downsampled.xsize() - kBlockDim, + downsampled.ysize() - kBlockDim); + for (size_t c = 0; c < 3; c++) { + DownsampleImage(opsin->Plane(c), factor, &downsampled.Plane(c)); + } + *opsin = std::move(downsampled); +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/image.h b/third-party/libjxl/libjxl/lib/jxl/image.h new file mode 100644 index 0000000000..e66534220c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image.h @@ -0,0 +1,497 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_IMAGE_H_ +#define LIB_JXL_IMAGE_H_ + +// SIMD/multicore-friendly planar image representation with row accessors. + +#include +#include +#include +#include + +#include +#include +#include // std::move + +#include "lib/jxl/base/cache_aligned.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" + +namespace jxl { + +// Helper function to create rows that are multiples of SIMD vector size. 
+size_t VectorSize(); + +// Type-independent parts of Plane<> - reduces code duplication and facilitates +// moving member function implementations to cc file. +struct PlaneBase { + PlaneBase() + : xsize_(0), + ysize_(0), + orig_xsize_(0), + orig_ysize_(0), + bytes_per_row_(0), + bytes_(nullptr) {} + PlaneBase(size_t xsize, size_t ysize, size_t sizeof_t); + + // Copy construction/assignment is forbidden to avoid inadvertent copies, + // which can be very expensive. Use CopyImageTo() instead. + PlaneBase(const PlaneBase& other) = delete; + PlaneBase& operator=(const PlaneBase& other) = delete; + + // Move constructor (required for returning Image from function) + PlaneBase(PlaneBase&& other) noexcept = default; + + // Move assignment (required for std::vector) + PlaneBase& operator=(PlaneBase&& other) noexcept = default; + + void Swap(PlaneBase& other); + + // Useful for pre-allocating image with some padding for alignment purposes + // and later reporting the actual valid dimensions. May also be used to + // un-shrink the image. Caller is responsible for ensuring xsize/ysize are <= + // the original dimensions. + void ShrinkTo(const size_t xsize, const size_t ysize) { + JXL_CHECK(xsize <= orig_xsize_); + JXL_CHECK(ysize <= orig_ysize_); + xsize_ = static_cast(xsize); + ysize_ = static_cast(ysize); + // NOTE: we can't recompute bytes_per_row for more compact storage and + // better locality because that would invalidate the image contents. + } + + // How many pixels. + JXL_INLINE size_t xsize() const { return xsize_; } + JXL_INLINE size_t ysize() const { return ysize_; } + + // NOTE: do not use this for copying rows - the valid xsize may be much less. + JXL_INLINE size_t bytes_per_row() const { return bytes_per_row_; } + + // Raw access to byte contents, for interfacing with other libraries. + // Unsigned char instead of char to avoid surprises (sign extension). 
+ JXL_INLINE uint8_t* bytes() { + void* p = bytes_.get(); + return static_cast(JXL_ASSUME_ALIGNED(p, 64)); + } + JXL_INLINE const uint8_t* bytes() const { + const void* p = bytes_.get(); + return static_cast(JXL_ASSUME_ALIGNED(p, 64)); + } + + protected: + // Returns pointer to the start of a row. + JXL_INLINE void* VoidRow(const size_t y) const { +#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \ + defined(THREAD_SANITIZER) + if (y >= ysize_) { + JXL_ABORT("Row(%" PRIu64 ") in (%u x %u) image\n", (uint64_t)y, xsize_, + ysize_); + } +#endif + + void* row = bytes_.get() + y * bytes_per_row_; + return JXL_ASSUME_ALIGNED(row, 64); + } + + enum class Padding { + // Allow Load(d, row + x) for x = 0; x < xsize(); x += Lanes(d). Default. + kRoundUp, + // Allow LoadU(d, row + x) for x = xsize() - 1. This requires an extra + // vector to be initialized. If done by default, this would suppress + // legitimate msan warnings. We therefore require users to explicitly call + // InitializePadding before using unaligned loads (e.g. convolution). + kUnaligned + }; + + // Initializes the minimum bytes required to suppress msan warnings from + // legitimate (according to Padding mode) vector loads/stores on the right + // border, where some lanes are uninitialized and assumed to be unused. + void InitializePadding(size_t sizeof_t, Padding padding); + + // (Members are non-const to enable assignment during move-assignment.) + uint32_t xsize_; // In valid pixels, not including any padding. + uint32_t ysize_; + uint32_t orig_xsize_; + uint32_t orig_ysize_; + size_t bytes_per_row_; // Includes padding. + CacheAlignedUniquePtr bytes_; +}; + +// Single channel, aligned rows separated by padding. T must be POD. +// +// 'Single channel' (one 2D array per channel) simplifies vectorization +// (repeating the same operation on multiple adjacent components) without the +// complexity of a hybrid layout (8 R, 8 G, 8 B, ...). 
In particular, clients +// can easily iterate over all components in a row and Image requires no +// knowledge of the pixel format beyond the component type "T". +// +// 'Aligned' means each row is aligned to the L1 cache line size. This prevents +// false sharing between two threads operating on adjacent rows. +// +// 'Padding' is still relevant because vectors could potentially be larger than +// a cache line. By rounding up row sizes to the vector size, we allow +// reading/writing ALIGNED vectors whose first lane is a valid sample. This +// avoids needing a separate loop to handle remaining unaligned lanes. +// +// This image layout could also be achieved with a vector and a row accessor +// function, but a class wrapper with support for "deleter" allows wrapping +// existing memory allocated by clients without copying the pixels. It also +// provides convenient accessors for xsize/ysize, which shortens function +// argument lists. Supports move-construction so it can be stored in containers. +template +class Plane : public PlaneBase { + public: + using T = ComponentType; + static constexpr size_t kNumPlanes = 1; + + Plane() = default; + Plane(const size_t xsize, const size_t ysize) + : PlaneBase(xsize, ysize, sizeof(T)) {} + + void InitializePaddingForUnalignedAccesses() { + InitializePadding(sizeof(T), Padding::kUnaligned); + } + + JXL_INLINE T* Row(const size_t y) { return static_cast(VoidRow(y)); } + + // Returns pointer to const (see above). + JXL_INLINE const T* Row(const size_t y) const { + return static_cast(VoidRow(y)); + } + + // Documents that the access is const. + JXL_INLINE const T* ConstRow(const size_t y) const { + return static_cast(VoidRow(y)); + } + + // Returns number of pixels (some of which are padding) per row. Useful for + // computing other rows via pointer arithmetic. WARNING: this must + // NOT be used to determine xsize. 
+ JXL_INLINE intptr_t PixelsPerRow() const { + return static_cast(bytes_per_row_ / sizeof(T)); + } +}; + +using ImageSB = Plane; +using ImageB = Plane; +using ImageS = Plane; // signed integer or half-float +using ImageU = Plane; +using ImageI = Plane; +using ImageF = Plane; +using ImageD = Plane; + +// Also works for Image3 and mixed argument types. +template +bool SameSize(const Image1& image1, const Image2& image2) { + return image1.xsize() == image2.xsize() && image1.ysize() == image2.ysize(); +} + +template +class Image3; + +// Rectangular region in image(s). Factoring this out of Image instead of +// shifting the pointer by x0/y0 allows this to apply to multiple images with +// different resolutions (e.g. color transform and quantization field). +// Can compare using SameSize(rect1, rect2). +template +class RectT { + public: + // Most windows are xsize_max * ysize_max, except those on the borders where + // begin + size_max > end. + constexpr RectT(T xbegin, T ybegin, size_t xsize_max, size_t ysize_max, + T xend, T yend) + : x0_(xbegin), + y0_(ybegin), + xsize_(ClampedSize(xbegin, xsize_max, xend)), + ysize_(ClampedSize(ybegin, ysize_max, yend)) {} + + // Construct with origin and known size (typically from another Rect). + constexpr RectT(T xbegin, T ybegin, size_t xsize, size_t ysize) + : x0_(xbegin), y0_(ybegin), xsize_(xsize), ysize_(ysize) {} + + // Construct a rect that covers a whole image/plane/ImageBundle etc. + template + explicit RectT(const ImageT& image) + : RectT(0, 0, image.xsize(), image.ysize()) {} + + RectT() : RectT(0, 0, 0, 0) {} + + RectT(const RectT&) = default; + RectT& operator=(const RectT&) = default; + + // Construct a subrect that resides in an image/plane/ImageBundle etc. + template + RectT Crop(const ImageT& image) const { + return Intersection(RectT(image)); + } + + // Construct a subrect that resides in the [0, ysize) x [0, xsize) region of + // the current rect. 
+ RectT Crop(size_t area_xsize, size_t area_ysize) const { + return Intersection(RectT(0, 0, area_xsize, area_ysize)); + } + + // Returns a rect that only contains `num` lines with offset `y` from `y0()`. + RectT Lines(size_t y, size_t num) const { + JXL_DASSERT(y + num <= ysize_); + return RectT(x0_, y0_ + y, xsize_, num); + } + + RectT Line(size_t y) const { return Lines(y, 1); } + + JXL_MUST_USE_RESULT RectT Intersection(const RectT& other) const { + return RectT(std::max(x0_, other.x0_), std::max(y0_, other.y0_), xsize_, + ysize_, std::min(x1(), other.x1()), + std::min(y1(), other.y1())); + } + + JXL_MUST_USE_RESULT RectT Translate(int64_t x_offset, + int64_t y_offset) const { + return RectT(x0_ + x_offset, y0_ + y_offset, xsize_, ysize_); + } + + template + V* Row(Plane* image, size_t y) const { + JXL_DASSERT(y + y0_ >= 0); + return image->Row(y + y0_) + x0_; + } + + template + const V* Row(const Plane* image, size_t y) const { + JXL_DASSERT(y + y0_ >= 0); + return image->Row(y + y0_) + x0_; + } + + template + V* PlaneRow(Image3* image, const size_t c, size_t y) const { + JXL_DASSERT(y + y0_ >= 0); + return image->PlaneRow(c, y + y0_) + x0_; + } + + template + const V* ConstRow(const Plane& image, size_t y) const { + JXL_DASSERT(y + y0_ >= 0); + return image.ConstRow(y + y0_) + x0_; + } + + template + const V* ConstPlaneRow(const Image3& image, size_t c, size_t y) const { + JXL_DASSERT(y + y0_ >= 0); + return image.ConstPlaneRow(c, y + y0_) + x0_; + } + + bool IsInside(const RectT& other) const { + return x0_ >= other.x0() && x1() <= other.x1() && y0_ >= other.y0() && + y1() <= other.y1(); + } + + // Returns true if this Rect fully resides in the given image. ImageT could be + // Plane or Image3; however if ImageT is Rect, results are nonsensical. 
+ template + bool IsInside(const ImageT& image) const { + return IsInside(RectT(image)); + } + + T x0() const { return x0_; } + T y0() const { return y0_; } + size_t xsize() const { return xsize_; } + size_t ysize() const { return ysize_; } + T x1() const { return x0_ + xsize_; } + T y1() const { return y0_ + ysize_; } + + RectT ShiftLeft(size_t shiftx, size_t shifty) const { + return RectT(x0_ * (1 << shiftx), y0_ * (1 << shifty), xsize_ << shiftx, + ysize_ << shifty); + } + RectT ShiftLeft(size_t shift) const { return ShiftLeft(shift, shift); } + + // Requires x0(), y0() to be multiples of 1< CeilShiftRight(size_t shiftx, size_t shifty) const { + JXL_ASSERT(x0_ % (1 << shiftx) == 0); + JXL_ASSERT(y0_ % (1 << shifty) == 0); + return RectT(x0_ / (1 << shiftx), y0_ / (1 << shifty), + DivCeil(xsize_, T{1} << shiftx), + DivCeil(ysize_, T{1} << shifty)); + } + RectT CeilShiftRight(std::pair shift) const { + return CeilShiftRight(shift.first, shift.second); + } + RectT CeilShiftRight(size_t shift) const { + return CeilShiftRight(shift, shift); + } + + template + RectT As() const { + return RectT(U(x0_), U(y0_), U(xsize_), U(ysize_)); + } + + private: + // Returns size_max, or whatever is left in [begin, end). + static constexpr size_t ClampedSize(T begin, size_t size_max, T end) { + return (static_cast(begin + size_max) <= end) + ? size_max + : (end > begin ? end - begin : 0); + } + + T x0_; + T y0_; + + size_t xsize_; + size_t ysize_; +}; + +template +std::string Description(RectT r) { + std::ostringstream os; + os << "[" << r.x0() << ".." << r.x1() << ")x" + << "[" << r.y0() << ".." << r.y1() << ")"; + return os.str(); +} + +using Rect = RectT; + +// Currently, we abuse Image to either refer to an image that owns its storage +// or one that doesn't. In similar vein, we abuse Image* function parameters to +// either mean "assign to me" or "fill the provided image with data". 
+// Hopefully, the "assign to me" meaning will go away and most images in the +// codebase will not be backed by own storage. When this happens we can redesign +// Image to be a non-storage-holding view class and introduce BackedImage in +// those places that actually need it. + +// NOTE: we can't use Image as a view because invariants are violated +// (alignment and the presence of padding before/after each "row"). + +// A bundle of 3 same-sized images. Typically constructed by moving from three +// rvalue references to Image. To overwrite an existing Image3 using +// single-channel producers, we also need access to Image*. Constructing +// temporary non-owning Image pointing to one plane of an existing Image3 risks +// dangling references, especially if the wrapper is moved. Therefore, we +// store an array of Image (which are compact enough that size is not a concern) +// and provide Plane+Row accessors. +template +class Image3 { + public: + using T = ComponentType; + using PlaneT = jxl::Plane; + static constexpr size_t kNumPlanes = 3; + + Image3() : planes_{PlaneT(), PlaneT(), PlaneT()} {} + + Image3(const size_t xsize, const size_t ysize) + : planes_{PlaneT(xsize, ysize), PlaneT(xsize, ysize), + PlaneT(xsize, ysize)} {} + + Image3(Image3&& other) noexcept { + for (size_t i = 0; i < kNumPlanes; i++) { + planes_[i] = std::move(other.planes_[i]); + } + } + + Image3(PlaneT&& plane0, PlaneT&& plane1, PlaneT&& plane2) { + JXL_CHECK(SameSize(plane0, plane1)); + JXL_CHECK(SameSize(plane0, plane2)); + planes_[0] = std::move(plane0); + planes_[1] = std::move(plane1); + planes_[2] = std::move(plane2); + } + + // Copy construction/assignment is forbidden to avoid inadvertent copies, + // which can be very expensive. Use CopyImageTo instead. 
+ Image3(const Image3& other) = delete; + Image3& operator=(const Image3& other) = delete; + + Image3& operator=(Image3&& other) noexcept { + for (size_t i = 0; i < kNumPlanes; i++) { + planes_[i] = std::move(other.planes_[i]); + } + return *this; + } + + // Returns row pointer; usage: PlaneRow(idx_plane, y)[x] = val. + JXL_INLINE T* PlaneRow(const size_t c, const size_t y) { + // Custom implementation instead of calling planes_[c].Row ensures only a + // single multiplication is needed for PlaneRow(0..2, y). + PlaneRowBoundsCheck(c, y); + const size_t row_offset = y * planes_[0].bytes_per_row(); + void* row = planes_[c].bytes() + row_offset; + return static_cast(JXL_ASSUME_ALIGNED(row, 64)); + } + + // Returns const row pointer; usage: val = PlaneRow(idx_plane, y)[x]. + JXL_INLINE const T* PlaneRow(const size_t c, const size_t y) const { + PlaneRowBoundsCheck(c, y); + const size_t row_offset = y * planes_[0].bytes_per_row(); + const void* row = planes_[c].bytes() + row_offset; + return static_cast(JXL_ASSUME_ALIGNED(row, 64)); + } + + // Returns const row pointer, even if called from a non-const Image3. + JXL_INLINE const T* ConstPlaneRow(const size_t c, const size_t y) const { + PlaneRowBoundsCheck(c, y); + return PlaneRow(c, y); + } + + JXL_INLINE const PlaneT& Plane(size_t idx) const { return planes_[idx]; } + + JXL_INLINE PlaneT& Plane(size_t idx) { return planes_[idx]; } + + void Swap(Image3& other) { + for (size_t c = 0; c < 3; ++c) { + other.planes_[c].Swap(planes_[c]); + } + } + + // Useful for pre-allocating image with some padding for alignment purposes + // and later reporting the actual valid dimensions. May also be used to + // un-shrink the image. Caller is responsible for ensuring xsize/ysize are <= + // the original dimensions. + void ShrinkTo(const size_t xsize, const size_t ysize) { + for (PlaneT& plane : planes_) { + plane.ShrinkTo(xsize, ysize); + } + } + + // Sizes of all three images are guaranteed to be equal. 
+ JXL_INLINE size_t xsize() const { return planes_[0].xsize(); } + JXL_INLINE size_t ysize() const { return planes_[0].ysize(); } + // Returns offset [bytes] from one row to the next row of the same plane. + // WARNING: this must NOT be used to determine xsize, nor for copying rows - + // the valid xsize may be much less. + JXL_INLINE size_t bytes_per_row() const { return planes_[0].bytes_per_row(); } + // Returns number of pixels (some of which are padding) per row. Useful for + // computing other rows via pointer arithmetic. WARNING: this must NOT be used + // to determine xsize. + JXL_INLINE intptr_t PixelsPerRow() const { return planes_[0].PixelsPerRow(); } + + private: + void PlaneRowBoundsCheck(const size_t c, const size_t y) const { +#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \ + defined(THREAD_SANITIZER) + if (c >= kNumPlanes || y >= ysize()) { + JXL_ABORT("PlaneRow(%" PRIu64 ", %" PRIu64 ") in (%" PRIu64 " x %" PRIu64 + ") image\n", + static_cast(c), static_cast(y), + static_cast(xsize()), static_cast(ysize())); + } +#endif + } + + private: + PlaneT planes_[kNumPlanes]; +}; + +using Image3B = Image3; +using Image3S = Image3; +using Image3U = Image3; +using Image3I = Image3; +using Image3F = Image3; +using Image3D = Image3; + +} // namespace jxl + +#endif // LIB_JXL_IMAGE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/image_bundle.cc b/third-party/libjxl/libjxl/lib/jxl/image_bundle.cc new file mode 100644 index 0000000000..a9bb40cf7e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image_bundle.cc @@ -0,0 +1,125 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/image_bundle.h" + +#include +#include + +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/fields.h" + +namespace jxl { + +void ImageBundle::ShrinkTo(size_t xsize, size_t ysize) { + if (HasColor()) color_.ShrinkTo(xsize, ysize); + for (ImageF& ec : extra_channels_) { + ec.ShrinkTo(xsize, ysize); + } +} + +// Called by all other SetFrom*. +void ImageBundle::SetFromImage(Image3F&& color, + const ColorEncoding& c_current) { + JXL_CHECK(color.xsize() != 0 && color.ysize() != 0); + JXL_CHECK(metadata_->color_encoding.IsGray() == c_current.IsGray()); + color_ = std::move(color); + c_current_ = c_current; + VerifySizes(); +} + +void ImageBundle::VerifyMetadata() const { + JXL_CHECK(!c_current_.ICC().empty()); + JXL_CHECK(metadata_->color_encoding.IsGray() == IsGray()); + + if (metadata_->HasAlpha() && alpha().xsize() == 0) { + JXL_UNREACHABLE("MD alpha_bits %u IB alpha %" PRIuS " x %" PRIuS "\n", + metadata_->GetAlphaBits(), alpha().xsize(), + alpha().ysize()); + } + const uint32_t alpha_bits = metadata_->GetAlphaBits(); + JXL_CHECK(alpha_bits <= 32); + + // metadata_->num_extra_channels may temporarily differ from + // extra_channels_.size(), e.g. after SetAlpha. They are synced by the next + // call to VisitFields. +} + +void ImageBundle::VerifySizes() const { + const size_t xs = xsize(); + const size_t ys = ysize(); + + if (HasExtraChannels()) { + JXL_CHECK(xs != 0 && ys != 0); + for (const ImageF& ec : extra_channels_) { + JXL_CHECK(ec.xsize() == xs); + JXL_CHECK(ec.ysize() == ys); + } + } +} + +size_t ImageBundle::DetectRealBitdepth() const { + return metadata_->bit_depth.bits_per_sample; + + // TODO(lode): let this function return lower bit depth if possible, e.g. + // return 8 bits in case the original image came from a 16-bit PNG that + // was in fact representable as 8-bit PNG. 
Ensure that the implementation + // returns 16 if e.g. two consecutive 16-bit values appeared in the original + // image (such as 32768 and 32769), take into account that e.g. the values + // 3-bit can represent is not a superset of the values 2-bit can represent, + // and there may be slight imprecisions in the floating point image. +} + +const ImageF& ImageBundle::black() const { + JXL_ASSERT(HasBlack()); + const size_t ec = metadata_->Find(ExtraChannel::kBlack) - + metadata_->extra_channel_info.data(); + JXL_ASSERT(ec < extra_channels_.size()); + return extra_channels_[ec]; +} +const ImageF& ImageBundle::alpha() const { + JXL_ASSERT(HasAlpha()); + const size_t ec = metadata_->Find(ExtraChannel::kAlpha) - + metadata_->extra_channel_info.data(); + JXL_ASSERT(ec < extra_channels_.size()); + return extra_channels_[ec]; +} +ImageF* ImageBundle::alpha() { + JXL_ASSERT(HasAlpha()); + const size_t ec = metadata_->Find(ExtraChannel::kAlpha) - + metadata_->extra_channel_info.data(); + JXL_ASSERT(ec < extra_channels_.size()); + return &extra_channels_[ec]; +} + +void ImageBundle::SetAlpha(ImageF&& alpha) { + const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha); + // Must call SetAlphaBits first, otherwise we don't know which channel index + JXL_CHECK(eci != nullptr); + JXL_CHECK(alpha.xsize() != 0 && alpha.ysize() != 0); + if (extra_channels_.size() < metadata_->extra_channel_info.size()) { + // TODO(jon): get rid of this case + extra_channels_.insert( + extra_channels_.begin() + (eci - metadata_->extra_channel_info.data()), + std::move(alpha)); + } else { + extra_channels_[eci - metadata_->extra_channel_info.data()] = + std::move(alpha); + } + // num_extra_channels is automatically set in visitor + VerifySizes(); +} + +void ImageBundle::SetExtraChannels(std::vector&& extra_channels) { + for (const ImageF& plane : extra_channels) { + JXL_CHECK(plane.xsize() != 0 && plane.ysize() != 0); + } + extra_channels_ = std::move(extra_channels); + VerifySizes(); +} +} 
// namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/image_bundle.h b/third-party/libjxl/libjxl/lib/jxl/image_bundle.h new file mode 100644 index 0000000000..f64f8cfcd6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image_bundle.h @@ -0,0 +1,257 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_IMAGE_BUNDLE_H_ +#define LIB_JXL_IMAGE_BUNDLE_H_ + +// The main image or frame consists of a bundle of associated images. + +#include +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_metadata.h" +#include "lib/jxl/jpeg/jpeg_data.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/quantizer.h" + +namespace jxl { + +// A bundle of color/alpha/depth/plane images. +class ImageBundle { + public: + // Uninitialized state for use as output parameter. + ImageBundle() : metadata_(nullptr) {} + // Caller is responsible for setting metadata before calling Set*. + explicit ImageBundle(const ImageMetadata* metadata) : metadata_(metadata) {} + + // Move-only (allows storing in std::vector). 
+ ImageBundle(ImageBundle&&) = default; + ImageBundle& operator=(ImageBundle&&) = default; + + ImageBundle Copy() const { + ImageBundle copy(metadata_); + copy.color_ = Image3F(color_.xsize(), color_.ysize()); + CopyImageTo(color_, ©.color_); + copy.c_current_ = c_current_; + copy.extra_channels_.reserve(extra_channels_.size()); + for (const ImageF& plane : extra_channels_) { + ImageF ec(plane.xsize(), plane.ysize()); + CopyImageTo(plane, &ec); + copy.extra_channels_.emplace_back(std::move(ec)); + } + + copy.jpeg_data = + jpeg_data ? make_unique(*jpeg_data) : nullptr; + copy.color_transform = color_transform; + copy.chroma_subsampling = chroma_subsampling; + + return copy; + } + + // -- SIZE + + size_t xsize() const { + if (IsJPEG()) return jpeg_data->width; + if (color_.xsize() != 0) return color_.xsize(); + return extra_channels_.empty() ? 0 : extra_channels_[0].xsize(); + } + size_t ysize() const { + if (IsJPEG()) return jpeg_data->height; + if (color_.ysize() != 0) return color_.ysize(); + return extra_channels_.empty() ? 0 : extra_channels_[0].ysize(); + } + void ShrinkTo(size_t xsize, size_t ysize); + + // sizes taking orientation into account + size_t oriented_xsize() const { + if (static_cast(metadata_->GetOrientation()) > 4) { + return ysize(); + } else { + return xsize(); + } + } + size_t oriented_ysize() const { + if (static_cast(metadata_->GetOrientation()) > 4) { + return xsize(); + } else { + return ysize(); + } + } + + // -- COLOR + + // Whether color() is valid/usable. Returns true in most cases. Even images + // with spot colors (one example of when !planes().empty()) typically have a + // part that can be converted to RGB. + bool HasColor() const { return color_.xsize() != 0; } + + // For resetting the size when switching from a reference to main frame. + void RemoveColor() { color_ = Image3F(); } + + // Do not use if !HasColor(). + const Image3F& color() const { + // If this fails, Set* was not called - perhaps because decoding failed? 
+ JXL_DASSERT(HasColor()); + return color_; + } + + // Do not use if !HasColor(). + Image3F* color() { + JXL_DASSERT(HasColor()); + return &color_; + } + + // If c_current.IsGray(), all planes must be identical. NOTE: c_current is + // independent of metadata()->color_encoding, which is the original, whereas + // a decoder might return pixels in a different c_current. + // This only sets the color channels, you must also make extra channels + // match the amount that is in the metadata. + void SetFromImage(Image3F&& color, const ColorEncoding& c_current); + + // -- COLOR ENCODING + + const ColorEncoding& c_current() const { return c_current_; } + + // Returns whether the color image has identical planes. Once established by + // Set*, remains unchanged until a subsequent Set* or TransformTo. + bool IsGray() const { return c_current_.IsGray(); } + + bool IsSRGB() const { return c_current_.IsSRGB(); } + bool IsLinearSRGB() const { + return c_current_.white_point == WhitePoint::kD65 && + c_current_.primaries == Primaries::kSRGB && c_current_.tf.IsLinear(); + } + + // Set the c_current profile without doing any transformation, e.g. if the + // transformation was already applied. + void OverrideProfile(const ColorEncoding& new_c_current) { + c_current_ = new_c_current; + } + + // TODO(lode): TransformTo and CopyTo are implemented in enc_image_bundle.cc, + // move these functions out of this header file and class, to + // enc_image_bundle.h. + + // Transforms color to c_desired and sets c_current to c_desired. Alpha and + // metadata remains unchanged. + Status TransformTo(const ColorEncoding& c_desired, const JxlCmsInterface& cms, + ThreadPool* pool = nullptr); + // Copies this:rect, converts to c_desired, and allocates+fills out. 
+ Status CopyTo(const Rect& rect, const ColorEncoding& c_desired, + const JxlCmsInterface& cms, Image3F* out, + ThreadPool* pool = nullptr) const; + + // Detect 'real' bit depth, which can be lower than nominal bit depth + // (this is common in PNG), returns 'real' bit depth + size_t DetectRealBitdepth() const; + + // -- ALPHA + + void SetAlpha(ImageF&& alpha); + bool HasAlpha() const { + return metadata_->Find(ExtraChannel::kAlpha) != nullptr; + } + bool AlphaIsPremultiplied() const { + const ExtraChannelInfo* eci = metadata_->Find(ExtraChannel::kAlpha); + return (eci == nullptr) ? false : eci->alpha_associated; + } + const ImageF& alpha() const; + ImageF* alpha(); + + // -- EXTRA CHANNELS + bool HasBlack() const { + return metadata_->Find(ExtraChannel::kBlack) != nullptr; + } + const ImageF& black() const; + + // Extra channels of unknown interpretation (e.g. spot colors). + void SetExtraChannels(std::vector&& extra_channels); + void ClearExtraChannels() { extra_channels_.clear(); } + bool HasExtraChannels() const { return !extra_channels_.empty(); } + const std::vector& extra_channels() const { return extra_channels_; } + std::vector& extra_channels() { return extra_channels_; } + + const ImageMetadata* metadata() const { return metadata_; } + + void VerifyMetadata() const; + + void SetDecodedBytes(size_t decoded_bytes) { decoded_bytes_ = decoded_bytes; } + size_t decoded_bytes() const { return decoded_bytes_; } + + // -- JPEG transcoding: + + // Returns true if image does or will represent quantized DCT-8 coefficients, + // stored in 8x8 pixel regions. + bool IsJPEG() const { +#if JPEGXL_ENABLE_TRANSCODE_JPEG + return jpeg_data != nullptr; +#else // JPEGXL_ENABLE_TRANSCODE_JPEG + return false; +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + } + + std::unique_ptr jpeg_data; + // these fields are used to signal the input JPEG color space + // NOTE: JPEG doesn't actually provide a way to determine whether YCbCr was + // applied or not. 
+ ColorTransform color_transform = ColorTransform::kNone; + YCbCrChromaSubsampling chroma_subsampling; + + FrameOrigin origin{0, 0}; + + // Animation-related information, corresponding to the timecode and duration + // fields of the jxl::AnimationFrame of the jxl::FrameHeader. + // TODO(lode): ImageBundle is used here to carry the information from + // jxl::FrameHeader, consider instead passing a jxl::FrameHeader directly to + // EncodeFrame or having a field of that type here. + uint32_t duration = 0; + uint32_t timecode = 0; + + // TODO(lode): these fields do not match the JXL frame header, it should be + // possible to specify up to 4 (3 if nonzero duration) slots to save this + // frame as reference (see save_as_reference). + bool use_for_next_frame = false; + bool blend = false; + BlendMode blendmode = BlendMode::kBlend; + + std::string name; + + private: + // Called after any Set* to ensure their sizes are compatible. + void VerifySizes() const; + + // Required for TransformTo so that an ImageBundle is self-sufficient. Always + // points to the same thing, but cannot be const-pointer because that prevents + // the compiler from generating a move ctor. + const ImageMetadata* metadata_; + + // Initialized by Set*: + Image3F color_; // If empty, planes_ is not; all planes equal if IsGray(). + ColorEncoding c_current_; // of color_ + + // Initialized by SetPlanes; size = ImageMetadata.num_extra_channels + std::vector extra_channels_; + + // How many bytes of the input were actually read. + size_t decoded_bytes_ = 0; +}; + +} // namespace jxl + +#endif // LIB_JXL_IMAGE_BUNDLE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/image_bundle_test.cc b/third-party/libjxl/libjxl/lib/jxl/image_bundle_test.cc new file mode 100644 index 0000000000..1a10598fe2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image_bundle_test.cc @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/image_bundle.h" + +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(ImageBundleTest, ExtraChannelName) { + AuxOut aux_out; + BitWriter writer; + BitWriter::Allotment allotment(&writer, 99); + + ImageMetadata metadata; + ExtraChannelInfo eci; + eci.type = ExtraChannel::kBlack; + eci.name = "testK"; + metadata.extra_channel_info.push_back(std::move(eci)); + ASSERT_TRUE(WriteImageMetadata(metadata, &writer, /*layer=*/0, &aux_out)); + writer.ZeroPadToByte(); + allotment.ReclaimAndCharge(&writer, /*layer=*/0, &aux_out); + + BitReader reader(writer.GetSpan()); + ImageMetadata metadata_out; + ASSERT_TRUE(ReadImageMetadata(&reader, &metadata_out)); + EXPECT_TRUE(reader.Close()); + EXPECT_EQ("testK", metadata_out.Find(ExtraChannel::kBlack)->name); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/image_metadata.cc b/third-party/libjxl/libjxl/lib/jxl/image_metadata.cc new file mode 100644 index 0000000000..eef1f1f447 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image_metadata.cc @@ -0,0 +1,477 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/image_metadata.h" + +#include +#include + +#include "lib/jxl/alpha.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/frame_header.h" +#include "lib/jxl/quantizer.h" + +namespace jxl { +BitDepth::BitDepth() { Bundle::Init(this); } +Status BitDepth::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &floating_point_sample)); + // The same fields (bits_per_sample and exponent_bits_per_sample) are read + // in a different way depending on floating_point_sample's value. It's still + // default-initialized correctly so using visitor->Conditional is not + // required. + if (!floating_point_sample) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + Val(8), Val(10), Val(12), BitsOffset(6, 1), 8, &bits_per_sample)); + exponent_bits_per_sample = 0; + } else { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + Val(32), Val(16), Val(24), BitsOffset(6, 1), 32, &bits_per_sample)); + // The encoded value is exponent_bits_per_sample - 1, encoded in 3 bits + // so the value can be in range [1, 8]. + const uint32_t offset = 1; + exponent_bits_per_sample -= offset; + JXL_QUIET_RETURN_IF_ERROR( + visitor->Bits(4, 8 - offset, &exponent_bits_per_sample)); + exponent_bits_per_sample += offset; + } + + // Error-checking for floating point ranges. 
+ if (floating_point_sample) { + if (exponent_bits_per_sample < 2 || exponent_bits_per_sample > 8) { + return JXL_FAILURE("Invalid exponent_bits_per_sample: %u", + exponent_bits_per_sample); + } + int mantissa_bits = + static_cast(bits_per_sample) - exponent_bits_per_sample - 1; + if (mantissa_bits < 2 || mantissa_bits > 23) { + return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample); + } + } else { + if (bits_per_sample > 31) { + return JXL_FAILURE("Invalid bits_per_sample: %u", bits_per_sample); + } + } + return true; +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string BitDepth::DebugString() const { + std::ostringstream os; + os << (floating_point_sample ? "F" : "U"); + os << bits_per_sample; + if (floating_point_sample) os << "." << exponent_bits_per_sample; + return os.str(); +} +#endif + +CustomTransformData::CustomTransformData() { Bundle::Init(this); } +Status CustomTransformData::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->AllDefault(*this, &all_default)) { + // Overwrite all serialized fields, but not any nonserialized_*. + visitor->SetDefault(this); + return true; + } + if (visitor->Conditional(nonserialized_xyb_encoded)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&opsin_inverse_matrix)); + } + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &custom_weights_mask)); + if (visitor->Conditional((custom_weights_mask & 0x1) != 0)) { + // 4 5x5 kernels, but all of them can be obtained by symmetry from one, + // which is symmetric along its main diagonal. 
The top-left kernel is + // defined by + // + // 0 1 2 3 4 + // 1 5 6 7 8 + // 2 6 9 10 11 + // 3 7 10 12 13 + // 4 8 11 13 14 + float constexpr kWeights2[15] = { + -0.01716200f, -0.03452303f, -0.04022174f, -0.02921014f, -0.00624645f, + 0.14111091f, 0.28896755f, 0.00278718f, -0.01610267f, 0.56661550f, + 0.03777607f, -0.01986694f, -0.03144731f, -0.01185068f, -0.00213539f}; + for (size_t i = 0; i < 15; i++) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->F16(kWeights2[i], &upsampling2_weights[i])); + } + } + if (visitor->Conditional((custom_weights_mask & 0x2) != 0)) { + // 16 5x5 kernels, but all of them can be obtained by symmetry from + // three, two of which are symmetric along their main diagonals. The top + // left 4 kernels are defined by + // + // 0 1 2 3 4 5 6 7 8 9 + // 1 10 11 12 13 14 15 16 17 18 + // 2 11 19 20 21 22 23 24 25 26 + // 3 12 20 27 28 29 30 31 32 33 + // 4 13 21 28 34 35 36 37 38 39 + // + // 5 14 22 29 35 40 41 42 43 44 + // 6 15 23 30 36 41 45 46 47 48 + // 7 16 24 31 37 42 46 49 50 51 + // 8 17 25 32 38 43 47 50 52 53 + // 9 18 26 33 39 44 48 51 53 54 + constexpr float kWeights4[55] = { + -0.02419067f, -0.03491987f, -0.03693351f, -0.03094285f, -0.00529785f, + -0.01663432f, -0.03556863f, -0.03888905f, -0.03516850f, -0.00989469f, + 0.23651958f, 0.33392945f, -0.01073543f, -0.01313181f, -0.03556694f, + 0.13048175f, 0.40103025f, 0.03951150f, -0.02077584f, 0.46914198f, + -0.00209270f, -0.01484589f, -0.04064806f, 0.18942530f, 0.56279892f, + 0.06674400f, -0.02335494f, -0.03551682f, -0.00754830f, -0.02267919f, + -0.02363578f, 0.00315804f, -0.03399098f, -0.01359519f, -0.00091653f, + -0.00335467f, -0.01163294f, -0.01610294f, -0.00974088f, -0.00191622f, + -0.01095446f, -0.03198464f, -0.04455121f, -0.02799790f, -0.00645912f, + 0.06390599f, 0.22963888f, 0.00630981f, -0.01897349f, 0.67537268f, + 0.08483369f, -0.02534994f, -0.02205197f, -0.01667999f, -0.00384443f}; + for (size_t i = 0; i < 55; i++) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->F16(kWeights4[i], 
&upsampling4_weights[i])); + } + } + if (visitor->Conditional((custom_weights_mask & 0x4) != 0)) { + // 64 5x5 kernels, all of them can be obtained by symmetry from + // 10, 4 of which are symmetric along their main diagonals. The top + // left 16 kernels are defined by + // 0 1 2 3 4 5 6 7 8 9 a b c d e f 10 11 12 13 + // 1 14 15 16 17 18 19 1a 1b 1c 1d 1e 1f 20 21 22 23 24 25 26 + // 2 15 27 28 29 2a 2b 2c 2d 2e 2f 30 31 32 33 34 35 36 37 38 + // 3 16 28 39 3a 3b 3c 3d 3e 3f 40 41 42 43 44 45 46 47 48 49 + // 4 17 29 3a 4a 4b 4c 4d 4e 4f 50 51 52 53 54 55 56 57 58 59 + + // 5 18 2a 3b 4b 5a 5b 5c 5d 5e 5f 60 61 62 63 64 65 66 67 68 + // 6 19 2b 3c 4c 5b 69 6a 6b 6c 6d 6e 6f 70 71 72 73 74 75 76 + // 7 1a 2c 3d 4d 5c 6a 77 78 79 7a 7b 7c 7d 7e 7f 80 81 82 83 + // 8 1b 2d 3e 4e 5d 6b 78 84 85 86 87 88 89 8a 8b 8c 8d 8e 8f + // 9 1c 2e 3f 4f 5e 6c 79 85 90 91 92 93 94 95 96 97 98 99 9a + + // a 1d 2f 40 50 5f 6d 7a 86 91 9b 9c 9d 9e 9f a0 a1 a2 a3 a4 + // b 1e 30 41 51 60 6e 7b 87 92 9c a5 a6 a7 a8 a9 aa ab ac ad + // c 1f 31 42 52 61 6f 7c 88 93 9d a6 ae af b0 b1 b2 b3 b4 b5 + // d 20 32 43 53 62 70 7d 89 94 9e a7 af b6 b7 b8 b9 ba bb bc + // e 21 33 44 54 63 71 7e 8a 95 9f a8 b0 b7 bd be bf c0 c1 c2 + + // f 22 34 45 55 64 72 7f 8b 96 a0 a9 b1 b8 be c3 c4 c5 c6 c7 + // 10 23 35 46 56 65 73 80 8c 97 a1 aa b2 b9 bf c4 c8 c9 ca cb + // 11 24 36 47 57 66 74 81 8d 98 a2 ab b3 ba c0 c5 c9 cc cd ce + // 12 25 37 48 58 67 75 82 8e 99 a3 ac b4 bb c1 c6 ca cd cf d0 + // 13 26 38 49 59 68 76 83 8f 9a a4 ad b5 bc c2 c7 cb ce d0 d1 + constexpr float kWeights8[210] = { + -0.02928613f, -0.03706353f, -0.03783812f, -0.03324558f, -0.00447632f, + -0.02519406f, -0.03752601f, -0.03901508f, -0.03663285f, -0.00646649f, + -0.02066407f, -0.03838633f, -0.04002101f, -0.03900035f, -0.00901973f, + -0.01626393f, -0.03954148f, -0.04046620f, -0.03979621f, -0.01224485f, + 0.29895328f, 0.35757708f, -0.02447552f, -0.01081748f, -0.04314594f, + 0.23903219f, 0.41119301f, -0.00573046f, -0.01450239f, 
-0.04246845f, + 0.17567618f, 0.45220643f, 0.02287757f, -0.01936783f, -0.03583255f, + 0.11572472f, 0.47416733f, 0.06284440f, -0.02685066f, 0.42720050f, + -0.02248939f, -0.01155273f, -0.04562755f, 0.28689496f, 0.49093869f, + -0.00007891f, -0.01545926f, -0.04562659f, 0.21238920f, 0.53980934f, + 0.03369474f, -0.02070211f, -0.03866988f, 0.14229550f, 0.56593398f, + 0.08045181f, -0.02888298f, -0.03680918f, -0.00542229f, -0.02920477f, + -0.02788574f, -0.02118180f, -0.03942402f, -0.00775547f, -0.02433614f, + -0.03193943f, -0.02030828f, -0.04044014f, -0.01074016f, -0.01930822f, + -0.03620399f, -0.01974125f, -0.03919545f, -0.01456093f, -0.00045072f, + -0.00360110f, -0.01020207f, -0.01231907f, -0.00638988f, -0.00071592f, + -0.00279122f, -0.00957115f, -0.01288327f, -0.00730937f, -0.00107783f, + -0.00210156f, -0.00890705f, -0.01317668f, -0.00813895f, -0.00153491f, + -0.02128481f, -0.04173044f, -0.04831487f, -0.03293190f, -0.00525260f, + -0.01720322f, -0.04052736f, -0.05045706f, -0.03607317f, -0.00738030f, + -0.01341764f, -0.03965629f, -0.05151616f, -0.03814886f, -0.01005819f, + 0.18968273f, 0.33063684f, -0.01300105f, -0.01372950f, -0.04017465f, + 0.13727832f, 0.36402234f, 0.01027890f, -0.01832107f, -0.03365072f, + 0.08734506f, 0.38194295f, 0.04338228f, -0.02525993f, 0.56408126f, + 0.00458352f, -0.01648227f, -0.04887868f, 0.24585519f, 0.62026135f, + 0.04314807f, -0.02213737f, -0.04158014f, 0.16637289f, 0.65027023f, + 0.09621636f, -0.03101388f, -0.04082742f, -0.00904519f, -0.02790922f, + -0.02117818f, 0.00798662f, -0.03995711f, -0.01243427f, -0.02231705f, + -0.02946266f, 0.00992055f, -0.03600283f, -0.01684920f, -0.00111684f, + -0.00411204f, -0.01297130f, -0.01723725f, -0.01022545f, -0.00165306f, + -0.00313110f, -0.01218016f, -0.01763266f, -0.01125620f, -0.00231663f, + -0.01374149f, -0.03797620f, -0.05142937f, -0.03117307f, -0.00581914f, + -0.01064003f, -0.03608089f, -0.05272168f, -0.03375670f, -0.00795586f, + 0.09628104f, 0.27129991f, -0.00353779f, -0.01734151f, -0.03153981f, + 
0.05686230f, 0.28500998f, 0.02230594f, -0.02374955f, 0.68214326f, + 0.05018048f, -0.02320852f, -0.04383616f, 0.18459474f, 0.71517975f, + 0.10805613f, -0.03263677f, -0.03637639f, -0.01394373f, -0.02511203f, + -0.01728636f, 0.05407331f, -0.02867568f, -0.01893131f, -0.00240854f, + -0.00446511f, -0.01636187f, -0.02377053f, -0.01522848f, -0.00333334f, + -0.00819975f, -0.02964169f, -0.04499287f, -0.02745350f, -0.00612408f, + 0.02727416f, 0.19446600f, 0.00159832f, -0.02232473f, 0.74982506f, + 0.11452620f, -0.03348048f, -0.01605681f, -0.02070339f, -0.00458223f}; + for (size_t i = 0; i < 210; i++) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->F16(kWeights8[i], &upsampling8_weights[i])); + } + } + return true; +} + +ExtraChannelInfo::ExtraChannelInfo() { Bundle::Init(this); } +Status ExtraChannelInfo::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->AllDefault(*this, &all_default)) { + // Overwrite all serialized fields, but not any nonserialized_*. + visitor->SetDefault(this); + return true; + } + + // General + JXL_QUIET_RETURN_IF_ERROR(visitor->Enum(ExtraChannel::kAlpha, &type)); + + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth)); + + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), Val(3), Val(4), BitsOffset(3, 1), 0, &dim_shift)); + if ((1U << dim_shift) > 8) { + return JXL_FAILURE("dim_shift %u too large", dim_shift); + } + + JXL_QUIET_RETURN_IF_ERROR(VisitNameString(visitor, &name)); + + // Conditional + if (visitor->Conditional(type == ExtraChannel::kAlpha)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &alpha_associated)); + } + if (visitor->Conditional(type == ExtraChannel::kSpotColor)) { + for (float& c : spot_color) { + JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0, &c)); + } + } + if (visitor->Conditional(type == ExtraChannel::kCFA)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(1), Bits(2), BitsOffset(4, 3), + BitsOffset(8, 19), 1, &cfa_channel)); + } + + if (type == ExtraChannel::kUnknown || + (int(ExtraChannel::kReserved0) <= int(type) 
&& + int(type) <= int(ExtraChannel::kReserved7))) { + return JXL_FAILURE("Unknown extra channel (bits %u, shift %u, name '%s')\n", + bit_depth.bits_per_sample, dim_shift, name.c_str()); + } + return true; +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string ExtraChannelInfo::DebugString() const { + std::ostringstream os; + os << (type == ExtraChannel::kAlpha ? "Alpha" + : type == ExtraChannel::kDepth ? "Depth" + : type == ExtraChannel::kSpotColor ? "Spot" + : type == ExtraChannel::kSelectionMask ? "Mask" + : type == ExtraChannel::kBlack ? "Black" + : type == ExtraChannel::kCFA ? "CFA" + : type == ExtraChannel::kThermal ? "Thermal" + : "Unknown"); + if (type == ExtraChannel::kAlpha && alpha_associated) os << "(premul)"; + os << " " << bit_depth.DebugString(); + os << " shift: " << dim_shift; + return os.str(); +} +#endif + +ImageMetadata::ImageMetadata() { Bundle::Init(this); } +Status ImageMetadata::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->AllDefault(*this, &all_default)) { + // Overwrite all serialized fields, but not any nonserialized_*. + visitor->SetDefault(this); + return true; + } + + // Bundle::AllDefault does not allow usage when reading (it may abort the + // program when a codestream has invalid values), but when reading we + // overwrite the extra_fields value, so do not need to call AllDefault. + bool tone_mapping_default = + visitor->IsReading() ? 
false : Bundle::AllDefault(tone_mapping); + + bool extra_fields = (orientation != 1 || have_preview || have_animation || + have_intrinsic_size || !tone_mapping_default); + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &extra_fields)); + if (visitor->Conditional(extra_fields)) { + orientation--; + JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(3, 0, &orientation)); + orientation++; + // (No need for bounds checking because we read exactly 3 bits) + + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_intrinsic_size)); + if (visitor->Conditional(have_intrinsic_size)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&intrinsic_size)); + } + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_preview)); + if (visitor->Conditional(have_preview)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&preview_size)); + } + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &have_animation)); + if (visitor->Conditional(have_animation)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&animation)); + } + } else { + orientation = 1; // identity + have_intrinsic_size = false; + have_preview = false; + have_animation = false; + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&bit_depth)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->Bool(true, &modular_16_bit_buffer_sufficient)); + + num_extra_channels = extra_channel_info.size(); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2), + BitsOffset(12, 1), 0, + &num_extra_channels)); + + if (visitor->Conditional(num_extra_channels != 0)) { + if (visitor->IsReading()) { + extra_channel_info.resize(num_extra_channels); + } + for (ExtraChannelInfo& eci : extra_channel_info) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&eci)); + } + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &xyb_encoded)); + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&color_encoding)); + if (visitor->Conditional(extra_fields)) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&tone_mapping)); + } + + // Treat as if only the fields 
up to extra channels exist. + if (visitor->IsReading() && nonserialized_only_parse_basic_info) { + return true; + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions)); + // Extensions: in chronological order of being added to the format. + return visitor->EndExtensions(); +} + +OpsinInverseMatrix::OpsinInverseMatrix() { Bundle::Init(this); } +Status OpsinInverseMatrix::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->AllDefault(*this, &all_default)) { + // Overwrite all serialized fields, but not any nonserialized_*. + visitor->SetDefault(this); + return true; + } + for (int i = 0; i < 9; ++i) { + JXL_QUIET_RETURN_IF_ERROR(visitor->F16( + DefaultInverseOpsinAbsorbanceMatrix()[i], &inverse_matrix[i])); + } + for (int i = 0; i < 3; ++i) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->F16(kNegOpsinAbsorbanceBiasRGB[i], &opsin_biases[i])); + } + for (int i = 0; i < 4; ++i) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->F16(kDefaultQuantBias[i], &quant_biases[i])); + } + return true; +} + +ToneMapping::ToneMapping() { Bundle::Init(this); } +Status ToneMapping::VisitFields(Visitor* JXL_RESTRICT visitor) { + if (visitor->AllDefault(*this, &all_default)) { + // Overwrite all serialized fields, but not any nonserialized_*. + visitor->SetDefault(this); + return true; + } + + JXL_QUIET_RETURN_IF_ERROR( + visitor->F16(kDefaultIntensityTarget, &intensity_target)); + if (intensity_target <= 0.f) { + return JXL_FAILURE("invalid intensity target"); + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &min_nits)); + if (min_nits < 0.f || min_nits > intensity_target) { + return JXL_FAILURE("invalid min %f vs max %f", min_nits, intensity_target); + } + + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &relative_to_max_display)); + + JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.0f, &linear_below)); + if (linear_below < 0 || (relative_to_max_display && linear_below > 1.0f)) { + return JXL_FAILURE("invalid linear_below %f (%s)", linear_below, + relative_to_max_display ? 
"relative" : "absolute"); + } + + return true; +} + +Status ReadImageMetadata(BitReader* JXL_RESTRICT reader, + ImageMetadata* JXL_RESTRICT metadata) { + return Bundle::Read(reader, metadata); +} + +void ImageMetadata::SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied) { + std::vector& eciv = extra_channel_info; + ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha); + if (bits == 0) { + if (alpha != nullptr) { + // Remove the alpha channel from the extra channel info. It's + // theoretically possible that there are multiple, remove all in that + // case. This ensure a next HasAlpha() will return false. + const auto is_alpha = [](const ExtraChannelInfo& eci) { + return eci.type == ExtraChannel::kAlpha; + }; + eciv.erase(std::remove_if(eciv.begin(), eciv.end(), is_alpha), + eciv.end()); + } + } else { + if (alpha == nullptr) { + ExtraChannelInfo info; + info.type = ExtraChannel::kAlpha; + info.bit_depth.bits_per_sample = bits; + info.dim_shift = 0; + info.alpha_associated = alpha_is_premultiplied; + // Prepend rather than append: in case there already are other extra + // channels, prefer alpha channel to be listed first. + eciv.insert(eciv.begin(), info); + } else { + // Ignores potential extra alpha channels, only sets to first one. + alpha->bit_depth.bits_per_sample = bits; + alpha->bit_depth.floating_point_sample = false; + alpha->bit_depth.exponent_bits_per_sample = 0; + alpha->alpha_associated = alpha_is_premultiplied; + } + } + num_extra_channels = extra_channel_info.size(); + if (bits > 12) modular_16_bit_buffer_sufficient = false; +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string ImageMetadata::DebugString() const { + std::ostringstream os; + os << bit_depth.DebugString(); + if (modular_16_bit_buffer_sufficient) { + os << " (modular 16)"; + } + os << (xyb_encoded ? 
" xyb encoded" : " orig profile"); + os << " " << Description(color_encoding); + if (num_extra_channels > 0) { + os << " extra channels:"; + for (size_t i = 0; i < num_extra_channels; ++i) { + os << " (" << extra_channel_info[i].DebugString() << ")"; + if (i + 1 < num_extra_channels) os << ","; + } + } + if (have_preview) { + os << " preview: " << preview_size.xsize() << "x" << preview_size.ysize(); + } + if (orientation != 1) { + os << " orientation: " << orientation; + } + return os.str(); +} + +std::string CodecMetadata::DebugString() const { + std::ostringstream os; + os << size.xsize() << "x" << size.ysize(); + os << " " << m.DebugString(); + return os.str(); +} +#endif + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/image_metadata.h b/third-party/libjxl/libjxl/lib/jxl/image_metadata.h new file mode 100644 index 0000000000..ca69eb3a3d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image_metadata.h @@ -0,0 +1,425 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Main codestream header bundles, the metadata that applies to all frames. +// Enums must align with the C API definitions in codestream_header.h. + +#ifndef LIB_JXL_IMAGE_METADATA_H_ +#define LIB_JXL_IMAGE_METADATA_H_ + +#include +#include +#include + +#include +#include + +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/headers.h" +#include "lib/jxl/jpeg/jpeg_data.h" +#include "lib/jxl/opsin_params.h" + +namespace jxl { + +struct AuxOut; + +// EXIF orientation of the image. This field overrides any field present in +// actual EXIF metadata. The value tells which transformation the decoder must +// apply after decoding to display the image with the correct orientation. +enum class Orientation : uint32_t { + // Values 1..8 match the EXIF definitions. 
+ kIdentity = JXL_ORIENT_IDENTITY, + kFlipHorizontal = JXL_ORIENT_FLIP_HORIZONTAL, + kRotate180 = JXL_ORIENT_ROTATE_180, + kFlipVertical = JXL_ORIENT_FLIP_VERTICAL, + kTranspose = JXL_ORIENT_TRANSPOSE, + kRotate90 = JXL_ORIENT_ROTATE_90_CW, + kAntiTranspose = JXL_ORIENT_ANTI_TRANSPOSE, + kRotate270 = JXL_ORIENT_ROTATE_90_CCW, +}; +// Don't need an EnumBits because Orientation is not read via Enum(). + +enum class ExtraChannel : uint32_t { + // First two enumerators (most common) are cheaper to encode + kAlpha = JXL_CHANNEL_ALPHA, + kDepth = JXL_CHANNEL_DEPTH, + + kSpotColor = JXL_CHANNEL_SPOT_COLOR, + kSelectionMask = JXL_CHANNEL_SELECTION_MASK, + kBlack = JXL_CHANNEL_BLACK, // for CMYK + kCFA = JXL_CHANNEL_CFA, // Bayer channel + kThermal = JXL_CHANNEL_THERMAL, + kReserved0 = JXL_CHANNEL_RESERVED0, + kReserved1 = JXL_CHANNEL_RESERVED1, + kReserved2 = JXL_CHANNEL_RESERVED2, + kReserved3 = JXL_CHANNEL_RESERVED3, + kReserved4 = JXL_CHANNEL_RESERVED4, + kReserved5 = JXL_CHANNEL_RESERVED5, + kReserved6 = JXL_CHANNEL_RESERVED6, + kReserved7 = JXL_CHANNEL_RESERVED7, + // disambiguated via name string, raise warning if unsupported + kUnknown = JXL_CHANNEL_UNKNOWN, + // like kUnknown but can silently be ignored + kOptional = JXL_CHANNEL_OPTIONAL +}; +static inline const char* EnumName(ExtraChannel /*unused*/) { + return "ExtraChannel"; +} +static inline constexpr uint64_t EnumBits(ExtraChannel /*unused*/) { + using EC = ExtraChannel; + return MakeBit(EC::kAlpha) | MakeBit(EC::kDepth) | MakeBit(EC::kSpotColor) | + MakeBit(EC::kSelectionMask) | MakeBit(EC::kBlack) | MakeBit(EC::kCFA) | + MakeBit(EC::kThermal) | MakeBit(EC::kUnknown) | MakeBit(EC::kOptional); +} + +// Used in ImageMetadata and ExtraChannelInfo. 
// Sample format of the original (uncompressed) image data: integer vs.
// floating point, and how many bits each sample / exponent occupies.
struct BitDepth : public Fields {
  BitDepth();
  JXL_FIELDS_NAME(BitDepth)

  // Visits the serialized fields and validates them. For floating point
  // samples it rejects exponent_bits_per_sample outside [2, 8] and mantissa
  // widths outside [2, 23]; for integer samples it rejects
  // bits_per_sample > 31.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Returns "U<bits>" for integer samples or "F<bits>.<exponent bits>" for
  // floating point samples. The definition in image_metadata.cc is only
  // compiled when JXL_DEBUG_V_LEVEL >= 1.
  std::string DebugString() const;

  // Whether the original (uncompressed) samples are floating point or
  // unsigned integer.
  bool floating_point_sample;

  // Bit depth of the original (uncompressed) image samples. Must be in the
  // range [1, 32].
  // NOTE(review): VisitFields fails for integer samples with more than 31
  // bits, so 32 is only reachable for floating point samples.
  uint32_t bits_per_sample;

  // Floating point exponent bits of the original (uncompressed) image samples,
  // only used if floating_point_sample is true.
  // If used, the samples are floating point with:
  // - 1 sign bit
  // - exponent_bits_per_sample exponent bits
  // - (bits_per_sample - exponent_bits_per_sample - 1) mantissa bits
  // If used, exponent_bits_per_sample must be in the range
  // [2, 8] and amount of mantissa bits must be in the range [2, 23].
  // NOTE: exponent_bits_per_sample is 8 for single precision (binary32)
  // floating point, 5 for half precision (binary16), 7 for fp24.
  uint32_t exponent_bits_per_sample;
};

// Describes one extra channel.
struct ExtraChannelInfo : public Fields {
  // Initializes all fields through Bundle::Init.
  ExtraChannelInfo();
  JXL_FIELDS_NAME(ExtraChannelInfo)

  // Visits type, bit_depth, dim_shift and name, then the type-dependent
  // fields listed under "Conditional" below. Fails if dim_shift is larger
  // than 3 (i.e. 1 << dim_shift > 8) or if `type` is kUnknown or one of the
  // kReserved* values.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Human-readable summary (channel kind, bit depth, shift). The definition
  // in image_metadata.cc is only compiled when JXL_DEBUG_V_LEVEL >= 1.
  std::string DebugString() const;

  // Passed to visitor->AllDefault() by VisitFields; when the visitor reports
  // "all default", every serialized field is reset to its default.
  mutable bool all_default;

  ExtraChannel type;   // serialized default: ExtraChannel::kAlpha
  BitDepth bit_depth;
  uint32_t dim_shift;  // downsampled by 2^dim_shift on each axis

  std::string name;  // UTF-8

  // Conditional:
  // Only visited when type == kAlpha.
  bool alpha_associated;  // i.e. premultiplied
  // Only visited when type == kSpotColor.
  float spot_color[4];    // spot color in linear RGBA
  // Only visited when type == kCFA; serialized default is 1.
  uint32_t cfa_channel;
};

// Parameters of the inverse opsin (XYB) transform. Every entry is coded as
// F16 by VisitFields.
struct OpsinInverseMatrix : public Fields {
  // Initializes all fields through Bundle::Init.
  OpsinInverseMatrix();
  JXL_FIELDS_NAME(OpsinInverseMatrix)

  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Passed to visitor->AllDefault() by VisitFields; when the visitor reports
  // "all default", every serialized field is reset to its default.
  mutable bool all_default;

  // 9 coefficients; defaults come from DefaultInverseOpsinAbsorbanceMatrix().
  // NOTE(review): presumably a row-major 3x3 matrix -- confirm against users.
  float inverse_matrix[9];
  // Defaults come from kNegOpsinAbsorbanceBiasRGB.
  float opsin_biases[3];
  // Defaults come from kDefaultQuantBias.
  float quant_biases[4];
};

// Information useful for mapping HDR images to lower dynamic range displays.
struct ToneMapping : public Fields {
  // Initializes all fields through Bundle::Init.
  ToneMapping();
  JXL_FIELDS_NAME(ToneMapping)

  // Visits the four fields below (the floats as F16) and validates them:
  // intensity_target must be > 0, min_nits must lie in
  // [0, intensity_target], and linear_below must be >= 0 (and <= 1 when
  // relative_to_max_display is set).
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Passed to visitor->AllDefault() by VisitFields; when the visitor reports
  // "all default", every serialized field is reset to its default.
  mutable bool all_default;

  // Upper bound on the intensity level present in the image. For unsigned
  // integer pixel encodings, this is the brightness of the largest
  // representable value. The image does not necessarily contain a pixel
  // actually this bright. An encoder is allowed to set 255 for SDR images
  // without computing a histogram.
  // Serialized default: kDefaultIntensityTarget.
  float intensity_target;  // [nits]

  // Lower bound on the intensity level present in the image. This may be
  // loose, i.e. lower than the actual darkest pixel. When tone mapping, a
  // decoder will map [min_nits, intensity_target] to the display range.
  // Serialized default: 0.
  float min_nits;

  // Serialized default: false.
  bool relative_to_max_display;  // see below
  // The tone mapping will leave unchanged (linear mapping) any pixels whose
  // brightness is strictly below this. The interpretation depends on
  // relative_to_max_display. If true, this is a ratio [0, 1] of the maximum
  // display brightness [nits], otherwise an absolute brightness [nits].
  // Serialized default: 0.
  float linear_below;
};

// Contains weights to customize some transforms - in particular, XYB and
// upsampling.
struct CustomTransformData : public Fields {
  // Initializes all fields through Bundle::Init.
  CustomTransformData();
  JXL_FIELDS_NAME(CustomTransformData)

  // Visits opsin_inverse_matrix (only when nonserialized_xyb_encoded is
  // true), then the 3-bit custom_weights_mask, then each upsampling weight
  // table whose mask bit is set.
  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;

  // Must be set before calling VisitFields. Must equal xyb_encoded of
  // ImageMetadata, should be set by ImageMetadata during VisitFields.
  bool nonserialized_xyb_encoded = false;

  // Passed to visitor->AllDefault() by VisitFields; when the visitor reports
  // "all default", every serialized field is reset to its default.
  mutable bool all_default;

  OpsinInverseMatrix opsin_inverse_matrix;

  // 3-bit mask selecting which weight tables are explicitly coded:
  // bit 0 (0x1) -> upsampling2_weights, bit 1 (0x2) -> upsampling4_weights,
  // bit 2 (0x4) -> upsampling8_weights. Serialized default: 0.
  uint32_t custom_weights_mask;
  // Each table stores only the unique entries of its symmetric 5x5 kernels;
  // all entries are coded as F16 (see VisitFields for the index layout).
  float upsampling2_weights[15];
  float upsampling4_weights[55];
  float upsampling8_weights[210];
};

// Properties of the original image bundle. This enables Encode(Decode()) to
// re-create an equivalent image without user input.
+struct ImageMetadata : public Fields { + ImageMetadata(); + JXL_FIELDS_NAME(ImageMetadata) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + // Returns bit depth of the JPEG XL compressed alpha channel, or 0 if no alpha + // channel present. In the theoretical case that there are multiple alpha + // channels, returns the bit depht of the first. + uint32_t GetAlphaBits() const { + const ExtraChannelInfo* alpha = Find(ExtraChannel::kAlpha); + if (alpha == nullptr) return 0; + JXL_ASSERT(alpha->bit_depth.bits_per_sample != 0); + return alpha->bit_depth.bits_per_sample; + } + + // Sets bit depth of alpha channel, adding extra channel if needed, or + // removing all alpha channels if bits is 0. + // Assumes integer alpha channel and not designed to support multiple + // alpha channels (it's possible to use those features by manipulating + // extra_channel_info directly). + // + // Callers must insert the actual channel image at the same index before any + // further modifications to extra_channel_info. + void SetAlphaBits(uint32_t bits, bool alpha_is_premultiplied = false); + + bool HasAlpha() const { return GetAlphaBits() != 0; } + + // Sets the original bit depth fields to indicate unsigned integer of the + // given bit depth. + // TODO(lode): move function to BitDepth + void SetUintSamples(uint32_t bits) { + bit_depth.bits_per_sample = bits; + bit_depth.exponent_bits_per_sample = 0; + bit_depth.floating_point_sample = false; + // RCT / Squeeze may add one bit each, and this is about int16_t, + // so uint13 should still be OK but limiting it to 12 seems safer. + // TODO(jon): figure out a better way to set this header field. + // (in particular, if modular mode is not used it doesn't matter, + // and if transforms are restricted, up to 15-bit could be done) + if (bits > 12) modular_16_bit_buffer_sufficient = false; + } + // Sets the original bit depth fields to indicate single precision floating + // point. 
+ // TODO(lode): move function to BitDepth + void SetFloat32Samples() { + bit_depth.bits_per_sample = 32; + bit_depth.exponent_bits_per_sample = 8; + bit_depth.floating_point_sample = true; + modular_16_bit_buffer_sufficient = false; + } + + void SetFloat16Samples() { + bit_depth.bits_per_sample = 16; + bit_depth.exponent_bits_per_sample = 5; + bit_depth.floating_point_sample = true; + modular_16_bit_buffer_sufficient = false; + } + + void SetIntensityTarget(float intensity_target) { + tone_mapping.intensity_target = intensity_target; + } + float IntensityTarget() const { + JXL_ASSERT(tone_mapping.intensity_target != 0); + return tone_mapping.intensity_target; + } + + // Returns first ExtraChannelInfo of the given type, or nullptr if none. + const ExtraChannelInfo* Find(ExtraChannel type) const { + for (const ExtraChannelInfo& eci : extra_channel_info) { + if (eci.type == type) return &eci; + } + return nullptr; + } + + // Returns first ExtraChannelInfo of the given type, or nullptr if none. + ExtraChannelInfo* Find(ExtraChannel type) { + for (ExtraChannelInfo& eci : extra_channel_info) { + if (eci.type == type) return &eci; + } + return nullptr; + } + + Orientation GetOrientation() const { + return static_cast(orientation); + } + + bool ExtraFieldsDefault() const; + + std::string DebugString() const; + + mutable bool all_default; + + BitDepth bit_depth; + bool modular_16_bit_buffer_sufficient; // otherwise 32 is. + + // Whether the colors values of the pixels of frames are encoded in the + // codestream using the absolute XYB color space, or the using values that + // follow the color space defined by the ColorEncoding or ICC profile. This + // determines when or whether a CMS (Color Management System) is needed to get + // the pixels in a desired color space. 
In one case, the pixels have one known + // color space and a CMS is needed to convert them to the original image's + // color space, in the other case the pixels have the color space of the + // original image and a CMS is required if a different display space, or a + // single known consistent color space for multiple decoded images, is + // desired. In all cases, the color space of all frames from a single image is + // the same, both VarDCT and modular frames. + // + // If true: then frames can be decoded to XYB (which can also be converted to + // linear and non-linear sRGB with the built in conversion without CMS). The + // attached ColorEncoding or ICC profile has no effect on the meaning of the + // pixel's color values, but instead indicates what the color profile of the + // original image was, and what color profile one should convert to when + // decoding to integers to prevent clipping and precision loss. To do that + // conversion requires a CMS. + // + // If false: then the color values of decoded frames are in the space defined + // by the attached ColorEncoding or ICC profile. To instead get the pixels in + // a chosen known color space, such as sRGB, requires a CMS, since the + // attached ColorEncoding or ICC profile could be any arbitrary color space. + // This mode is typically used for lossless images encoded as integers. + // Frames can also use YCbCr encoding, some frames may and some may not, but + // this is not a different color space but a certain encoding of the RGB + // values. 
+ // + // Note: if !xyb_encoded, but the attached color profile indicates XYB (which + // can happen either if it's a ColorEncoding with color_space_ == + // ColorSpace::kXYB, or if it's an ICC Profile that has been crafted to + // represent XYB), then the frames still may not use ColorEncoding kXYB, they + // must still use kNone (or kYCbCr, which would mean applying the YCbCr + // transform to the 3-channel XYB data), since with !xyb_encoded, the 3 + // channels are stored as-is, no matter what meaning the color profile assigns + // to them. To use ColorEncoding::kXYB, xyb_encoded must be true. + // + // This value is defined in image metadata because this is the global + // codestream header. This value does not affect the image itself, so is not + // image metadata per se, it only affects the encoding, and what color space + // the decoder can receive the pixels in without needing a CMS. + bool xyb_encoded; + + ColorEncoding color_encoding; + + // These values are initialized to defaults such that the 'extra_fields' + // condition in VisitFields uses correctly initialized values. + uint32_t orientation = 1; + bool have_preview = false; + bool have_animation = false; + bool have_intrinsic_size = false; + + // If present, the stored image has the dimensions of the first SizeHeader, + // but decoders are advised to resample or display per `intrinsic_size`. + SizeHeader intrinsic_size; // only if have_intrinsic_size + + ToneMapping tone_mapping; + + // When reading: deserialized. When writing: automatically set from vector. + uint32_t num_extra_channels; + std::vector extra_channel_info; + + // Only present if m.have_preview. + PreviewHeader preview_size; + // Only present if m.have_animation. + AnimationHeader animation; + + uint64_t extensions; + + // Option to stop parsing after basic info, and treat as if the later + // fields do not participate. Use to parse only basic image information + // excluding the final larger or variable sized data. 
+ bool nonserialized_only_parse_basic_info = false; +}; + +Status ReadImageMetadata(BitReader* JXL_RESTRICT reader, + ImageMetadata* JXL_RESTRICT metadata); + +Status WriteImageMetadata(const ImageMetadata& metadata, + BitWriter* JXL_RESTRICT writer, size_t layer, + AuxOut* aux_out); + +// All metadata applicable to the entire codestream (dimensions, extra channels, +// ...) +struct CodecMetadata { + // TODO(lode): use the preview and animation fields too, in place of the + // nonserialized_ ones in ImageMetadata. + ImageMetadata m; + // The size of the codestream: this is the nominal size applicable to all + // frames, although some frames can have a different effective size through + // crop, dc_level or representing a the preview. + SizeHeader size; + // Often default. + CustomTransformData transform_data; + + size_t xsize() const { return size.xsize(); } + size_t ysize() const { return size.ysize(); } + size_t oriented_xsize(bool keep_orientation) const { + if (static_cast(m.GetOrientation()) > 4 && !keep_orientation) { + return ysize(); + } else { + return xsize(); + } + } + size_t oriented_preview_xsize(bool keep_orientation) const { + if (static_cast(m.GetOrientation()) > 4 && !keep_orientation) { + return m.preview_size.ysize(); + } else { + return m.preview_size.xsize(); + } + } + size_t oriented_ysize(bool keep_orientation) const { + if (static_cast(m.GetOrientation()) > 4 && !keep_orientation) { + return xsize(); + } else { + return ysize(); + } + } + size_t oriented_preview_ysize(bool keep_orientation) const { + if (static_cast(m.GetOrientation()) > 4 && !keep_orientation) { + return m.preview_size.xsize(); + } else { + return m.preview_size.ysize(); + } + } + + std::string DebugString() const; +}; + +} // namespace jxl + +#endif // LIB_JXL_IMAGE_METADATA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/image_ops.h b/third-party/libjxl/libjxl/lib/jxl/image_ops.h new file mode 100644 index 0000000000..574a6104d4 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/image_ops.h @@ -0,0 +1,561 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_IMAGE_OPS_H_ +#define LIB_JXL_IMAGE_OPS_H_ + +// Operations on images. + +#include +#include +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image.h" + +namespace jxl { + +template +void CopyImageTo(const Plane& from, Plane* JXL_RESTRICT to) { + JXL_ASSERT(SameSize(from, *to)); + if (from.ysize() == 0 || from.xsize() == 0) return; + for (size_t y = 0; y < from.ysize(); ++y) { + const T* JXL_RESTRICT row_from = from.ConstRow(y); + T* JXL_RESTRICT row_to = to->Row(y); + memcpy(row_to, row_from, from.xsize() * sizeof(T)); + } +} + +// Copies `from:rect_from` to `to:rect_to`. +template +void CopyImageTo(const Rect& rect_from, const Plane& from, + const Rect& rect_to, Plane* JXL_RESTRICT to) { + JXL_DASSERT(SameSize(rect_from, rect_to)); + JXL_DASSERT(rect_from.IsInside(from)); + JXL_DASSERT(rect_to.IsInside(*to)); + if (rect_from.xsize() == 0) return; + for (size_t y = 0; y < rect_from.ysize(); ++y) { + const T* JXL_RESTRICT row_from = rect_from.ConstRow(from, y); + T* JXL_RESTRICT row_to = rect_to.Row(to, y); + memcpy(row_to, row_from, rect_from.xsize() * sizeof(T)); + } +} + +// Copies `from:rect_from` to `to:rect_to`. 
+template +void CopyImageTo(const Rect& rect_from, const Image3& from, + const Rect& rect_to, Image3* JXL_RESTRICT to) { + JXL_ASSERT(SameSize(rect_from, rect_to)); + for (size_t c = 0; c < 3; c++) { + CopyImageTo(rect_from, from.Plane(c), rect_to, &to->Plane(c)); + } +} + +template +void ConvertPlaneAndClamp(const Rect& rect_from, const Plane& from, + const Rect& rect_to, Plane* JXL_RESTRICT to) { + JXL_ASSERT(SameSize(rect_from, rect_to)); + using M = decltype(T() + U()); + for (size_t y = 0; y < rect_to.ysize(); ++y) { + const T* JXL_RESTRICT row_from = rect_from.ConstRow(from, y); + U* JXL_RESTRICT row_to = rect_to.Row(to, y); + for (size_t x = 0; x < rect_to.xsize(); ++x) { + row_to[x] = + std::min(std::max(row_from[x], std::numeric_limits::min()), + std::numeric_limits::max()); + } + } +} + +// Copies `from` to `to`. +template +void CopyImageTo(const T& from, T* JXL_RESTRICT to) { + return CopyImageTo(Rect(from), from, Rect(*to), to); +} + +// Copies `from:rect_from` to `to:rect_to`; also copies `padding` pixels of +// border around `from:rect_from`, in all directions, whenever they are inside +// the first image. 
+template +void CopyImageToWithPadding(const Rect& from_rect, const T& from, + size_t padding, const Rect& to_rect, T* to) { + size_t xextra0 = std::min(padding, from_rect.x0()); + size_t xextra1 = + std::min(padding, from.xsize() - from_rect.x0() - from_rect.xsize()); + size_t yextra0 = std::min(padding, from_rect.y0()); + size_t yextra1 = + std::min(padding, from.ysize() - from_rect.y0() - from_rect.ysize()); + JXL_DASSERT(to_rect.x0() >= xextra0); + JXL_DASSERT(to_rect.y0() >= yextra0); + + return CopyImageTo(Rect(from_rect.x0() - xextra0, from_rect.y0() - yextra0, + from_rect.xsize() + xextra0 + xextra1, + from_rect.ysize() + yextra0 + yextra1), + from, + Rect(to_rect.x0() - xextra0, to_rect.y0() - yextra0, + to_rect.xsize() + xextra0 + xextra1, + to_rect.ysize() + yextra0 + yextra1), + to); +} + +template +void Subtract(const ImageIn& image1, const ImageIn& image2, ImageOut* out) { + using T = typename ImageIn::T; + const size_t xsize = image1.xsize(); + const size_t ysize = image1.ysize(); + JXL_CHECK(xsize == image2.xsize()); + JXL_CHECK(ysize == image2.ysize()); + + for (size_t y = 0; y < ysize; ++y) { + const T* const JXL_RESTRICT row1 = image1.Row(y); + const T* const JXL_RESTRICT row2 = image2.Row(y); + T* const JXL_RESTRICT row_out = out->Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row1[x] - row2[x]; + } + } +} + +// In-place. +template +void SubtractFrom(const Plane& what, Plane* to) { + const size_t xsize = what.xsize(); + const size_t ysize = what.ysize(); + for (size_t y = 0; y < ysize; ++y) { + const Tin* JXL_RESTRICT row_what = what.ConstRow(y); + Tout* JXL_RESTRICT row_to = to->Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_to[x] -= row_what[x]; + } + } +} + +// In-place. 
+template +void AddTo(const Plane& what, Plane* to) { + const size_t xsize = what.xsize(); + const size_t ysize = what.ysize(); + for (size_t y = 0; y < ysize; ++y) { + const Tin* JXL_RESTRICT row_what = what.ConstRow(y); + Tout* JXL_RESTRICT row_to = to->Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_to[x] += row_what[x]; + } + } +} + +template +void AddTo(Rect rectFrom, const Plane& what, Rect rectTo, + Plane* to) { + JXL_ASSERT(SameSize(rectFrom, rectTo)); + const size_t xsize = rectTo.xsize(); + const size_t ysize = rectTo.ysize(); + for (size_t y = 0; y < ysize; ++y) { + const Tin* JXL_RESTRICT row_what = rectFrom.ConstRow(what, y); + Tout* JXL_RESTRICT row_to = rectTo.Row(to, y); + for (size_t x = 0; x < xsize; ++x) { + row_to[x] += row_what[x]; + } + } +} + +// Returns linear combination of two grayscale images. +template +Plane LinComb(const T lambda1, const Plane& image1, const T lambda2, + const Plane& image2) { + const size_t xsize = image1.xsize(); + const size_t ysize = image1.ysize(); + JXL_CHECK(xsize == image2.xsize()); + JXL_CHECK(ysize == image2.ysize()); + Plane out(xsize, ysize); + for (size_t y = 0; y < ysize; ++y) { + const T* const JXL_RESTRICT row1 = image1.Row(y); + const T* const JXL_RESTRICT row2 = image2.Row(y); + T* const JXL_RESTRICT row_out = out.Row(y); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = lambda1 * row1[x] + lambda2 * row2[x]; + } + } + return out; +} + +// Multiplies image by lambda in-place +template +void ScaleImage(const T lambda, Plane* image) { + for (size_t y = 0; y < image->ysize(); ++y) { + T* const JXL_RESTRICT row = image->Row(y); + for (size_t x = 0; x < image->xsize(); ++x) { + row[x] = lambda * row[x]; + } + } +} + +// Multiplies image by lambda in-place +template +void ScaleImage(const T lambda, Image3* image) { + for (size_t c = 0; c < 3; ++c) { + ScaleImage(lambda, &image->Plane(c)); + } +} + +template +Plane Product(const Plane& a, const Plane& b) { + Plane c(a.xsize(), a.ysize()); + for 
(size_t y = 0; y < a.ysize(); ++y) { + const T* const JXL_RESTRICT row_a = a.Row(y); + const T* const JXL_RESTRICT row_b = b.Row(y); + T* const JXL_RESTRICT row_c = c.Row(y); + for (size_t x = 0; x < a.xsize(); ++x) { + row_c[x] = row_a[x] * row_b[x]; + } + } + return c; +} + +template +void FillImage(const T value, Plane* image) { + for (size_t y = 0; y < image->ysize(); ++y) { + T* const JXL_RESTRICT row = image->Row(y); + for (size_t x = 0; x < image->xsize(); ++x) { + row[x] = value; + } + } +} + +template +void ZeroFillImage(Plane* image) { + if (image->xsize() == 0) return; + for (size_t y = 0; y < image->ysize(); ++y) { + T* const JXL_RESTRICT row = image->Row(y); + memset(row, 0, image->xsize() * sizeof(T)); + } +} + +// Mirrors out of bounds coordinates and returns valid coordinates unchanged. +// We assume the radius (distance outside the image) is small compared to the +// image size, otherwise this might not terminate. +// The mirror is outside the last column (border pixel is also replicated). +static inline int64_t Mirror(int64_t x, const int64_t xsize) { + JXL_DASSERT(xsize != 0); + + // TODO(janwas): replace with branchless version + while (x < 0 || x >= xsize) { + if (x < 0) { + x = -x - 1; + } else { + x = 2 * xsize - 1 - x; + } + } + return x; +} + +// Wrap modes for ensuring X/Y coordinates are in the valid range [0, size): + +// Mirrors (repeating the edge pixel once). Useful for convolutions. +struct WrapMirror { + JXL_INLINE int64_t operator()(const int64_t coord, const int64_t size) const { + return Mirror(coord, size); + } +}; + +// Returns the same coordinate: required for TFNode with Border(), or useful +// when we know "coord" is already valid (e.g. interior of an image). +struct WrapUnchanged { + JXL_INLINE int64_t operator()(const int64_t coord, int64_t /*size*/) const { + return coord; + } +}; + +// Similar to Wrap* but for row pointers (reduces Row() multiplications). 
+ +class WrapRowMirror { + public: + template + WrapRowMirror(const ImageOrView& image, size_t ysize) + : first_row_(image.ConstRow(0)), last_row_(image.ConstRow(ysize - 1)) {} + + const float* operator()(const float* const JXL_RESTRICT row, + const int64_t stride) const { + if (row < first_row_) { + const int64_t num_before = first_row_ - row; + // Mirrored; one row before => row 0, two before = row 1, ... + return first_row_ + num_before - stride; + } + if (row > last_row_) { + const int64_t num_after = row - last_row_; + // Mirrored; one row after => last row, two after = last - 1, ... + return last_row_ - num_after + stride; + } + return row; + } + + private: + const float* const JXL_RESTRICT first_row_; + const float* const JXL_RESTRICT last_row_; +}; + +struct WrapRowUnchanged { + JXL_INLINE const float* operator()(const float* const JXL_RESTRICT row, + int64_t /*stride*/) const { + return row; + } +}; + +// Sets "thickness" pixels on each border to "value". This is faster than +// initializing the entire image and overwriting valid/interior pixels. +template +void SetBorder(const size_t thickness, const T value, Plane* image) { + const size_t xsize = image->xsize(); + const size_t ysize = image->ysize(); + // Top: fill entire row + for (size_t y = 0; y < std::min(thickness, ysize); ++y) { + T* const JXL_RESTRICT row = image->Row(y); + std::fill(row, row + xsize, value); + } + + // Bottom: fill entire row + for (size_t y = ysize - thickness; y < ysize; ++y) { + T* const JXL_RESTRICT row = image->Row(y); + std::fill(row, row + xsize, value); + } + + // Left/right: fill the 'columns' on either side, but only if the image is + // big enough that they don't already belong to the top/bottom rows. 
+ if (ysize >= 2 * thickness) { + for (size_t y = thickness; y < ysize - thickness; ++y) { + T* const JXL_RESTRICT row = image->Row(y); + std::fill(row, row + thickness, value); + std::fill(row + xsize - thickness, row + xsize, value); + } + } +} + +// Computes the minimum and maximum pixel value. +template +void ImageMinMax(const Plane& image, T* const JXL_RESTRICT min, + T* const JXL_RESTRICT max) { + *min = std::numeric_limits::max(); + *max = std::numeric_limits::lowest(); + for (size_t y = 0; y < image.ysize(); ++y) { + const T* const JXL_RESTRICT row = image.Row(y); + for (size_t x = 0; x < image.xsize(); ++x) { + *min = std::min(*min, row[x]); + *max = std::max(*max, row[x]); + } + } +} + +// Copies pixels, scaling their value relative to the "from" min/max by +// "to_range". Example: U8 [0, 255] := [0.0, 1.0], to_range = 1.0 => +// outputs [0.0, 1.0]. +template +void ImageConvert(const Plane& from, const float to_range, + Plane* const JXL_RESTRICT to) { + JXL_ASSERT(SameSize(from, *to)); + FromType min_from, max_from; + ImageMinMax(from, &min_from, &max_from); + const float scale = to_range / (max_from - min_from); + for (size_t y = 0; y < from.ysize(); ++y) { + const FromType* const JXL_RESTRICT row_from = from.Row(y); + ToType* const JXL_RESTRICT row_to = to->Row(y); + for (size_t x = 0; x < from.xsize(); ++x) { + row_to[x] = static_cast((row_from[x] - min_from) * scale); + } + } +} + +template +Plane ConvertToFloat(const Plane& from) { + float factor = 1.0f / std::numeric_limits::max(); + if (std::is_same::value || std::is_same::value) { + factor = 1.0f; + } + Plane to(from.xsize(), from.ysize()); + for (size_t y = 0; y < from.ysize(); ++y) { + const From* const JXL_RESTRICT row_from = from.Row(y); + float* const JXL_RESTRICT row_to = to.Row(y); + for (size_t x = 0; x < from.xsize(); ++x) { + row_to[x] = row_from[x] * factor; + } + } + return to; +} + +template +Plane ImageFromPacked(const std::vector& packed, const size_t xsize, + const size_t ysize) { 
+ Plane out(xsize, ysize); + for (size_t y = 0; y < ysize; ++y) { + T* const JXL_RESTRICT row = out.Row(y); + const T* const JXL_RESTRICT packed_row = &packed[y * xsize]; + memcpy(row, packed_row, xsize * sizeof(T)); + } + return out; +} + +template +void Image3Max(const Image3& image, std::array* out_max) { + for (size_t c = 0; c < 3; ++c) { + T max = std::numeric_limits::min(); + for (size_t y = 0; y < image.ysize(); ++y) { + const T* JXL_RESTRICT row = image.ConstPlaneRow(c, y); + for (size_t x = 0; x < image.xsize(); ++x) { + max = std::max(max, row[x]); + } + } + (*out_max)[c] = max; + } +} + +// Computes the sum of the pixels in `rect`. +template +T ImageSum(const Plane& image, const Rect& rect) { + T result = 0; + for (size_t y = 0; y < rect.ysize(); ++y) { + const T* JXL_RESTRICT row = rect.ConstRow(image, y); + for (size_t x = 0; x < rect.xsize(); ++x) { + result += row[x]; + } + } + return result; +} + +template +std::vector PackedFromImage(const Plane& image, const Rect& rect) { + const size_t xsize = rect.xsize(); + const size_t ysize = rect.ysize(); + std::vector packed(xsize * ysize); + for (size_t y = 0; y < rect.ysize(); ++y) { + memcpy(&packed[y * xsize], rect.ConstRow(image, y), xsize * sizeof(T)); + } + return packed; +} + +template +std::vector PackedFromImage(const Plane& image) { + return PackedFromImage(image, Rect(image)); +} + +template +Image3F ConvertToFloat(const Image3& from) { + return Image3F(ConvertToFloat(from.Plane(0)), ConvertToFloat(from.Plane(1)), + ConvertToFloat(from.Plane(2))); +} + +template +void Subtract(const Image3& image1, const Image3& image2, + Image3* out) { + const size_t xsize = image1.xsize(); + const size_t ysize = image1.ysize(); + JXL_CHECK(xsize == image2.xsize()); + JXL_CHECK(ysize == image2.ysize()); + + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < ysize; ++y) { + const Tin* const JXL_RESTRICT row1 = image1.ConstPlaneRow(c, y); + const Tin* const JXL_RESTRICT row2 = image2.ConstPlaneRow(c, y); + 
Tout* const JXL_RESTRICT row_out = out->PlaneRow(c, y); + for (size_t x = 0; x < xsize; ++x) { + row_out[x] = row1[x] - row2[x]; + } + } + } +} + +// Adds `what` of the size of `rect` to `to` in the position of `rect`. +template +void AddTo(const Rect& rect, const Image3& what, Image3* to) { + const size_t xsize = what.xsize(); + const size_t ysize = what.ysize(); + JXL_ASSERT(xsize == rect.xsize()); + JXL_ASSERT(ysize == rect.ysize()); + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < ysize; ++y) { + const Tin* JXL_RESTRICT row_what = what.ConstPlaneRow(c, y); + Tout* JXL_RESTRICT row_to = rect.PlaneRow(to, c, y); + for (size_t x = 0; x < xsize; ++x) { + row_to[x] += row_what[x]; + } + } + } +} + +// Initializes all planes to the same "value". +template +void FillImage(const T value, Image3* image) { + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < image->ysize(); ++y) { + T* JXL_RESTRICT row = image->PlaneRow(c, y); + for (size_t x = 0; x < image->xsize(); ++x) { + row[x] = value; + } + } + } +} + +template +void FillPlane(const T value, Plane* image) { + for (size_t y = 0; y < image->ysize(); ++y) { + T* JXL_RESTRICT row = image->Row(y); + for (size_t x = 0; x < image->xsize(); ++x) { + row[x] = value; + } + } +} + +template +void FillImage(const T value, Image3* image, Rect rect) { + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < rect.ysize(); ++y) { + T* JXL_RESTRICT row = rect.PlaneRow(image, c, y); + for (size_t x = 0; x < rect.xsize(); ++x) { + row[x] = value; + } + } + } +} + +template +void FillPlane(const T value, Plane* image, Rect rect) { + for (size_t y = 0; y < rect.ysize(); ++y) { + T* JXL_RESTRICT row = rect.Row(image, y); + for (size_t x = 0; x < rect.xsize(); ++x) { + row[x] = value; + } + } +} + +template +void ZeroFillImage(Image3* image) { + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < image->ysize(); ++y) { + T* JXL_RESTRICT row = image->PlaneRow(c, y); + if (image->xsize() != 0) memset(row, 0, 
image->xsize() * sizeof(T)); + } + } +} + +template +void ZeroFillPlane(Plane* image, Rect rect) { + for (size_t y = 0; y < rect.ysize(); ++y) { + T* JXL_RESTRICT row = rect.Row(image, y); + memset(row, 0, rect.xsize() * sizeof(T)); + } +} + +// Same as above, but operates in-place. Assumes that the `in` image was +// allocated large enough. +void PadImageToBlockMultipleInPlace(Image3F* JXL_RESTRICT in, + size_t block_dim = kBlockDim); + +// Downsamples an image by a given factor. +void DownsampleImage(Image3F* opsin, size_t factor); +void DownsampleImage(ImageF* image, size_t factor); + +} // namespace jxl + +#endif // LIB_JXL_IMAGE_OPS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/image_ops_test.cc b/third-party/libjxl/libjxl/lib/jxl/image_ops_test.cc new file mode 100644 index 0000000000..44c021513d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image_ops_test.cc @@ -0,0 +1,164 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/image_ops.h" + +#include +#include +#include + +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +template +void TestPacked(const size_t xsize, const size_t ysize) { + Plane image1(xsize, ysize); + RandomFillImage(&image1); + const std::vector& packed = PackedFromImage(image1); + const Plane& image2 = ImageFromPacked(packed, xsize, ysize); + JXL_EXPECT_OK(SamePixels(image1, image2, _)); +} + +TEST(ImageTest, TestPacked) { + TestPacked(1, 1); + TestPacked(7, 1); + TestPacked(1, 7); + + TestPacked(1, 1); + TestPacked(7, 1); + TestPacked(1, 7); + + TestPacked(1, 1); + TestPacked(7, 1); + TestPacked(1, 7); + + TestPacked(1, 1); + TestPacked(7, 1); + TestPacked(1, 7); +} + +// Ensure entire payload is readable/writable for various size/offset combos. 
+TEST(ImageTest, TestAllocator) { + Rng rng(0); + const size_t k32 = 32; + const size_t kAlign = CacheAligned::kAlignment; + for (size_t size : {k32 * 1, k32 * 2, k32 * 3, k32 * 4, k32 * 5, + CacheAligned::kAlias, 2 * CacheAligned::kAlias + 4}) { + for (size_t offset = 0; offset <= CacheAligned::kAlias; offset += kAlign) { + uint8_t* bytes = + static_cast(CacheAligned::Allocate(size, offset)); + JXL_CHECK(reinterpret_cast(bytes) % kAlign == 0); + // Ensure we can write/read the last byte. Use RNG to fool the compiler + // into thinking the write is necessary. + memset(bytes, 0, size); + bytes[size - 1] = 1; // greatest element + uint32_t pos = rng.UniformU(0, size - 1); // random but != greatest + JXL_CHECK(bytes[pos] < bytes[size - 1]); + + CacheAligned::Free(bytes); + } + } +} + +template +void TestFillImpl(Image3* img, const char* layout) { + FillImage(T(1), img); + for (size_t y = 0; y < img->ysize(); ++y) { + for (size_t c = 0; c < 3; ++c) { + T* JXL_RESTRICT row = img->PlaneRow(c, y); + for (size_t x = 0; x < img->xsize(); ++x) { + if (row[x] != T(1)) { + printf("Not 1 at c=%" PRIuS " %" PRIuS ", %" PRIuS " (%" PRIuS + " x %" PRIuS ") (%s)\n", + c, x, y, img->xsize(), img->ysize(), layout); + abort(); + } + row[x] = T(2); + } + } + } + + // Same for ZeroFillImage and swapped c/y loop ordering. 
+ ZeroFillImage(img); + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < img->ysize(); ++y) { + T* JXL_RESTRICT row = img->PlaneRow(c, y); + for (size_t x = 0; x < img->xsize(); ++x) { + if (row[x] != T(0)) { + printf("Not 0 at c=%" PRIuS " %" PRIuS ", %" PRIuS " (%" PRIuS + " x %" PRIuS ") (%s)\n", + c, x, y, img->xsize(), img->ysize(), layout); + abort(); + } + row[x] = T(3); + } + } + } +} + +template +void TestFillT() { + for (uint32_t xsize : {0, 1, 15, 16, 31, 32}) { + for (uint32_t ysize : {0, 1, 15, 16, 31, 32}) { + Image3 image(xsize, ysize); + TestFillImpl(&image, "size ctor"); + + Image3 planar(Plane(xsize, ysize), Plane(xsize, ysize), + Plane(xsize, ysize)); + TestFillImpl(&planar, "planar"); + } + } +} + +// Ensure y/c/x and c/y/x loops visit pixels no more than once. +TEST(ImageTest, TestFill) { + TestFillT(); + TestFillT(); + TestFillT(); + TestFillT(); +} + +TEST(ImageTest, CopyImageToWithPaddingTest) { + Plane src(100, 61); + for (size_t y = 0; y < src.ysize(); y++) { + for (size_t x = 0; x < src.xsize(); x++) { + src.Row(y)[x] = x * 1000 + y; + } + } + Rect src_rect(10, 20, 30, 40); + EXPECT_TRUE(src_rect.IsInside(src)); + + Plane dst(60, 50); + FillImage(0u, &dst); + Rect dst_rect(20, 5, 30, 40); + EXPECT_TRUE(dst_rect.IsInside(dst)); + + CopyImageToWithPadding(src_rect, src, /*padding=*/2, dst_rect, &dst); + + // ysize is + 3 instead of + 4 because we are at the y image boundary on the + // source image. 
+ Rect padded_dst_rect(20 - 2, 5 - 2, 30 + 4, 40 + 3); + for (size_t y = 0; y < dst.ysize(); y++) { + for (size_t x = 0; x < dst.xsize(); x++) { + if (Rect(x, y, 1, 1).IsInside(padded_dst_rect)) { + EXPECT_EQ((x - dst_rect.x0() + src_rect.x0()) * 1000 + + (y - dst_rect.y0() + src_rect.y0()), + dst.Row(y)[x]); + } else { + EXPECT_EQ(0u, dst.Row(y)[x]); + } + } + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/image_test_utils.h b/third-party/libjxl/libjxl/lib/jxl/image_test_utils.h new file mode 100644 index 0000000000..e7d72285e6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/image_test_utils.h @@ -0,0 +1,257 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_IMAGE_TEST_UTILS_H_ +#define LIB_JXL_IMAGE_TEST_UTILS_H_ + +#ifndef __STDC_FORMAT_MACROS +#define __STDC_FORMAT_MACROS +#endif + +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/image.h" + +namespace jxl { + +template +bool SamePixels(const Plane& image1, const Plane& image2, + std::stringstream& failures) { + const Rect rect(image1); + JXL_CHECK(SameSize(image1, image2)); + size_t mismatches = 0; + for (size_t y = rect.y0(); y < rect.ysize(); ++y) { + const T* const JXL_RESTRICT row1 = image1.Row(y); + const T* const JXL_RESTRICT row2 = image2.Row(y); + for (size_t x = rect.x0(); x < rect.xsize(); ++x) { + if (row1[x] != row2[x]) { + failures << "pixel mismatch" << x << ", " << y << ": " + << double(row1[x]) << " != " << double(row2[x]) << "\n"; + if (++mismatches > 4) { + return false; + } + } + } + } + return mismatches == 0; +} + +template +bool SamePixels(const Image3& image1, const Image3& image2, + std::stringstream& failures) { + JXL_CHECK(SameSize(image1, 
image2)); + for (size_t c = 0; c < 3; ++c) { + if (!SamePixels(image1.Plane(c), image2.Plane(c), failures)) { + return false; + } + } + return true; +} + +// Use for floating-point images with fairly large numbers; tolerates small +// absolute errors and/or small relative errors. +template +bool VerifyRelativeError(const Plane& expected, const Plane& actual, + const double threshold_l1, + const double threshold_relative, + std::stringstream& failures, const intptr_t border = 0, + const size_t c = 0) { + JXL_CHECK(SameSize(expected, actual)); + const intptr_t xsize = expected.xsize(); + const intptr_t ysize = expected.ysize(); + + // Max over current scanline to give a better idea whether there are + // systematic errors or just one outlier. Invalid if negative. + double max_l1 = -1; + double max_relative = -1; + bool any_bad = false; + for (intptr_t y = border; y < ysize - border; ++y) { + const T* const JXL_RESTRICT row_expected = expected.Row(y); + const T* const JXL_RESTRICT row_actual = actual.Row(y); + for (intptr_t x = border; x < xsize - border; ++x) { + const double l1 = std::abs(row_expected[x] - row_actual[x]); + + // Cannot compute relative, only check/update L1. + if (std::abs(row_expected[x]) < 1E-10) { + if (l1 > threshold_l1) { + any_bad = true; + max_l1 = std::max(max_l1, l1); + } + } else { + const double relative = l1 / std::abs(double(row_expected[x])); + if (l1 > threshold_l1 && relative > threshold_relative) { + // Fails both tolerances => will exit below, update max_*. + any_bad = true; + max_l1 = std::max(max_l1, l1); + max_relative = std::max(max_relative, relative); + } + } + } + } + if (!any_bad) { + return true; + } + // Never had a valid relative value, don't print it. 
+ if (max_relative < 0) { + fprintf(stderr, "c=%" PRIu64 ": max +/- %E exceeds +/- %.2E\n", + static_cast(c), max_l1, threshold_l1); + } else { + fprintf(stderr, + "c=%" PRIu64 ": max +/- %E, x %E exceeds +/- %.2E, x %.2E\n", + static_cast(c), max_l1, max_relative, threshold_l1, + threshold_relative); + } + // Dump the expected image and actual image if the region is small enough. + const intptr_t kMaxTestDumpSize = 16; + if (xsize <= kMaxTestDumpSize + 2 * border && + ysize <= kMaxTestDumpSize + 2 * border) { + fprintf(stderr, "Expected image:\n"); + for (intptr_t y = border; y < ysize - border; ++y) { + const T* const JXL_RESTRICT row_expected = expected.Row(y); + for (intptr_t x = border; x < xsize - border; ++x) { + fprintf(stderr, "%10lf ", static_cast(row_expected[x])); + } + fprintf(stderr, "\n"); + } + + fprintf(stderr, "Actual image:\n"); + for (intptr_t y = border; y < ysize - border; ++y) { + const T* const JXL_RESTRICT row_expected = expected.Row(y); + const T* const JXL_RESTRICT row_actual = actual.Row(y); + for (intptr_t x = border; x < xsize - border; ++x) { + const double l1 = std::abs(row_expected[x] - row_actual[x]); + + bool bad = l1 > threshold_l1; + if (row_expected[x] > 1E-10) { + const double relative = l1 / std::abs(double(row_expected[x])); + bad &= relative > threshold_relative; + } + if (bad) { + fprintf(stderr, "%10lf ", static_cast(row_actual[x])); + } else { + fprintf(stderr, "%10s ", "=="); + } + } + fprintf(stderr, "\n"); + } + } + + // Find first failing x for further debugging. 
+ for (intptr_t y = border; y < ysize - border; ++y) { + const T* const JXL_RESTRICT row_expected = expected.Row(y); + const T* const JXL_RESTRICT row_actual = actual.Row(y); + + for (intptr_t x = border; x < xsize - border; ++x) { + const double l1 = std::abs(row_expected[x] - row_actual[x]); + + bool bad = l1 > threshold_l1; + if (row_expected[x] > 1E-10) { + const double relative = l1 / std::abs(double(row_expected[x])); + bad &= relative > threshold_relative; + } + if (bad) { + failures << x << ", " << y << " (" << expected.xsize() << " x " + << expected.ysize() << ") expected " + << static_cast(row_expected[x]) << " actual " + << static_cast(row_actual[x]); + return false; + } + } + } + return false; +} + +template +bool VerifyRelativeError(const Image3& expected, const Image3& actual, + const float threshold_l1, + const float threshold_relative, + std::stringstream& failures, + const intptr_t border = 0) { + for (size_t c = 0; c < 3; ++c) { + bool ok = + VerifyRelativeError(expected.Plane(c), actual.Plane(c), threshold_l1, + threshold_relative, failures, border, c); + if (!ok) { + return false; + } + } + return true; +} + +template +void GenerateImage(Rng& rng, Plane* image, U begin, U end) { + for (size_t y = 0; y < image->ysize(); ++y) { + T* const JXL_RESTRICT row = image->Row(y); + for (size_t x = 0; x < image->xsize(); ++x) { + if (std::is_same::value || std::is_same::value) { + row[x] = rng.UniformF(begin, end); + } else if (std::is_signed::value) { + row[x] = rng.UniformI(begin, end); + } else { + row[x] = rng.UniformU(begin, end); + } + } + } +} + +template +void RandomFillImage(Plane* image, const T begin, const T end, + const int seed = 129) { + Rng rng(seed); + GenerateImage(rng, image, begin, end); +} + +template +typename std::enable_if::value>::type RandomFillImage( + Plane* image) { + Rng rng(129); + GenerateImage(rng, image, int64_t(0), + int64_t(std::numeric_limits::max()) + 1); +} + +JXL_INLINE void RandomFillImage(Plane* image) { + Rng 
rng(129); + GenerateImage(rng, image, 0.0f, std::numeric_limits::max()); +} + +template +void GenerateImage(Rng& rng, Image3* image, U begin, U end) { + for (size_t c = 0; c < 3; ++c) { + GenerateImage(rng, &image->Plane(c), begin, end); + } +} + +template +typename std::enable_if::value>::type RandomFillImage( + Image3* image) { + Rng rng(129); + GenerateImage(rng, image, int64_t(0), + int64_t(std::numeric_limits::max()) + 1); +} + +JXL_INLINE void RandomFillImage(Image3F* image) { + Rng rng(129); + GenerateImage(rng, image, 0.0f, std::numeric_limits::max()); +} + +template +void RandomFillImage(Image3* image, const U begin, const U end, + const int seed = 129) { + Rng rng(seed); + GenerateImage(rng, image, begin, end); +} + +} // namespace jxl + +#endif // LIB_JXL_IMAGE_TEST_UTILS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/inverse_mtf-inl.h b/third-party/libjxl/libjxl/lib/jxl/inverse_mtf-inl.h new file mode 100644 index 0000000000..fcb01d7396 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/inverse_mtf-inl.h @@ -0,0 +1,90 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// SIMDified inverse-move-to-front transform. + +#if defined(LIB_JXL_INVERSE_MTF_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_INVERSE_MTF_INL_H_ +#undef LIB_JXL_INVERSE_MTF_INL_H_ +#else +#define LIB_JXL_INVERSE_MTF_INL_H_ +#endif + +#include + +#include "lib/jxl/sanitizers.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::FirstN; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::Load; +using hwy::HWY_NAMESPACE::LoadU; +using hwy::HWY_NAMESPACE::StoreU; + +inline void MoveToFront(uint8_t* v, uint8_t index) { + uint8_t value = v[index]; + uint8_t i = index; + if (i < 4) { + for (; i; --i) v[i] = v[i - 1]; + } else { + const HWY_CAPPED(uint8_t, 64) d; + int tail = i & (Lanes(d) - 1); + if (tail) { + i -= tail; + const auto vec = Load(d, v + i); + const auto prev = LoadU(d, v + i + 1); + StoreU(IfThenElse(FirstN(d, tail), vec, prev), d, v + i + 1); + } + while (i) { + i -= Lanes(d); + const auto vec = Load(d, v + i); + StoreU(vec, d, v + i + 1); + } + } + v[0] = value; +} + +inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) { + HWY_ALIGN uint8_t mtf[256 + 64]; + int i; + for (i = 0; i < 256; ++i) { + mtf[i] = static_cast(i); + } +#if JXL_MEMORY_SANITIZER + const HWY_CAPPED(uint8_t, 64) d; + for (size_t j = 0; j < Lanes(d); ++j) { + mtf[256 + j] = 0; + } +#endif // JXL_MEMORY_SANITIZER + for (i = 0; i < v_len; ++i) { + uint8_t index = v[i]; + v[i] = mtf[index]; + if (index) MoveToFront(mtf, index); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_INVERSE_MTF_INL_H_ + +#if HWY_ONCE +#ifndef INVERSE_MTF_ONCE +#define INVERSE_MTF_ONCE + +namespace jxl { +inline void InverseMoveToFrontTransform(uint8_t* v, int v_len) { + return HWY_STATIC_DISPATCH(InverseMoveToFrontTransform)(v, v_len); +} +} // namespace jxl + +#endif // INVERSE_MTF_ONCE +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.cc new file mode 100644 index 0000000000..db49a1c215 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.cc @@ -0,0 +1,145 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/jpeg/dec_jpeg_data.h" + +#include + +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/sanitizers.h" + +namespace jxl { +namespace jpeg { +Status DecodeJPEGData(Span encoded, JPEGData* jpeg_data) { + Status ret = true; + const uint8_t* in = encoded.data(); + size_t available_in = encoded.size(); + { + BitReader br(encoded); + BitReaderScopedCloser br_closer(&br, &ret); + JXL_RETURN_IF_ERROR(Bundle::Read(&br, jpeg_data)); + JXL_RETURN_IF_ERROR(br.JumpToByteBoundary()); + in += br.TotalBitsConsumed() / 8; + available_in -= br.TotalBitsConsumed() / 8; + } + JXL_RETURN_IF_ERROR(ret); + + BrotliDecoderState* brotli_dec = + BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); + + struct BrotliDecDeleter { + BrotliDecoderState* brotli_dec; + ~BrotliDecDeleter() { BrotliDecoderDestroyInstance(brotli_dec); } + } brotli_dec_deleter{brotli_dec}; + + BrotliDecoderResult result = + BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS; + + auto br_read = [&](std::vector& data) -> Status { + size_t available_out = data.size(); + uint8_t* out = data.data(); + while (available_out != 0) { + if (BrotliDecoderIsFinished(brotli_dec)) { + return JXL_FAILURE("Not enough decompressed output"); + } + uint8_t* next_out_before = out; + size_t avail_out_before = available_out; + msan::MemoryIsInitialized(in, available_in); + result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in, + &available_out, &out, nullptr); + if (result != + BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT && + result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) { + return JXL_FAILURE( + "Brotli decoding error: %s\n", + BrotliDecoderErrorString(BrotliDecoderGetErrorCode(brotli_dec))); + } + msan::UnpoisonMemory(next_out_before, avail_out_before - available_out); + } + return true; + 
}; + size_t num_icc = 0; + for (size_t i = 0; i < jpeg_data->app_data.size(); i++) { + auto& marker = jpeg_data->app_data[i]; + if (jpeg_data->app_marker_type[i] != AppMarkerType::kUnknown) { + // Set the size of the marker. + size_t size_minus_1 = marker.size() - 1; + marker[1] = size_minus_1 >> 8; + marker[2] = size_minus_1 & 0xFF; + if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) { + if (marker.size() < 17) { + return JXL_FAILURE("ICC markers must be at least 17 bytes"); + } + marker[0] = 0xE2; + memcpy(&marker[3], kIccProfileTag, sizeof kIccProfileTag); + marker[15] = ++num_icc; + } + } else { + JXL_RETURN_IF_ERROR(br_read(marker)); + if (marker[1] * 256u + marker[2] + 1u != marker.size()) { + return JXL_FAILURE("Incorrect marker size"); + } + } + } + for (size_t i = 0; i < jpeg_data->app_data.size(); i++) { + auto& marker = jpeg_data->app_data[i]; + if (jpeg_data->app_marker_type[i] == AppMarkerType::kICC) { + marker[16] = num_icc; + } + if (jpeg_data->app_marker_type[i] == AppMarkerType::kExif) { + marker[0] = 0xE1; + if (marker.size() < 3 + sizeof kExifTag) { + return JXL_FAILURE("Incorrect Exif marker size"); + } + memcpy(&marker[3], kExifTag, sizeof kExifTag); + } + if (jpeg_data->app_marker_type[i] == AppMarkerType::kXMP) { + marker[0] = 0xE1; + if (marker.size() < 3 + sizeof kXMPTag) { + return JXL_FAILURE("Incorrect XMP marker size"); + } + memcpy(&marker[3], kXMPTag, sizeof kXMPTag); + } + } + // TODO(eustas): actually inject ICC profile and check it fits perfectly. 
+ for (size_t i = 0; i < jpeg_data->com_data.size(); i++) { + auto& marker = jpeg_data->com_data[i]; + JXL_RETURN_IF_ERROR(br_read(marker)); + if (marker[1] * 256u + marker[2] + 1u != marker.size()) { + return JXL_FAILURE("Incorrect marker size"); + } + } + for (size_t i = 0; i < jpeg_data->inter_marker_data.size(); i++) { + JXL_RETURN_IF_ERROR(br_read(jpeg_data->inter_marker_data[i])); + } + JXL_RETURN_IF_ERROR(br_read(jpeg_data->tail_data)); + + // Check if there is more decompressed output. + size_t available_out = 1; + uint64_t dummy; + uint8_t* next_out = reinterpret_cast(&dummy); + result = BrotliDecoderDecompressStream(brotli_dec, &available_in, &in, + &available_out, &next_out, nullptr); + if (available_out == 0 || + result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { + return JXL_FAILURE("Excess data in compressed stream"); + } + if (result == BrotliDecoderResult::BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) { + return JXL_FAILURE("Incomplete brotli-stream"); + } + if (!BrotliDecoderIsFinished(brotli_dec) || + result != BrotliDecoderResult::BROTLI_DECODER_RESULT_SUCCESS) { + return JXL_FAILURE("Corrupted brotli-stream"); + } + if (available_in != 0) { + return JXL_FAILURE("Unused data after brotli stream"); + } + + return true; +} +} // namespace jpeg +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.h new file mode 100644 index 0000000000..b9d50bf9f8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data.h @@ -0,0 +1,19 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_H_ +#define LIB_JXL_JPEG_DEC_JPEG_DATA_H_ + +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { +Status DecodeJPEGData(Span encoded, JPEGData* jpeg_data); +} +} // namespace jxl + +#endif // LIB_JXL_JPEG_DEC_JPEG_DATA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.cc new file mode 100644 index 0000000000..1714c2b4fd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.cc @@ -0,0 +1,1042 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/jpeg/dec_jpeg_data_writer.h" + +#include +#include /* for memset, memcpy */ + +#include +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/common.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/jpeg/dec_jpeg_serialization_state.h" +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { + +namespace { + +enum struct SerializationStatus { + NEEDS_MORE_INPUT, + NEEDS_MORE_OUTPUT, + ERROR, + DONE +}; + +const int kJpegPrecision = 8; + +// JpegBitWriter: buffer size +const size_t kJpegBitWriterChunkSize = 16384; + +// DCTCodingState: maximum number of correction bits to buffer +const int kJPEGMaxCorrectionBits = 1u << 16; + +// Returns non-zero if and only if x has a zero byte, i.e. one of +// x & 0xff, x & 0xff00, ..., x & 0xff00000000000000 is zero. 
+static JXL_INLINE uint64_t HasZeroByte(uint64_t x) {
+  return (x - 0x0101010101010101ULL) & ~x & 0x8080808080808080ULL;
+}
+
+// Puts |bw| into its initial state: a fresh owning chunk of
+// kJpegBitWriterChunkSize bytes, an empty bit buffer (all 64 bits free) and
+// |output_queue| as the destination for finished chunks.
+void JpegBitWriterInit(JpegBitWriter* bw,
+                       std::deque<OutputChunk>* output_queue) {
+  bw->output = output_queue;
+  bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+  bw->pos = 0;
+  bw->put_buffer = 0;
+  bw->put_bits = 64;
+  bw->healthy = true;
+  bw->data = bw->chunk.buffer->data();
+}
+
+// Moves the current chunk (trimmed to the bytes written so far) to the output
+// queue and starts a new empty chunk.
+static JXL_NOINLINE void SwapBuffer(JpegBitWriter* bw) {
+  bw->chunk.len = bw->pos;
+  bw->output->emplace_back(std::move(bw->chunk));
+  bw->chunk = OutputChunk(kJpegBitWriterChunkSize);
+  bw->data = bw->chunk.buffer->data();
+  bw->pos = 0;
+}
+
+// Guarantees room for |n_bytes| more bytes in the current chunk by switching
+// to a new chunk when they would not fit.
+static JXL_INLINE void Reserve(JpegBitWriter* bw, size_t n_bytes) {
+  if (JXL_UNLIKELY((bw->pos + n_bytes) > kJpegBitWriterChunkSize)) {
+    SwapBuffer(bw);
+  }
+}
+
+/**
+ * Writes the given byte to the output, writes an extra zero if byte is 0xFF.
+ *
+ * This method is "careless" - caller must make sure that there is enough
+ * space in the output buffer. Emits up to 2 bytes to buffer.
+ */
+static JXL_INLINE void EmitByte(JpegBitWriter* bw, int byte) {
+  // The zero is always stored; it only becomes part of the output when |pos|
+  // advances by 2, i.e. when |byte| is 0xFF.
+  bw->data[bw->pos] = byte;
+  bw->data[bw->pos + 1] = 0;
+  bw->pos += (byte != 0xFF ? 1 : 2);
+}
+
+// Called by WriteBits after |put_bits| went negative: completes the 64-bit
+// buffer with the top part of |bits|, writes the 8 bytes out, and restarts
+// the buffer with the bits that did not fit. |nbits| is not used here.
+static JXL_INLINE void DischargeBitBuffer(JpegBitWriter* bw, int nbits,
+                                          uint64_t bits) {
+  // At this point we are ready to emit the put_buffer to the output.
+  // The JPEG format requires that after every 0xff byte in the entropy
+  // coded section, there is a zero byte, therefore we first check if any of
+  // the 8 bytes of put_buffer is 0xFF.
+  bw->put_buffer |= (bits >> -bw->put_bits);
+  if (JXL_UNLIKELY(HasZeroByte(~bw->put_buffer))) {
+    // We have a 0xFF byte somewhere, examine each byte and append a zero
+    // byte if necessary.
+    EmitByte(bw, (bw->put_buffer >> 56) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 48) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 40) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 32) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 24) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 16) & 0xFF);
+    EmitByte(bw, (bw->put_buffer >> 8) & 0xFF);
+    EmitByte(bw, (bw->put_buffer) & 0xFF);
+  } else {
+    // We don't have any 0xFF bytes, output all 8 bytes without checking.
+    StoreBE64(bw->put_buffer, bw->data + bw->pos);
+    bw->pos += 8;
+  }
+
+  bw->put_bits += 64;
+  bw->put_buffer = bits << bw->put_bits;
+}
+
+// Appends the low |nbits| bits of |bits| to the stream. |put_bits| counts the
+// free bits left in |put_buffer|; when a write does not fit, the buffer is
+// discharged. A request for more than 64 bits is not written and marks the
+// writer unhealthy instead.
+static JXL_INLINE void WriteBits(JpegBitWriter* bw, int nbits, uint64_t bits) {
+  JXL_DASSERT(nbits > 0);
+  bw->put_bits -= nbits;
+  if (JXL_UNLIKELY(bw->put_bits < 0)) {
+    if (JXL_UNLIKELY(nbits > 64)) {
+      // Undo the subtraction so the buffer state stays consistent.
+      bw->put_bits += nbits;
+      bw->healthy = false;
+    } else {
+      DischargeBitBuffer(bw, nbits, bits);
+    }
+  } else {
+    bw->put_buffer |= (bits << bw->put_bits);
+  }
+}
+
+// Writes the two-byte sequence 0xFF, |marker| directly to the chunk; unlike
+// EmitByte, no zero byte is stuffed after the 0xFF.
+void EmitMarker(JpegBitWriter* bw, int marker) {
+  Reserve(bw, 2);
+  JXL_DASSERT(marker != 0xFF);
+  bw->data[bw->pos++] = 0xFF;
+  bw->data[bw->pos++] = marker;
+}
+
+// Writes out all buffered bits and pads the last partial byte.
+// If |*pad_bits| is null the padding is all ones; otherwise one padding bit is
+// taken from each byte of [*pad_bits, pad_bits_end) (any non-zero byte counts
+// as 1) and |*pad_bits| is advanced past the bytes used.
+// Returns false if the recorded padding bits run out.
+bool JumpToByteBoundary(JpegBitWriter* bw, const uint8_t** pad_bits,
+                        const uint8_t* pad_bits_end) {
+  // Free bits in the last, partially filled byte.
+  size_t n_bits = bw->put_bits & 7u;
+  uint8_t pad_pattern;
+  if (*pad_bits == nullptr) {
+    pad_pattern = (1u << n_bits) - 1;
+  } else {
+    pad_pattern = 0;
+    const uint8_t* src = *pad_bits;
+    // TODO(eustas): bitwise reading looks insanely ineffective...
+    while (n_bits--) {
+      pad_pattern <<= 1;
+      if (src >= pad_bits_end) return false;
+      // TODO(eustas): DCHECK *src == {0, 1}
+      pad_pattern |= !!*(src++);
+    }
+    *pad_bits = src;
+  }
+
+  // Up to 8 buffered bytes, each of which may expand to 2 in EmitByte.
+  Reserve(bw, 16);
+
+  // Emit every complete byte held in the buffer.
+  while (bw->put_bits <= 56) {
+    int c = (bw->put_buffer >> 56) & 0xFF;
+    EmitByte(bw, c);
+    bw->put_buffer <<= 8;
+    bw->put_bits += 8;
+  }
+  // Emit the remaining partial byte with its free low bits set to the padding.
+  if (bw->put_bits < 64) {
+    int pad_mask = 0xFFu >> (64 - bw->put_bits);
+    int c = ((bw->put_buffer >> 56) & ~pad_mask) | pad_pattern;
+    EmitByte(bw, c);
+  }
+  bw->put_buffer = 0;
+  bw->put_bits = 64;
+
+  return true;
+}
+
+// Hands the current chunk to the output queue, unless nothing was written to
+// it, and leaves the writer without a buffer.
+void JpegBitWriterFinish(JpegBitWriter* bw) {
+  if (bw->pos == 0) return;
+  bw->chunk.len = bw->pos;
+  bw->output->emplace_back(std::move(bw->chunk));
+  bw->chunk = OutputChunk(nullptr, 0);
+  bw->data = nullptr;
+  bw->pos = 0;
+}
+
+// Resets the progressive-mode state: no pending end-of-band run, no Huffman
+// table selected, and an empty (pre-reserved) refinement bit buffer.
+void DCTCodingStateInit(DCTCodingState* s) {
+  s->eob_run_ = 0;
+  s->cur_ac_huff_ = nullptr;
+  s->refinement_bits_.clear();
+  s->refinement_bits_.reserve(kJPEGMaxCorrectionBits);
+}
+
+// Writes the Huffman code of |symbol| as stored in |table|.
+static JXL_INLINE void WriteSymbol(int symbol, HuffmanCodeTable* table,
+                                   JpegBitWriter* bw) {
+  WriteBits(bw, table->depth[symbol], table->code[symbol]);
+}
+
+// Writes the Huffman code of |symbol| immediately followed by the |nbits|
+// extra bits in |bits|, as a single WriteBits call.
+static JXL_INLINE void WriteSymbolBits(int symbol, HuffmanCodeTable* table,
+                                       JpegBitWriter* bw, int nbits,
+                                       uint64_t bits) {
+  WriteBits(bw, nbits + table->depth[symbol],
+            bits | (table->code[symbol] << nbits));
+}
+
+// Emit all buffered data to the bit stream using the given Huffman code and
+// bit writer.
+static JXL_INLINE void Flush(DCTCodingState* s, JpegBitWriter* bw) {
+  if (s->eob_run_ > 0) {
+    // The run length is coded as a symbol carrying its bit length, followed
+    // by the bits of the run below its leading one.
+    int nbits = FloorLog2Nonzero<uint32_t>(s->eob_run_);
+    int symbol = nbits << 4u;
+    WriteSymbol(symbol, s->cur_ac_huff_, bw);
+    if (nbits > 0) {
+      WriteBits(bw, nbits, s->eob_run_ & ((1 << nbits) - 1));
+    }
+    s->eob_run_ = 0;
+  }
+  for (size_t i = 0; i < s->refinement_bits_.size(); ++i) {
+    WriteBits(bw, 1, s->refinement_bits_[i]);
+  }
+  s->refinement_bits_.clear();
+}
+
+// Buffer some more data at the end-of-band (the last non-zero or newly
+// non-zero coefficient within the [Ss, Se] spectral band).
+// |new_bits| may be null. The buffer is flushed once the run reaches 0x7FFF
+// or the pending refinement bits get close to kJPEGMaxCorrectionBits.
+static JXL_INLINE void BufferEndOfBand(DCTCodingState* s,
+                                       HuffmanCodeTable* ac_huff,
+                                       const std::vector<int>* new_bits,
+                                       JpegBitWriter* bw) {
+  if (s->eob_run_ == 0) {
+    s->cur_ac_huff_ = ac_huff;
+  }
+  ++s->eob_run_;
+  if (new_bits) {
+    s->refinement_bits_.insert(s->refinement_bits_.end(), new_bits->begin(),
+                               new_bits->end());
+  }
+  if (s->eob_run_ == 0x7FFF ||
+      s->refinement_bits_.size() > kJPEGMaxCorrectionBits - kDCTBlockSize + 1) {
+    Flush(s, bw);
+  }
+}
+
+// Fills |table| (code and depth per symbol value) from the per-length counts
+// and the value list in |huff|. The last counted entry is treated as a
+// sentinel and gets no code. Returns false if the counts exceed the alphabet
+// size (plus sentinel); an all-zero table is accepted and leaves |table| as is.
+bool BuildHuffmanCodeTable(const JPEGHuffmanCode& huff,
+                           HuffmanCodeTable* table) {
+  int huff_code[kJpegHuffmanAlphabetSize];
+  // +1 for a sentinel element.
+  uint32_t huff_size[kJpegHuffmanAlphabetSize + 1];
+  int p = 0;
+  for (size_t l = 1; l <= kJpegHuffmanMaxBitLength; ++l) {
+    int i = huff.counts[l];
+    if (p + i > kJpegHuffmanAlphabetSize + 1) {
+      return false;
+    }
+    while (i--) huff_size[p++] = l;
+  }
+
+  if (p == 0) {
+    return true;
+  }
+
+  // Reuse sentinel element.
+  int last_p = p - 1;
+  huff_size[last_p] = 0;
+
+  // Assign consecutive codes within each length, doubling when the length
+  // grows; the zero written above terminates the walk.
+  int code = 0;
+  uint32_t si = huff_size[0];
+  p = 0;
+  while (huff_size[p]) {
+    while ((huff_size[p]) == si) {
+      huff_code[p++] = code;
+      code++;
+    }
+    code <<= 1;
+    si++;
+  }
+  for (p = 0; p < last_p; p++) {
+    int i = huff.values[p];
+    table->depth[i] = huff_size[p];
+    table->code[i] = huff_code[p];
+  }
+  return true;
+}
+
+// Queues the start-of-image marker (FF D8).
+bool EncodeSOI(SerializationState* state) {
+  state->output_queue.push_back(OutputChunk({0xFF, 0xD8}));
+  return true;
+}
+
+// Queues the end-of-image marker (FF D9) followed by the bytes of
+// |jpg.tail_data|.
+bool EncodeEOI(const JPEGData& jpg, SerializationState* state) {
+  state->output_queue.push_back(OutputChunk({0xFF, 0xD9}));
+  state->output_queue.emplace_back(jpg.tail_data);
+  return true;
+}
+
+// Queues a start-of-frame segment for |marker| with the image size and one
+// entry per component. For markers up to 0xC2 it also records whether the
+// frame is progressive (0xC2). Returns false on an out-of-range quant index.
+bool EncodeSOF(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+  if (marker <= 0xC2) state->is_progressive = (marker == 0xC2);
+
+  const size_t n_comps = jpg.components.size();
+  const size_t marker_len = 8 + 3 * n_comps;
+  // +2 for the FF <marker> pair, which the length field does not count.
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = marker;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  data[pos++] = kJpegPrecision;
+  data[pos++] = jpg.height >> 8u;
+  data[pos++] = jpg.height & 0xFFu;
+  data[pos++] = jpg.width >> 8u;
+  data[pos++] = jpg.width & 0xFFu;
+  data[pos++] = n_comps;
+  for (size_t i = 0; i < n_comps; ++i) {
+    data[pos++] = jpg.components[i].id;
+    data[pos++] = ((jpg.components[i].h_samp_factor << 4u) |
+                   (jpg.components[i].v_samp_factor));
+    const size_t quant_idx = jpg.components[i].quant_idx;
+    if (quant_idx >= jpg.quant.size()) return false;
+    data[pos++] = jpg.quant[quant_idx].index;
+  }
+  return true;
+}
+
+// Queues the start-of-scan header (FF DA) for |scan_info|: component ids with
+// their DC/AC table selectors, then Ss, Se and Ah/Al.
+// Returns false if a scan component refers to a non-existent component.
+bool EncodeSOS(const JPEGData& jpg, const JPEGScanInfo& scan_info,
+               SerializationState* state) {
+  const size_t n_scans = scan_info.num_components;
+  const size_t marker_len = 6 + 2 * n_scans;
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xDA;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  data[pos++] = n_scans;
+  for (size_t i = 0; i < n_scans; ++i) {
+    const JPEGComponentScanInfo& si = scan_info.components[i];
+    if (si.comp_idx >= jpg.components.size()) return false;
+    data[pos++] = jpg.components[si.comp_idx].id;
+    data[pos++] = (si.dc_tbl_idx << 4u) + si.ac_tbl_idx;
+  }
+  data[pos++] = scan_info.Ss;
+  data[pos++] = scan_info.Se;
+  data[pos++] = ((scan_info.Ah << 4u) | (scan_info.Al));
+  return true;
+}
+
+// Queues one DHT segment (FF C4) holding the Huffman codes from
+// |state->dht_index| up to and including the first one flagged |is_last|, and
+// builds the matching DC/AC encoding tables in |state|.
+// Returns false if the codes run out before an |is_last| entry, if a table
+// cannot be built, or if a table has no symbols at all.
+bool EncodeDHT(const JPEGData& jpg, SerializationState* state) {
+  const std::vector<JPEGHuffmanCode>& huffman_code = jpg.huffman_code;
+
+  // First pass: size of the whole segment.
+  size_t marker_len = 2;
+  for (size_t i = state->dht_index; i < huffman_code.size(); ++i) {
+    const JPEGHuffmanCode& huff = huffman_code[i];
+    marker_len += kJpegHuffmanMaxBitLength;
+    for (size_t j = 0; j < huff.counts.size(); ++j) {
+      marker_len += huff.counts[j];
+    }
+    if (huff.is_last) break;
+  }
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xC4;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  while (true) {
+    const size_t huffman_code_index = state->dht_index++;
+    if (huffman_code_index >= huffman_code.size()) {
+      return false;
+    }
+    const JPEGHuffmanCode& huff = huffman_code[huffman_code_index];
+    // Bit 0x10 of the slot id selects the AC tables, otherwise DC.
+    size_t index = huff.slot_id;
+    HuffmanCodeTable* huff_table;
+    if (index & 0x10) {
+      index -= 0x10;
+      huff_table = &state->ac_huff_table[index];
+    } else {
+      huff_table = &state->dc_huff_table[index];
+    }
+    // TODO(eustas): cache
+    huff_table->InitDepths(127);
+    if (!BuildHuffmanCodeTable(huff, huff_table)) {
+      return false;
+    }
+    huff_table->initialized = true;
+    size_t total_count = 0;
+    size_t max_length = 0;
+    for (size_t i = 0; i < huff.counts.size(); ++i) {
+      if (huff.counts[i] != 0) {
+        max_length = i;
+      }
+      total_count += huff.counts[i];
+    }
+    // The counts include one sentinel that is not serialized. With no symbols
+    // at all the decrement below would wrap around and the value loop would
+    // write far past the end of the chunk, so reject such a table.
+    if (total_count == 0) {
+      return false;
+    }
+    --total_count;
+    data[pos++] = huff.slot_id;
+    for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) {
+      // The sentinel sits at the longest used length; leave it out.
+      data[pos++] = (i == max_length ? huff.counts[i] - 1 : huff.counts[i]);
+    }
+    for (size_t i = 0; i < total_count; ++i) {
+      data[pos++] = huff.values[i];
+    }
+    if (huff.is_last) break;
+  }
+  return true;
+}
+
+// Queues one DQT segment (FF DB) holding the quantization tables from
+// |state->dqt_index| up to and including the first one flagged |is_last|.
+// Table values are written in kJPEGNaturalOrder, as one byte each or, when
+// |precision| is set, as two bytes each.
+bool EncodeDQT(const JPEGData& jpg, SerializationState* state) {
+  int marker_len = 2;
+  for (size_t i = state->dqt_index; i < jpg.quant.size(); ++i) {
+    const JPEGQuantTable& table = jpg.quant[i];
+    marker_len += 1 + (table.precision ? 2 : 1) * kDCTBlockSize;
+    if (table.is_last) break;
+  }
+  state->output_queue.emplace_back(marker_len + 2);
+  uint8_t* data = state->output_queue.back().buffer->data();
+  size_t pos = 0;
+  data[pos++] = 0xFF;
+  data[pos++] = 0xDB;
+  data[pos++] = marker_len >> 8u;
+  data[pos++] = marker_len & 0xFFu;
+  while (true) {
+    const size_t idx = state->dqt_index++;
+    if (idx >= jpg.quant.size()) {
+      return false;  // corrupt input
+    }
+    const JPEGQuantTable& table = jpg.quant[idx];
+    data[pos++] = (table.precision << 4u) + table.index;
+    for (size_t i = 0; i < kDCTBlockSize; ++i) {
+      int val_idx = kJPEGNaturalOrder[i];
+      int val = table.values[val_idx];
+      if (table.precision) {
+        data[pos++] = val >> 8u;
+      }
+      data[pos++] = val & 0xFFu;
+    }
+    if (table.is_last) break;
+  }
+  return true;
+}
+
+// Queues the DRI segment (FF DD, length 4) with |jpg.restart_interval| and
+// records that one was seen, which enables restart handling in later scans.
+bool EncodeDRI(const JPEGData& jpg, SerializationState* state) {
+  state->seen_dri_marker = true;
+  OutputChunk dri_marker = {0xFF,
+                            0xDD,
+                            0,
+                            4,
+                            static_cast<uint8_t>(jpg.restart_interval >> 8),
+                            static_cast<uint8_t>(jpg.restart_interval & 0xFF)};
+  state->output_queue.push_back(std::move(dri_marker));
+  return true;
+}
+
+// Queues a stand-alone restart marker (FF followed by |marker|).
+bool EncodeRestart(uint8_t marker, SerializationState* state) {
+  state->output_queue.push_back(OutputChunk({0xFF, marker}));
+  return true;
+}
+
+// Queues 0xFF followed by the next stored APP payload, which begins with its
+// own marker byte. Returns false when no stored payload is left.
+bool EncodeAPP(const JPEGData& jpg, uint8_t marker, SerializationState* state) {
+  // TODO(eustas): check that marker corresponds to payload?
+  (void)marker;
+
+  size_t app_index = state->app_index++;
+  if (app_index >= jpg.app_data.size()) return false;
+  state->output_queue.push_back(OutputChunk({0xFF}));
+  state->output_queue.emplace_back(jpg.app_data[app_index]);
+  return true;
+}
+
+// Queues 0xFF followed by the next stored COM payload.
+// Returns false when no stored payload is left.
+bool EncodeCOM(const JPEGData& jpg, SerializationState* state) {
+  size_t com_index = state->com_index++;
+  if (com_index >= jpg.com_data.size()) return false;
+  state->output_queue.push_back(OutputChunk({0xFF}));
+  state->output_queue.emplace_back(jpg.com_data[com_index]);
+  return true;
+}
+
+// Queues the next chunk of raw inter-marker bytes as is.
+// Returns false when no chunk is left.
+bool EncodeInterMarkerData(const JPEGData& jpg, SerializationState* state) {
+  size_t index = state->data_index++;
+  if (index >= jpg.inter_marker_data.size()) return false;
+  state->output_queue.emplace_back(jpg.inter_marker_data[index]);
+  return true;
+}
+
+// Entropy-codes one 8x8 block for a sequential scan: the DC difference
+// against |*last_dc_coeff| (which is updated), then run/size-coded AC
+// coefficients in kJPEGNaturalOrder, then |num_zero_runs| extra 0xF0 symbols
+// and an end-of-block symbol if zeros remain.
+// Returns false if the magnitude of an AC coefficient came out negative.
+bool EncodeDCTBlockSequential(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+                              HuffmanCodeTable* ac_huff, int num_zero_runs,
+                              coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+  coeff_t temp2;
+  coeff_t temp;
+  coeff_t litmus = 0;
+  temp2 = coeffs[0];
+  temp = temp2 - *last_dc_coeff;
+  *last_dc_coeff = temp2;
+  // Branch-free sign handling: temp2 becomes the sign mask (0 or -1), then
+  // temp holds the value to emit and temp2 the magnitude.
+  temp2 = temp >> (8 * sizeof(coeff_t) - 1);
+  temp += temp2;
+  temp2 ^= temp;
+
+  int dc_nbits = (temp2 == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(temp2) + 1);
+  WriteSymbol(dc_nbits, dc_huff, bw);
+#if false
+  // If the input is corrupt, this could be triggered. Checking is
+  // costly though, so it makes more sense to avoid this branch.
+  // (producing a corrupt JPEG when the input is corrupt, instead
+  // of catching it and returning error)
+  if (dc_nbits >= 12) return false;
+#endif
+  if (dc_nbits) {
+    WriteBits(bw, dc_nbits, temp & ((1u << dc_nbits) - 1));
+  }
+  // Length of the current run of zero coefficients.
+  int16_t r = 0;
+
+  for (size_t i = 1; i < 64; i++) {
+    if ((temp = coeffs[kJPEGNaturalOrder[i]]) == 0) {
+      r++;
+    } else {
+      temp2 = temp >> (8 * sizeof(coeff_t) - 1);
+      temp += temp2;
+      temp2 ^= temp;
+      // A run above 15 is split off as 0xF0 symbols; at most three fit in
+      // the 63 AC positions.
+      if (JXL_UNLIKELY(r > 15)) {
+        WriteSymbol(0xf0, ac_huff, bw);
+        r -= 16;
+        if (r > 15) {
+          WriteSymbol(0xf0, ac_huff, bw);
+          r -= 16;
+        }
+        if (r > 15) {
+          WriteSymbol(0xf0, ac_huff, bw);
+          r -= 16;
+        }
+      }
+      // Accumulates magnitudes so that a negative one is reported at the end.
+      litmus |= temp2;
+      // NOTE(review): the angle-bracket arguments on this line were lost in
+      // the extracted source; <uint32_t>/<uint16_t> are restored on a
+      // best-effort basis - confirm against the upstream file.
+      int ac_nbits =
+          FloorLog2Nonzero<uint32_t>(static_cast<uint16_t>(temp2)) + 1;
+      int symbol = (r << 4u) + ac_nbits;
+      WriteSymbolBits(symbol, ac_huff, bw, ac_nbits,
+                      temp & ((1 << ac_nbits) - 1));
+      r = 0;
+    }
+  }
+
+  for (int i = 0; i < num_zero_runs; ++i) {
+    WriteSymbol(0xf0, ac_huff, bw);
+    r -= 16;
+  }
+  if (r > 0) {
+    WriteSymbol(0, ac_huff, bw);
+  }
+  return (litmus >= 0);
+}
+
+// Entropy-codes one 8x8 block for the first pass of a progressive band
+// [Ss, Se] with point transform |Al|. With Ss == 0 the DC difference is coded
+// first. Trailing zeros are buffered in |coding_state| as an end-of-band run,
+// which is flushed right away when the band started at DC.
+// Returns false if negating a coefficient overflows.
+bool EncodeDCTBlockProgressive(const coeff_t* coeffs, HuffmanCodeTable* dc_huff,
+                               HuffmanCodeTable* ac_huff, int Ss, int Se,
+                               int Al, int num_zero_runs,
+                               DCTCodingState* coding_state,
+                               coeff_t* last_dc_coeff, JpegBitWriter* bw) {
+  bool eob_run_allowed = Ss > 0;
+  coeff_t temp2;
+  coeff_t temp;
+  if (Ss == 0) {
+    temp2 = coeffs[0] >> Al;
+    temp = temp2 - *last_dc_coeff;
+    *last_dc_coeff = temp2;
+    temp2 = temp;
+    if (temp < 0) {
+      temp = -temp;
+      if (temp < 0) return false;
+      temp2--;
+    }
+    int nbits = (temp == 0) ? 0 : (FloorLog2Nonzero<uint32_t>(temp) + 1);
+    WriteSymbol(nbits, dc_huff, bw);
+    if (nbits) {
+      WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+    }
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  int r = 0;
+  for (int k = Ss; k <= Se; ++k) {
+    if ((temp = coeffs[kJPEGNaturalOrder[k]]) == 0) {
+      r++;
+      continue;
+    }
+    if (temp < 0) {
+      temp = -temp;
+      if (temp < 0) return false;
+      temp >>= Al;
+      temp2 = ~temp;
+    } else {
+      temp >>= Al;
+      temp2 = temp;
+    }
+    // A coefficient that vanishes after the point transform counts as zero.
+    if (temp == 0) {
+      r++;
+      continue;
+    }
+    Flush(coding_state, bw);
+    while (r > 15) {
+      WriteSymbol(0xf0, ac_huff, bw);
+      r -= 16;
+    }
+    int nbits = FloorLog2Nonzero<uint32_t>(temp) + 1;
+    int symbol = (r << 4u) + nbits;
+    WriteSymbol(symbol, ac_huff, bw);
+    WriteBits(bw, nbits, temp2 & ((1 << nbits) - 1));
+    r = 0;
+  }
+  if (num_zero_runs > 0) {
+    Flush(coding_state, bw);
+    for (int i = 0; i < num_zero_runs; ++i) {
+      WriteSymbol(0xf0, ac_huff, bw);
+      r -= 16;
+    }
+  }
+  if (r > 0) {
+    BufferEndOfBand(coding_state, ac_huff, nullptr, bw);
+    if (!eob_run_allowed) {
+      Flush(coding_state, bw);
+    }
+  }
+  return true;
+}
+
+// Entropy-codes one 8x8 block for a progressive refinement pass over
+// [Ss, Se] at bit position |Al|: one more DC bit when Ss == 0, then, per AC
+// coefficient, either a correction bit (already non-zero) or a run/size symbol
+// with sign bit (newly non-zero). Leftover zeros and correction bits are
+// buffered in |coding_state| as an end-of-band run.
+bool EncodeRefinementBits(const coeff_t* coeffs, HuffmanCodeTable* ac_huff,
+                          int Ss, int Se, int Al, DCTCodingState* coding_state,
+                          JpegBitWriter* bw) {
+  bool eob_run_allowed = Ss > 0;
+  if (Ss == 0) {
+    // Emit next bit of DC component.
+    WriteBits(bw, 1, (coeffs[0] >> Al) & 1);
+    ++Ss;
+  }
+  if (Ss > Se) {
+    return true;
+  }
+  int abs_values[kDCTBlockSize];
+  // Position of the last coefficient that becomes non-zero in this pass.
+  int eob = 0;
+  for (int k = Ss; k <= Se; k++) {
+    const coeff_t abs_val = std::abs(coeffs[kJPEGNaturalOrder[k]]);
+    abs_values[k] = abs_val >> Al;
+    if (abs_values[k] == 1) {
+      eob = k;
+    }
+  }
+  int r = 0;
+  std::vector<int> refinement_bits;
+  refinement_bits.reserve(kDCTBlockSize);
+  for (int k = Ss; k <= Se; k++) {
+    if (abs_values[k] == 0) {
+      r++;
+      continue;
+    }
+    // Long zero runs are only written out while a newly non-zero coefficient
+    // still follows; otherwise they end up in the end-of-band run.
+    while (r > 15 && k <= eob) {
+      Flush(coding_state, bw);
+      WriteSymbol(0xf0, ac_huff, bw);
+      r -= 16;
+      for (int bit : refinement_bits) {
+        WriteBits(bw, 1, bit);
+      }
+      refinement_bits.clear();
+    }
+    if (abs_values[k] > 1) {
+      // Already non-zero in an earlier pass: only a correction bit is due.
+      refinement_bits.push_back(abs_values[k] & 1u);
+      continue;
+    }
+    Flush(coding_state, bw);
+    int symbol = (r << 4u) + 1;
+    int new_non_zero_bit = (coeffs[kJPEGNaturalOrder[k]] < 0) ? 0 : 1;
+    WriteSymbol(symbol, ac_huff, bw);
+    WriteBits(bw, 1, new_non_zero_bit);
+    for (int bit : refinement_bits) {
+      WriteBits(bw, 1, bit);
+    }
+    refinement_bits.clear();
+    r = 0;
+  }
+  if (r > 0 || !refinement_bits.empty()) {
+    BufferEndOfBand(coding_state, ac_huff, &refinement_bits, bw);
+    if (!eob_run_allowed) {
+      Flush(coding_state, bw);
+    }
+  }
+  return true;
+}
+
+// Returns the total number of scan components over all scans of |jpg|.
+size_t NumHistograms(const JPEGData& jpg) {
+  size_t num = 0;
+  for (const auto& si : jpg.scan_info) {
+    num += si.num_components;
+  }
+  return num;
+}
+
+// Returns the position of component |component_index| of scan |scan_index|
+// when the components of all scans are numbered consecutively.
+size_t HistogramIndex(const JPEGData& jpg, size_t scan_index,
+                      size_t component_index) {
+  size_t idx = 0;
+  for (size_t i = 0; i < scan_index; ++i) {
+    idx += jpg.scan_info[i].num_components;
+  }
+  return idx + component_index;
+}
+
+// Serializes the scan at |state->scan_index|: the SOS header on first entry,
+// then every MCU with restart markers as configured, then the final padding.
+// |kMode| selects the block coder: 0 sequential, 1 progressive first pass,
+// 2 progressive refinement.
+// Returns DONE after advancing |state->scan_index|, or ERROR on bad table
+// references, exhausted padding bits, a failed block or an unhealthy writer.
+template <int kMode>
+SerializationStatus JXL_NOINLINE DoEncodeScan(const JPEGData& jpg,
+                                              SerializationState* state) {
+  const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+  EncodeScanState& ss = state->scan_state;
+
+  // Restart intervals only apply once a DRI marker has been serialized.
+  const int restart_interval =
+      state->seen_dri_marker ? jpg.restart_interval : 0;
+
+  // Block index of the next recorded extra zero run, or -1 if none is left.
+  const auto get_next_extra_zero_run_index = [&ss, &scan_info]() -> int {
+    if (ss.extra_zero_runs_pos < scan_info.extra_zero_runs.size()) {
+      return scan_info.extra_zero_runs[ss.extra_zero_runs_pos].block_idx;
+    } else {
+      return -1;
+    }
+  };
+
+  // Block index of the next reset point (consuming it), or -1 if none is left.
+  const auto get_next_reset_point = [&ss, &scan_info]() -> int {
+    if (ss.next_reset_point_pos < scan_info.reset_points.size()) {
+      return scan_info.reset_points[ss.next_reset_point_pos++];
+    } else {
+      return -1;
+    }
+  };
+
+  if (ss.stage == EncodeScanState::HEAD) {
+    if (!EncodeSOS(jpg, scan_info, state)) return SerializationStatus::ERROR;
+    JpegBitWriterInit(&ss.bw, &state->output_queue);
+    DCTCodingStateInit(&ss.coding_state);
+    ss.restarts_to_go = restart_interval;
+    ss.next_restart_marker = 0;
+    ss.block_scan_index = 0;
+    ss.extra_zero_runs_pos = 0;
+    ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+    ss.next_reset_point_pos = 0;
+    ss.next_reset_point = get_next_reset_point();
+    ss.mcu_y = 0;
+    memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));
+    ss.stage = EncodeScanState::BODY;
+  }
+  JpegBitWriter* bw = &ss.bw;
+  DCTCodingState* coding_state = &ss.coding_state;
+
+  JXL_DASSERT(ss.stage == EncodeScanState::BODY);
+
+  // "Non-interleaved" means color data comes in separate scans, in other words
+  // each scan can contain only one color component.
+  const bool is_interleaved = (scan_info.num_components > 1);
+  int MCUs_per_row = 0;
+  int MCU_rows = 0;
+  jpg.CalculateMcuSize(scan_info, &MCUs_per_row, &MCU_rows);
+  const bool is_progressive = state->is_progressive;
+  const int Al = is_progressive ? scan_info.Al : 0;
+  const int Ss = is_progressive ? scan_info.Ss : 0;
+  const int Se = is_progressive ? scan_info.Se : 63;
+
+  // DC-only is defined by [0..0] spectral range.
+  const bool want_ac = ((Ss != 0) || (Se != 0));
+  const bool want_dc = (Ss == 0);
+  // TODO: support streaming decoding again.
+  const bool complete_ac = true;
+  const bool has_ac = true;
+  if (want_ac && !has_ac) return SerializationStatus::NEEDS_MORE_INPUT;
+
+  // |has_ac| implies |complete_dc| but not vice versa; for the sake of
+  // simplicity we pretend they are equal, because they are separated by just a
+  // few bytes of input.
+  const bool complete_dc = has_ac;
+  const bool complete = want_ac ? complete_ac : complete_dc;
+  // When "incomplete" |ac_dc| tracks information about current ("incomplete")
+  // band parsing progress.
+
+  // FIXME: Is this always complete?
+  // const int last_mcu_y =
+  //     complete ? MCU_rows : parsing_state.internal->ac_dc.next_mcu_y *
+  //     v_group;
+  (void)complete;
+  const int last_mcu_y = complete ? MCU_rows : 0;
+
+  for (; ss.mcu_y < last_mcu_y; ++ss.mcu_y) {
+    for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) {
+      // Possibly emit a restart marker.
+      if (restart_interval > 0 && ss.restarts_to_go == 0) {
+        Flush(coding_state, bw);
+        if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+          return SerializationStatus::ERROR;
+        }
+        // Restart markers cycle through 0xD0..0xD7.
+        EmitMarker(bw, 0xD0 + ss.next_restart_marker);
+        ss.next_restart_marker += 1;
+        ss.next_restart_marker &= 0x7;
+        ss.restarts_to_go = restart_interval;
+        // DC prediction starts over after a restart marker.
+        memset(ss.last_dc_coeff, 0, sizeof(ss.last_dc_coeff));
+      }
+
+      // Encode one MCU
+      for (size_t i = 0; i < scan_info.num_components; ++i) {
+        const JPEGComponentScanInfo& si = scan_info.components[i];
+        const JPEGComponent& c = jpg.components[si.comp_idx];
+        size_t dc_tbl_idx = si.dc_tbl_idx;
+        size_t ac_tbl_idx = si.ac_tbl_idx;
+        HuffmanCodeTable* dc_huff = &state->dc_huff_table[dc_tbl_idx];
+        HuffmanCodeTable* ac_huff = &state->ac_huff_table[ac_tbl_idx];
+        // A table this scan needs must have been defined by an earlier DHT.
+        if (want_dc && !dc_huff->initialized) {
+          return SerializationStatus::ERROR;
+        }
+        if (want_ac && !ac_huff->initialized) {
+          return SerializationStatus::ERROR;
+        }
+        int n_blocks_y = is_interleaved ? c.v_samp_factor : 1;
+        int n_blocks_x = is_interleaved ? c.h_samp_factor : 1;
+        // compressed size per block cannot be more than 512 bytes per component
+        Reserve(bw, 512 * n_blocks_y * n_blocks_x);
+        for (int iy = 0; iy < n_blocks_y; ++iy) {
+          for (int ix = 0; ix < n_blocks_x; ++ix) {
+            int block_y = ss.mcu_y * n_blocks_y + iy;
+            int block_x = mcu_x * n_blocks_x + ix;
+            int block_idx = block_y * c.width_in_blocks + block_x;
+            if (ss.block_scan_index == ss.next_reset_point) {
+              Flush(coding_state, bw);
+              ss.next_reset_point = get_next_reset_point();
+            }
+            int num_zero_runs = 0;
+            if (ss.block_scan_index == ss.next_extra_zero_run_index) {
+              num_zero_runs = scan_info.extra_zero_runs[ss.extra_zero_runs_pos]
+                                  .num_extra_zero_runs;
+              ++ss.extra_zero_runs_pos;
+              ss.next_extra_zero_run_index = get_next_extra_zero_run_index();
+            }
+            // 64 coefficients per block.
+            const coeff_t* coeffs = &c.coeffs[block_idx << 6];
+            bool ok;
+            if (kMode == 0) {
+              ok = EncodeDCTBlockSequential(coeffs, dc_huff, ac_huff,
+                                            num_zero_runs,
+                                            ss.last_dc_coeff + si.comp_idx, bw);
+            } else if (kMode == 1) {
+              ok = EncodeDCTBlockProgressive(
+                  coeffs, dc_huff, ac_huff, Ss, Se, Al, num_zero_runs,
+                  coding_state, ss.last_dc_coeff + si.comp_idx, bw);
+            } else {
+              ok = EncodeRefinementBits(coeffs, ac_huff, Ss, Se, Al,
+                                        coding_state, bw);
+            }
+            if (!ok) return SerializationStatus::ERROR;
+            ++ss.block_scan_index;
+          }
+        }
+      }
+      --ss.restarts_to_go;
+    }
+  }
+  if (ss.mcu_y < MCU_rows) {
+    if (!bw->healthy) return SerializationStatus::ERROR;
+    return SerializationStatus::NEEDS_MORE_INPUT;
+  }
+  Flush(coding_state, bw);
+  if (!JumpToByteBoundary(bw, &state->pad_bits, state->pad_bits_end)) {
+    return SerializationStatus::ERROR;
+  }
+  JpegBitWriterFinish(bw);
+  ss.stage = EncodeScanState::HEAD;
+  state->scan_index++;
+  if (!bw->healthy) return SerializationStatus::ERROR;
+
+  return SerializationStatus::DONE;
+}
+
+// Picks the block coder for the current scan and runs DoEncodeScan with it:
+// sequential for non-progressive frames and for a full-range scan without
+// successive approximation, first-pass progressive when Ah == 0, refinement
+// otherwise.
+static SerializationStatus JXL_INLINE EncodeScan(const JPEGData& jpg,
+                                                 SerializationState* state) {
+  const JPEGScanInfo& scan_info = jpg.scan_info[state->scan_index];
+  const bool is_progressive = state->is_progressive;
+  const int Al = is_progressive ? scan_info.Al : 0;
+  const int Ah = is_progressive ? scan_info.Ah : 0;
+  const int Ss = is_progressive ? scan_info.Ss : 0;
+  const int Se = is_progressive ? scan_info.Se : 63;
+  const bool need_sequential =
+      !is_progressive || (Ah == 0 && Al == 0 && Ss == 0 && Se == 63);
+  if (need_sequential) {
+    return DoEncodeScan<0>(jpg, state);
+  } else if (Ah == 0) {
+    return DoEncodeScan<1>(jpg, state);
+  } else {
+    return DoEncodeScan<2>(jpg, state);
+  }
+}
+
+// Serializes the section identified by |marker| (the byte following 0xFF; the
+// value 0xFF itself stands for raw inter-marker data) into the output queue.
+// Returns ERROR for markers that are not handled here.
+SerializationStatus SerializeSection(uint8_t marker, SerializationState* state,
+                                     const JPEGData& jpg) {
+  const auto to_status = [](bool result) {
+    return result ? SerializationStatus::DONE : SerializationStatus::ERROR;
+  };
+  // TODO(eustas): add and use marker enum
+  switch (marker) {
+    case 0xC0:
+    case 0xC1:
+    case 0xC2:
+    case 0xC9:
+    case 0xCA:
+      return to_status(EncodeSOF(jpg, marker, state));
+
+    case 0xC4:
+      return to_status(EncodeDHT(jpg, state));
+
+    case 0xD0:
+    case 0xD1:
+    case 0xD2:
+    case 0xD3:
+    case 0xD4:
+    case 0xD5:
+    case 0xD6:
+    case 0xD7:
+      return to_status(EncodeRestart(marker, state));
+
+    case 0xD9:
+      return to_status(EncodeEOI(jpg, state));
+
+    case 0xDA:
+      return EncodeScan(jpg, state);
+
+    case 0xDB:
+      return to_status(EncodeDQT(jpg, state));
+
+    case 0xDD:
+      return to_status(EncodeDRI(jpg, state));
+
+    case 0xE0:
+    case 0xE1:
+    case 0xE2:
+    case 0xE3:
+    case 0xE4:
+    case 0xE5:
+    case 0xE6:
+    case 0xE7:
+    case 0xE8:
+    case 0xE9:
+    case 0xEA:
+    case 0xEB:
+    case 0xEC:
+    case 0xED:
+    case 0xEE:
+    case 0xEF:
+      return to_status(EncodeAPP(jpg, marker, state));
+
+    case 0xFE:
+      return to_status(EncodeCOM(jpg, state));
+
+    case 0xFF:
+      return to_status(EncodeInterMarkerData(jpg, state));
+
+    default:
+      return SerializationStatus::ERROR;
+  }
+}
+
+// TODO(veluca): add streaming support again.
+Status WriteJpegInternal(const JPEGData& jpg, const JPEGOutput& out, + SerializationState* ss) { + const auto maybe_push_output = [&]() -> Status { + if (ss->stage != SerializationState::STAGE_ERROR) { + while (!ss->output_queue.empty()) { + auto& chunk = ss->output_queue.front(); + size_t num_written = out(chunk.next, chunk.len); + if (num_written == 0 && chunk.len > 0) { + return StatusMessage(Status(StatusCode::kNotEnoughBytes), + "Failed to write output"); + } + chunk.len -= num_written; + if (chunk.len == 0) { + ss->output_queue.pop_front(); + } + } + } + return true; + }; + + while (true) { + switch (ss->stage) { + case SerializationState::STAGE_INIT: { + // Valid Brunsli requires, at least, 0xD9 marker. + // This might happen on corrupted stream, or on unconditioned JPEGData. + // TODO(eustas): check D9 in the only one and is the last one. + if (jpg.marker_order.empty()) { + ss->stage = SerializationState::STAGE_ERROR; + break; + } + ss->dc_huff_table.resize(kMaxHuffmanTables); + ss->ac_huff_table.resize(kMaxHuffmanTables); + if (jpg.has_zero_padding_bit) { + ss->pad_bits = jpg.padding_bits.data(); + ss->pad_bits_end = ss->pad_bits + jpg.padding_bits.size(); + } + + EncodeSOI(ss); + JXL_QUIET_RETURN_IF_ERROR(maybe_push_output()); + ss->stage = SerializationState::STAGE_SERIALIZE_SECTION; + break; + } + + case SerializationState::STAGE_SERIALIZE_SECTION: { + if (ss->section_index >= jpg.marker_order.size()) { + ss->stage = SerializationState::STAGE_DONE; + break; + } + uint8_t marker = jpg.marker_order[ss->section_index]; + SerializationStatus status = SerializeSection(marker, ss, jpg); + if (status == SerializationStatus::ERROR) { + JXL_WARNING("Failed to encode marker 0x%.2x", marker); + ss->stage = SerializationState::STAGE_ERROR; + break; + } + JXL_QUIET_RETURN_IF_ERROR(maybe_push_output()); + if (status == SerializationStatus::NEEDS_MORE_INPUT) { + return JXL_FAILURE("Incomplete serialization data"); + } else if (status != SerializationStatus::DONE) { + 
JXL_DASSERT(false); + ss->stage = SerializationState::STAGE_ERROR; + break; + } + ++ss->section_index; + break; + } + + case SerializationState::STAGE_DONE: + JXL_ASSERT(ss->output_queue.empty()); + if (ss->pad_bits != nullptr && ss->pad_bits != ss->pad_bits_end) { + return JXL_FAILURE("Invalid number of padding bits."); + } + return true; + + case SerializationState::STAGE_ERROR: + return JXL_FAILURE("JPEG serialization error"); + } + } +} + +} // namespace + +Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out) { + auto ss = jxl::make_unique(); + return WriteJpegInternal(jpg, out, ss.get()); +} + +} // namespace jpeg +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.h new file mode 100644 index 0000000000..c6f70ff8b1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_data_writer.h @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Functions for writing a JPEGData object into a jpeg byte stream. + +#ifndef LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_ +#define LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_ + +#include +#include + +#include + +#include "lib/jxl/jpeg/dec_jpeg_serialization_state.h" +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { + +// Function type used to write len bytes into buf. Returns the number of bytes +// written. 
+using JPEGOutput = std::function; + +Status WriteJpeg(const JPEGData& jpg, const JPEGOutput& out); + +} // namespace jpeg +} // namespace jxl + +#endif // LIB_JXL_JPEG_DEC_JPEG_DATA_WRITER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_output_chunk.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_output_chunk.h new file mode 100644 index 0000000000..e003c04952 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_output_chunk.h @@ -0,0 +1,72 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_ +#define LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_ + +#include +#include + +#include +#include +#include + +namespace jxl { +namespace jpeg { + +/** + * A chunk of output data. + * + * Data producer creates OutputChunks and adds them to the end output queue. + * Once control flow leaves the producer code, it is considered that chunk of + * data is final and can not be changed; to underline this fact |next| is a + * const-pointer. + * + * Data consumer removes OutputChunks from the beginning of the output queue. + * It is possible to consume OutputChunks partially, by updating |next| and + * |len|. + * + * There are 2 types of output chunks: + * - owning: actual data is stored in |buffer| field; producer fills data after + * the instance it created; it is legal to reduce |len| to show that not all + * the capacity of |buffer| is used + * - non-owning: represents the data stored (owned) somewhere else + */ +struct OutputChunk { + // Non-owning + template + explicit OutputChunk(Bytes& bytes) : len(bytes.size()) { + // Deal both with const qualifier and data type. 
+ const void* src = bytes.data(); + next = reinterpret_cast(src); + } + + // Non-owning + OutputChunk(const uint8_t* data, size_t size) : next(data), len(size) {} + + // Owning + explicit OutputChunk(size_t size = 0) { + buffer.reset(new std::vector(size)); + next = buffer->data(); + len = size; + } + + // Owning + OutputChunk(std::initializer_list bytes) { + buffer.reset(new std::vector(bytes)); + next = buffer->data(); + len = bytes.size(); + } + + const uint8_t* next; + size_t len; + // TODO(veluca): consider removing the unique_ptr. + std::unique_ptr> buffer; +}; + +} // namespace jpeg +} // namespace jxl + +#endif // LIB_JXL_JPEG_DEC_JPEG_OUTPUT_CHUNK_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_serialization_state.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_serialization_state.h new file mode 100644 index 0000000000..4fca3ed643 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/dec_jpeg_serialization_state.h @@ -0,0 +1,99 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_ +#define LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_ + +#include +#include + +#include "lib/jxl/jpeg/dec_jpeg_output_chunk.h" +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { + +struct HuffmanCodeTable { + int8_t depth[256]; + uint16_t code[256]; + bool initialized = false; + void InitDepths(int value = 0) { + std::fill(std::begin(depth), std::end(depth), value); + } +}; + +// Handles the packing of bits into output bytes. +struct JpegBitWriter { + bool healthy; + std::deque* output; + OutputChunk chunk; + uint8_t* data; + size_t pos; + uint64_t put_buffer; + int put_bits; +}; + +// Holds data that is buffered between 8x8 blocks in progressive mode. +struct DCTCodingState { + // The run length of end-of-band symbols in a progressive scan. 
+ int eob_run_; + // The huffman table to be used when flushing the state. + HuffmanCodeTable* cur_ac_huff_; + // The sequence of currently buffered refinement bits for a successive + // approximation scan (one where Ah > 0). + std::vector refinement_bits_; +}; + +struct EncodeScanState { + enum Stage { HEAD, BODY }; + + Stage stage = HEAD; + + int mcu_y; + JpegBitWriter bw; + coeff_t last_dc_coeff[kMaxComponents] = {0}; + int restarts_to_go; + int next_restart_marker; + int block_scan_index; + DCTCodingState coding_state; + size_t extra_zero_runs_pos; + int next_extra_zero_run_index; + size_t next_reset_point_pos; + int next_reset_point; +}; + +struct SerializationState { + enum Stage { + STAGE_INIT, + STAGE_SERIALIZE_SECTION, + STAGE_DONE, + STAGE_ERROR, + }; + + Stage stage = STAGE_INIT; + + std::deque output_queue; + + size_t section_index = 0; + int dht_index = 0; + int dqt_index = 0; + int app_index = 0; + int com_index = 0; + int data_index = 0; + int scan_index = 0; + std::vector dc_huff_table; + std::vector ac_huff_table; + const uint8_t* pad_bits = nullptr; + const uint8_t* pad_bits_end = nullptr; + bool seen_dri_marker = false; + bool is_progressive = false; + + EncodeScanState scan_state; +}; + +} // namespace jpeg +} // namespace jxl + +#endif // LIB_JXL_JPEG_DEC_JPEG_SERIALIZATION_STATE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.cc new file mode 100644 index 0000000000..460fc2f812 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.cc @@ -0,0 +1,384 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/jpeg/enc_jpeg_data.h" + +#include +#include + +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/jpeg/enc_jpeg_data_reader.h" +#include "lib/jxl/luminance.h" +#include "lib/jxl/sanitizers.h" + +namespace jxl { +namespace jpeg { + +namespace { + +constexpr int BITS_IN_JSAMPLE = 8; +using ByteSpan = Span; + +// TODO(eustas): move to jpeg_data, to use from codec_jpg as well. +// See if there is a canonically chunked ICC profile and mark corresponding +// app-tags with AppMarkerType::kICC. +Status DetectIccProfile(JPEGData& jpeg_data) { + JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size()); + size_t num_icc = 0; + size_t num_icc_jpeg = 0; + for (size_t i = 0; i < jpeg_data.app_data.size(); i++) { + const auto& app = jpeg_data.app_data[i]; + size_t pos = 0; + if (app[pos++] != 0xE2) continue; + // At least APPn + size; otherwise it should be intermarker-data. + JXL_DASSERT(app.size() >= 3); + size_t tag_length = (app[pos] << 8) + app[pos + 1]; + pos += 2; + JXL_DASSERT(app.size() == tag_length + 1); + // Empty payload is 2 bytes for tag length itself + signature + if (tag_length < 2 + sizeof kIccProfileTag) continue; + + if (memcmp(&app[pos], kIccProfileTag, sizeof kIccProfileTag) != 0) continue; + pos += sizeof kIccProfileTag; + uint8_t chunk_id = app[pos++]; + uint8_t num_chunks = app[pos++]; + if (chunk_id != num_icc + 1) continue; + if (num_icc_jpeg == 0) num_icc_jpeg = num_chunks; + if (num_icc_jpeg != num_chunks) continue; + num_icc++; + jpeg_data.app_marker_type[i] = AppMarkerType::kICC; + } + if (num_icc != num_icc_jpeg) { + return JXL_FAILURE("Invalid ICC chunks"); + } + return true; +} + +bool GetMarkerPayload(const uint8_t* data, size_t size, ByteSpan* payload) { + if (size < 3) { + return false; + } + size_t hi = data[1]; + size_t lo = data[2]; + size_t internal_size = (hi << 8u) | lo; + // Second byte of marker is not counted towards size. 
+ if (internal_size != size - 1) { + return false; + } + // cut second marker byte and "length" from payload. + *payload = ByteSpan(data, size); + payload->remove_prefix(3); + return true; +} + +Status DetectBlobs(jpeg::JPEGData& jpeg_data) { + JXL_DASSERT(jpeg_data.app_data.size() == jpeg_data.app_marker_type.size()); + bool have_exif = false, have_xmp = false; + for (size_t i = 0; i < jpeg_data.app_data.size(); i++) { + auto& marker = jpeg_data.app_data[i]; + if (marker.empty() || marker[0] != kApp1) { + continue; + } + ByteSpan payload; + if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) { + // Something is wrong with this marker; does not care. + continue; + } + if (!have_exif && payload.size() >= sizeof kExifTag && + !memcmp(payload.data(), kExifTag, sizeof kExifTag)) { + jpeg_data.app_marker_type[i] = AppMarkerType::kExif; + have_exif = true; + } + if (!have_xmp && payload.size() >= sizeof kXMPTag && + !memcmp(payload.data(), kXMPTag, sizeof kXMPTag)) { + jpeg_data.app_marker_type[i] = AppMarkerType::kXMP; + have_xmp = true; + } + } + return true; +} + +Status ParseChunkedMarker(const jpeg::JPEGData& src, uint8_t marker_type, + const ByteSpan& tag, PaddedBytes* output, + bool allow_permutations = false) { + output->clear(); + + std::vector chunks; + std::vector presence; + size_t expected_number_of_parts = 0; + bool is_first_chunk = true; + size_t ordinal = 0; + for (const auto& marker : src.app_data) { + if (marker.empty() || marker[0] != marker_type) { + continue; + } + ByteSpan payload; + if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) { + // Something is wrong with this marker; does not care. 
+ continue; + } + if ((payload.size() < tag.size()) || + memcmp(payload.data(), tag.data(), tag.size()) != 0) { + continue; + } + payload.remove_prefix(tag.size()); + if (payload.size() < 2) { + return JXL_FAILURE("Chunk is too small."); + } + uint8_t index = payload[0]; + uint8_t total = payload[1]; + ordinal++; + if (!allow_permutations) { + if (index != ordinal) return JXL_FAILURE("Invalid chunk order."); + } + + payload.remove_prefix(2); + + JXL_RETURN_IF_ERROR(total != 0); + if (is_first_chunk) { + is_first_chunk = false; + expected_number_of_parts = total; + // 1-based indices; 0-th element is added for convenience. + chunks.resize(total + 1); + presence.resize(total + 1); + } else { + JXL_RETURN_IF_ERROR(expected_number_of_parts == total); + } + + if (index == 0 || index > total) { + return JXL_FAILURE("Invalid chunk index."); + } + + if (presence[index]) { + return JXL_FAILURE("Duplicate chunk."); + } + presence[index] = true; + chunks[index] = payload; + } + + for (size_t i = 0; i < expected_number_of_parts; ++i) { + // 0-th element is not used. + size_t index = i + 1; + if (!presence[index]) { + return JXL_FAILURE("Missing chunk."); + } + output->append(chunks[index]); + } + + return true; +} + +Status SetBlobsFromJpegData(const jpeg::JPEGData& jpeg_data, Blobs* blobs) { + for (size_t i = 0; i < jpeg_data.app_data.size(); i++) { + auto& marker = jpeg_data.app_data[i]; + if (marker.empty() || marker[0] != kApp1) { + continue; + } + ByteSpan payload; + if (!GetMarkerPayload(marker.data(), marker.size(), &payload)) { + // Something is wrong with this marker; does not care. 
+ continue; + } + if (payload.size() >= sizeof kExifTag && + !memcmp(payload.data(), kExifTag, sizeof kExifTag)) { + if (blobs->exif.empty()) { + blobs->exif.resize(payload.size() - sizeof kExifTag); + memcpy(blobs->exif.data(), payload.data() + sizeof kExifTag, + payload.size() - sizeof kExifTag); + } else { + JXL_WARNING( + "ReJPEG: multiple Exif blobs, storing only first one in the JPEG " + "XL container\n"); + } + } + if (payload.size() >= sizeof kXMPTag && + !memcmp(payload.data(), kXMPTag, sizeof kXMPTag)) { + if (blobs->xmp.empty()) { + blobs->xmp.resize(payload.size() - sizeof kXMPTag); + memcpy(blobs->xmp.data(), payload.data() + sizeof kXMPTag, + payload.size() - sizeof kXMPTag); + } else { + JXL_WARNING( + "ReJPEG: multiple XMP blobs, storing only first one in the JPEG " + "XL container\n"); + } + } + } + return true; +} + +static inline bool IsJPG(const Span bytes) { + return bytes.size() >= 2 && bytes[0] == 0xFF && bytes[1] == 0xD8; +} + +} // namespace + +Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg, + ColorEncoding* color_encoding) { + PaddedBytes icc_profile; + if (!ParseChunkedMarker(jpg, kApp2, ByteSpan(kIccProfileTag), &icc_profile)) { + JXL_WARNING("ReJPEG: corrupted ICC profile\n"); + icc_profile.clear(); + } + + if (icc_profile.empty()) { + bool is_gray = (jpg.components.size() == 1); + *color_encoding = ColorEncoding::SRGB(is_gray); + return true; + } + + return color_encoding->SetICC(std::move(icc_profile), /*cms=*/nullptr); +} + +Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes, + const CompressParams& cparams) { + jpeg_data.app_marker_type.resize(jpeg_data.app_data.size(), + AppMarkerType::kUnknown); + JXL_RETURN_IF_ERROR(DetectIccProfile(jpeg_data)); + JXL_RETURN_IF_ERROR(DetectBlobs(jpeg_data)); + BitWriter writer; + JXL_RETURN_IF_ERROR(Bundle::Write(jpeg_data, &writer, 0, nullptr)); + writer.ZeroPadToByte(); + *bytes = std::move(writer).TakeBytes(); + BrotliEncoderState* brotli_enc = + 
BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); + int effort = cparams.brotli_effort; + if (effort < 0) effort = 11 - static_cast(cparams.speed_tier); + BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_QUALITY, effort); + size_t total_data = 0; + for (size_t i = 0; i < jpeg_data.app_data.size(); i++) { + if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) { + continue; + } + total_data += jpeg_data.app_data[i].size(); + } + for (size_t i = 0; i < jpeg_data.com_data.size(); i++) { + total_data += jpeg_data.com_data[i].size(); + } + for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) { + total_data += jpeg_data.inter_marker_data[i].size(); + } + total_data += jpeg_data.tail_data.size(); + size_t initial_size = bytes->size(); + size_t brotli_capacity = BrotliEncoderMaxCompressedSize(total_data); + BrotliEncoderSetParameter(brotli_enc, BROTLI_PARAM_SIZE_HINT, total_data); + bytes->resize(bytes->size() + brotli_capacity); + size_t enc_size = 0; + auto br_append = [&](const std::vector& data, bool last) { + size_t available_in = data.size(); + const uint8_t* in = data.data(); + uint8_t* out = &(*bytes)[initial_size + enc_size]; + do { + uint8_t* out_before = out; + msan::MemoryIsInitialized(in, available_in); + JXL_CHECK(BrotliEncoderCompressStream( + brotli_enc, last ? 
BROTLI_OPERATION_FINISH : BROTLI_OPERATION_PROCESS, + &available_in, &in, &brotli_capacity, &out, &enc_size)); + msan::UnpoisonMemory(out_before, out - out_before); + } while (BrotliEncoderHasMoreOutput(brotli_enc) || available_in > 0); + }; + + for (size_t i = 0; i < jpeg_data.app_data.size(); i++) { + if (jpeg_data.app_marker_type[i] != AppMarkerType::kUnknown) { + continue; + } + br_append(jpeg_data.app_data[i], /*last=*/false); + } + for (size_t i = 0; i < jpeg_data.com_data.size(); i++) { + br_append(jpeg_data.com_data[i], /*last=*/false); + } + for (size_t i = 0; i < jpeg_data.inter_marker_data.size(); i++) { + br_append(jpeg_data.inter_marker_data[i], /*last=*/false); + } + br_append(jpeg_data.tail_data, /*last=*/true); + BrotliEncoderDestroyInstance(brotli_enc); + bytes->resize(initial_size + enc_size); + return true; +} + +Status DecodeImageJPG(const Span bytes, CodecInOut* io) { + if (!IsJPG(bytes)) return false; + io->frames.clear(); + io->frames.reserve(1); + io->frames.emplace_back(&io->metadata.m); + io->Main().jpeg_data = make_unique(); + jpeg::JPEGData* jpeg_data = io->Main().jpeg_data.get(); + if (!jpeg::ReadJpeg(bytes.data(), bytes.size(), jpeg::JpegReadMode::kReadAll, + jpeg_data)) { + return JXL_FAILURE("Error reading JPEG"); + } + JXL_RETURN_IF_ERROR( + SetColorEncodingFromJpegData(*jpeg_data, &io->metadata.m.color_encoding)); + JXL_RETURN_IF_ERROR(SetBlobsFromJpegData(*jpeg_data, &io->blobs)); + size_t nbcomp = jpeg_data->components.size(); + if (nbcomp != 1 && nbcomp != 3) { + return JXL_FAILURE("Cannot recompress JPEGs with neither 1 nor 3 channels"); + } + YCbCrChromaSubsampling cs; + if (nbcomp == 3) { + uint8_t hsample[3], vsample[3]; + for (size_t i = 0; i < nbcomp; i++) { + hsample[i] = jpeg_data->components[i].h_samp_factor; + vsample[i] = jpeg_data->components[i].v_samp_factor; + } + JXL_RETURN_IF_ERROR(cs.Set(hsample, vsample)); + } else if (nbcomp == 1) { + uint8_t hsample[3], vsample[3]; + for (size_t i = 0; i < 3; i++) { + 
hsample[i] = jpeg_data->components[0].h_samp_factor; + vsample[i] = jpeg_data->components[0].v_samp_factor; + } + JXL_RETURN_IF_ERROR(cs.Set(hsample, vsample)); + } + bool is_rgb = false; + { + const auto& markers = jpeg_data->marker_order; + // If there is a JFIF marker, this is YCbCr. Otherwise... + if (std::find(markers.begin(), markers.end(), 0xE0) == markers.end()) { + // Try to find an 'Adobe' marker. + size_t app_markers = 0; + size_t i = 0; + for (; i < markers.size(); i++) { + // This is an APP marker. + if ((markers[i] & 0xF0) == 0xE0) { + JXL_CHECK(app_markers < jpeg_data->app_data.size()); + // APP14 marker + if (markers[i] == 0xEE) { + const auto& data = jpeg_data->app_data[app_markers]; + if (data.size() == 15 && data[3] == 'A' && data[4] == 'd' && + data[5] == 'o' && data[6] == 'b' && data[7] == 'e') { + // 'Adobe' marker. + is_rgb = data[14] == 0; + break; + } + } + app_markers++; + } + } + + if (i == markers.size()) { + // No 'Adobe' marker, guess from component IDs. + is_rgb = nbcomp == 3 && jpeg_data->components[0].id == 'R' && + jpeg_data->components[1].id == 'G' && + jpeg_data->components[2].id == 'B'; + } + } + } + + io->Main().chroma_subsampling = cs; + io->Main().color_transform = + (!is_rgb || nbcomp == 1) ? ColorTransform::kYCbCr : ColorTransform::kNone; + + io->metadata.m.SetIntensityTarget(kDefaultIntensityTarget); + io->metadata.m.SetUintSamples(BITS_IN_JSAMPLE); + io->SetFromImage(Image3F(jpeg_data->width, jpeg_data->height), + io->metadata.m.color_encoding); + SetIntensityTarget(&io->metadata.m); + return true; +} + +} // namespace jpeg +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.h new file mode 100644 index 0000000000..806128c465 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data.h @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_H_ +#define LIB_JXL_JPEG_ENC_JPEG_DATA_H_ + +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { +Status EncodeJPEGData(JPEGData& jpeg_data, PaddedBytes* bytes, + const CompressParams& cparams); + +Status SetColorEncodingFromJpegData(const jpeg::JPEGData& jpg, + ColorEncoding* color_encoding); + +/** + * Decodes bytes containing JPEG codestream into a CodecInOut as coefficients + * only, for lossless JPEG transcoding. + */ +Status DecodeImageJPG(Span bytes, CodecInOut* io); + +} // namespace jpeg +} // namespace jxl + +#endif // LIB_JXL_JPEG_ENC_JPEG_DATA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.cc new file mode 100644 index 0000000000..f569b73363 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.cc @@ -0,0 +1,1053 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/jpeg/enc_jpeg_data_reader.h" + +#include +#include + +#include +#include +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h" +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { + +namespace { +static const int kBrunsliMaxSampling = 15; + +// Macros for commonly used error conditions. 
+ +#define JXL_JPEG_VERIFY_LEN(n) \ + if (*pos + (n) > len) { \ + return JXL_FAILURE("Unexpected end of input: pos=%" PRIuS \ + " need=%d len=%" PRIuS, \ + *pos, static_cast(n), len); \ + } + +#define JXL_JPEG_VERIFY_INPUT(var, low, high, code) \ + if ((var) < (low) || (var) > (high)) { \ + return JXL_FAILURE("Invalid " #var ": %d", static_cast(var)); \ + } + +#define JXL_JPEG_VERIFY_MARKER_END() \ + if (start_pos + marker_len != *pos) { \ + return JXL_FAILURE("Invalid marker length: declared=%" PRIuS \ + " actual=%" PRIuS, \ + marker_len, (*pos - start_pos)); \ + } + +#define JXL_JPEG_EXPECT_MARKER() \ + if (pos + 2 > len || data[pos] != 0xff) { \ + return JXL_FAILURE( \ + "Marker byte (0xff) expected, found: 0x%.2x pos=%" PRIuS \ + " len=%" PRIuS, \ + (pos < len ? data[pos] : 0), pos, len); \ + } + +inline int ReadUint8(const uint8_t* data, size_t* pos) { + return data[(*pos)++]; +} + +inline int ReadUint16(const uint8_t* data, size_t* pos) { + int v = (data[*pos] << 8) + data[*pos + 1]; + *pos += 2; + return v; +} + +// Reads the Start of Frame (SOF) marker segment and fills in *jpg with the +// parsed data. 
+bool ProcessSOF(const uint8_t* data, const size_t len, JpegReadMode mode, + size_t* pos, JPEGData* jpg) { + if (jpg->width != 0) { + return JXL_FAILURE("Duplicate SOF marker."); + } + const size_t start_pos = *pos; + JXL_JPEG_VERIFY_LEN(8); + size_t marker_len = ReadUint16(data, pos); + int precision = ReadUint8(data, pos); + int height = ReadUint16(data, pos); + int width = ReadUint16(data, pos); + int num_components = ReadUint8(data, pos); + // 'jbrd' is hardcoded for 8bits: + JXL_JPEG_VERIFY_INPUT(precision, 8, 8, PRECISION); + JXL_JPEG_VERIFY_INPUT(height, 1, kMaxDimPixels, HEIGHT); + JXL_JPEG_VERIFY_INPUT(width, 1, kMaxDimPixels, WIDTH); + JXL_JPEG_VERIFY_INPUT(num_components, 1, kMaxComponents, NUMCOMP); + JXL_JPEG_VERIFY_LEN(3 * num_components); + jpg->height = height; + jpg->width = width; + jpg->components.resize(num_components); + + // Read sampling factors and quant table index for each component. + std::vector ids_seen(256, false); + int max_h_samp_factor = 1; + int max_v_samp_factor = 1; + for (size_t i = 0; i < jpg->components.size(); ++i) { + const int id = ReadUint8(data, pos); + if (ids_seen[id]) { // (cf. section B.2.2, syntax of Ci) + return JXL_FAILURE("Duplicate ID %d in SOF.", id); + } + ids_seen[id] = true; + jpg->components[i].id = id; + int factor = ReadUint8(data, pos); + int h_samp_factor = factor >> 4; + int v_samp_factor = factor & 0xf; + JXL_JPEG_VERIFY_INPUT(h_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR); + JXL_JPEG_VERIFY_INPUT(v_samp_factor, 1, kBrunsliMaxSampling, SAMP_FACTOR); + jpg->components[i].h_samp_factor = h_samp_factor; + jpg->components[i].v_samp_factor = v_samp_factor; + jpg->components[i].quant_idx = ReadUint8(data, pos); + max_h_samp_factor = std::max(max_h_samp_factor, h_samp_factor); + max_v_samp_factor = std::max(max_v_samp_factor, v_samp_factor); + } + + // We have checked above that none of the sampling factors are 0, so the max + // sampling factors can not be 0. 
+ int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8); + int MCU_cols = DivCeil(jpg->width, max_h_samp_factor * 8); + // Compute the block dimensions for each component. + for (size_t i = 0; i < jpg->components.size(); ++i) { + JPEGComponent* c = &jpg->components[i]; + if (max_h_samp_factor % c->h_samp_factor != 0 || + max_v_samp_factor % c->v_samp_factor != 0) { + return JXL_FAILURE("Non-integral subsampling ratios."); + } + c->width_in_blocks = MCU_cols * c->h_samp_factor; + c->height_in_blocks = MCU_rows * c->v_samp_factor; + const uint64_t num_blocks = + static_cast(c->width_in_blocks) * c->height_in_blocks; + if (mode == JpegReadMode::kReadAll) { + c->coeffs.resize(num_blocks * kDCTBlockSize); + } + } + JXL_JPEG_VERIFY_MARKER_END(); + return true; +} + +// Reads the Start of Scan (SOS) marker segment and fills in *scan_info with the +// parsed data. +bool ProcessSOS(const uint8_t* data, const size_t len, size_t* pos, + JPEGData* jpg) { + const size_t start_pos = *pos; + JXL_JPEG_VERIFY_LEN(3); + size_t marker_len = ReadUint16(data, pos); + size_t comps_in_scan = ReadUint8(data, pos); + JXL_JPEG_VERIFY_INPUT(comps_in_scan, 1, jpg->components.size(), + COMPS_IN_SCAN); + + JPEGScanInfo scan_info; + scan_info.num_components = comps_in_scan; + JXL_JPEG_VERIFY_LEN(2 * comps_in_scan); + std::vector ids_seen(256, false); + for (size_t i = 0; i < comps_in_scan; ++i) { + uint32_t id = ReadUint8(data, pos); + if (ids_seen[id]) { // (cf. 
section B.2.3, regarding CSj) + return JXL_FAILURE("Duplicate ID %d in SOS.", id); + } + ids_seen[id] = true; + bool found_index = false; + for (size_t j = 0; j < jpg->components.size(); ++j) { + if (jpg->components[j].id == id) { + scan_info.components[i].comp_idx = j; + found_index = true; + } + } + if (!found_index) { + return JXL_FAILURE("SOS marker: Could not find component with id %d", id); + } + int c = ReadUint8(data, pos); + int dc_tbl_idx = c >> 4; + int ac_tbl_idx = c & 0xf; + JXL_JPEG_VERIFY_INPUT(dc_tbl_idx, 0, 3, HUFFMAN_INDEX); + JXL_JPEG_VERIFY_INPUT(ac_tbl_idx, 0, 3, HUFFMAN_INDEX); + scan_info.components[i].dc_tbl_idx = dc_tbl_idx; + scan_info.components[i].ac_tbl_idx = ac_tbl_idx; + } + JXL_JPEG_VERIFY_LEN(3); + scan_info.Ss = ReadUint8(data, pos); + scan_info.Se = ReadUint8(data, pos); + JXL_JPEG_VERIFY_INPUT(static_cast(scan_info.Ss), 0, 63, START_OF_SCAN); + JXL_JPEG_VERIFY_INPUT(scan_info.Se, scan_info.Ss, 63, END_OF_SCAN); + int c = ReadUint8(data, pos); + scan_info.Ah = c >> 4; + scan_info.Al = c & 0xf; + if (scan_info.Ah != 0 && scan_info.Al != scan_info.Ah - 1) { + // section G.1.1.1.2 : Successive approximation control only improves + // by one bit at a time. But it's not always respected, so we just issue + // a warning. + JXL_WARNING("Invalid progressive parameters: Al=%d Ah=%d", scan_info.Al, + scan_info.Ah); + } + // Check that all the Huffman tables needed for this scan are defined. 
+ for (size_t i = 0; i < comps_in_scan; ++i) { + bool found_dc_table = false; + bool found_ac_table = false; + for (size_t j = 0; j < jpg->huffman_code.size(); ++j) { + uint32_t slot_id = jpg->huffman_code[j].slot_id; + if (slot_id == scan_info.components[i].dc_tbl_idx) { + found_dc_table = true; + } else if (slot_id == scan_info.components[i].ac_tbl_idx + 16) { + found_ac_table = true; + } + } + if (scan_info.Ss == 0 && !found_dc_table) { + return JXL_FAILURE( + "SOS marker: Could not find DC Huffman table with index %d", + scan_info.components[i].dc_tbl_idx); + } + if (scan_info.Se > 0 && !found_ac_table) { + return JXL_FAILURE( + "SOS marker: Could not find AC Huffman table with index %d", + scan_info.components[i].ac_tbl_idx); + } + } + jpg->scan_info.push_back(scan_info); + JXL_JPEG_VERIFY_MARKER_END(); + return true; +} + +// Reads the Define Huffman Table (DHT) marker segment and fills in *jpg with +// the parsed data. Builds the Huffman decoding table in either dc_huff_lut or +// ac_huff_lut, depending on the type and solt_id of Huffman code being read. 
+bool ProcessDHT(const uint8_t* data, const size_t len, JpegReadMode mode, + std::vector* dc_huff_lut, + std::vector* ac_huff_lut, size_t* pos, + JPEGData* jpg) { + const size_t start_pos = *pos; + JXL_JPEG_VERIFY_LEN(2); + size_t marker_len = ReadUint16(data, pos); + if (marker_len == 2) { + return JXL_FAILURE("DHT marker: no Huffman table found"); + } + while (*pos < start_pos + marker_len) { + JXL_JPEG_VERIFY_LEN(1 + kJpegHuffmanMaxBitLength); + JPEGHuffmanCode huff; + huff.slot_id = ReadUint8(data, pos); + int huffman_index = huff.slot_id; + int is_ac_table = (huff.slot_id & 0x10) != 0; + HuffmanTableEntry* huff_lut; + if (is_ac_table) { + huffman_index -= 0x10; + JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX); + huff_lut = &(*ac_huff_lut)[huffman_index * kJpegHuffmanLutSize]; + } else { + JXL_JPEG_VERIFY_INPUT(huffman_index, 0, 3, HUFFMAN_INDEX); + huff_lut = &(*dc_huff_lut)[huffman_index * kJpegHuffmanLutSize]; + } + huff.counts[0] = 0; + int total_count = 0; + int space = 1 << kJpegHuffmanMaxBitLength; + int max_depth = 1; + for (size_t i = 1; i <= kJpegHuffmanMaxBitLength; ++i) { + int count = ReadUint8(data, pos); + if (count != 0) { + max_depth = i; + } + huff.counts[i] = count; + total_count += count; + space -= count * (1 << (kJpegHuffmanMaxBitLength - i)); + } + if (is_ac_table) { + JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegHuffmanAlphabetSize, + HUFFMAN_CODE); + } else { + JXL_JPEG_VERIFY_INPUT(total_count, 0, kJpegDCAlphabetSize, HUFFMAN_CODE); + } + JXL_JPEG_VERIFY_LEN(total_count); + std::vector values_seen(256, false); + for (int i = 0; i < total_count; ++i) { + int value = ReadUint8(data, pos); + if (!is_ac_table) { + JXL_JPEG_VERIFY_INPUT(value, 0, kJpegDCAlphabetSize - 1, HUFFMAN_CODE); + } + if (values_seen[value]) { + return JXL_FAILURE("Duplicate Huffman code value %d", value); + } + values_seen[value] = true; + huff.values[i] = value; + } + // Add an invalid symbol that will have the all 1 code. 
+ ++huff.counts[max_depth]; + huff.values[total_count] = kJpegHuffmanAlphabetSize; + space -= (1 << (kJpegHuffmanMaxBitLength - max_depth)); + if (space < 0) { + return JXL_FAILURE("Invalid Huffman code lengths."); + } else if (space > 0 && huff_lut[0].value != 0xffff) { + // Re-initialize the values to an invalid symbol so that we can recognize + // it when reading the bit stream using a Huffman code with space > 0. + for (int i = 0; i < kJpegHuffmanLutSize; ++i) { + huff_lut[i].bits = 0; + huff_lut[i].value = 0xffff; + } + } + huff.is_last = (*pos == start_pos + marker_len); + if (mode == JpegReadMode::kReadAll) { + BuildJpegHuffmanTable(&huff.counts[0], &huff.values[0], huff_lut); + } + jpg->huffman_code.push_back(huff); + } + JXL_JPEG_VERIFY_MARKER_END(); + return true; +} + +// Reads the Define Quantization Table (DQT) marker segment and fills in *jpg +// with the parsed data. +bool ProcessDQT(const uint8_t* data, const size_t len, size_t* pos, + JPEGData* jpg) { + const size_t start_pos = *pos; + JXL_JPEG_VERIFY_LEN(2); + size_t marker_len = ReadUint16(data, pos); + if (marker_len == 2) { + return JXL_FAILURE("DQT marker: no quantization table found"); + } + while (*pos < start_pos + marker_len && jpg->quant.size() < kMaxQuantTables) { + JXL_JPEG_VERIFY_LEN(1); + int quant_table_index = ReadUint8(data, pos); + int quant_table_precision = quant_table_index >> 4; + JXL_JPEG_VERIFY_INPUT(quant_table_precision, 0, 1, QUANT_TBL_PRECISION); + quant_table_index &= 0xf; + JXL_JPEG_VERIFY_INPUT(quant_table_index, 0, 3, QUANT_TBL_INDEX); + JXL_JPEG_VERIFY_LEN((quant_table_precision + 1) * kDCTBlockSize); + JPEGQuantTable table; + table.index = quant_table_index; + table.precision = quant_table_precision; + for (size_t i = 0; i < kDCTBlockSize; ++i) { + int quant_val = + quant_table_precision ? 
ReadUint16(data, pos) : ReadUint8(data, pos); + JXL_JPEG_VERIFY_INPUT(quant_val, 1, 65535, QUANT_VAL); + table.values[kJPEGNaturalOrder[i]] = quant_val; + } + table.is_last = (*pos == start_pos + marker_len); + jpg->quant.push_back(table); + } + JXL_JPEG_VERIFY_MARKER_END(); + return true; +} + +// Reads the DRI marker and saves the restart interval into *jpg. +bool ProcessDRI(const uint8_t* data, const size_t len, size_t* pos, + bool* found_dri, JPEGData* jpg) { + if (*found_dri) { + return JXL_FAILURE("Duplicate DRI marker."); + } + *found_dri = true; + const size_t start_pos = *pos; + JXL_JPEG_VERIFY_LEN(4); + size_t marker_len = ReadUint16(data, pos); + int restart_interval = ReadUint16(data, pos); + jpg->restart_interval = restart_interval; + JXL_JPEG_VERIFY_MARKER_END(); + return true; +} + +// Saves the APP marker segment as a string to *jpg. +bool ProcessAPP(const uint8_t* data, const size_t len, size_t* pos, + JPEGData* jpg) { + JXL_JPEG_VERIFY_LEN(2); + size_t marker_len = ReadUint16(data, pos); + JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN); + JXL_JPEG_VERIFY_LEN(marker_len - 2); + JXL_DASSERT(*pos >= 3); + // Save the marker type together with the app data. + const uint8_t* app_str_start = data + *pos - 3; + std::vector app_str(app_str_start, app_str_start + marker_len + 1); + *pos += marker_len - 2; + jpg->app_data.push_back(app_str); + return true; +} + +// Saves the COM marker segment as a string to *jpg. +bool ProcessCOM(const uint8_t* data, const size_t len, size_t* pos, + JPEGData* jpg) { + JXL_JPEG_VERIFY_LEN(2); + size_t marker_len = ReadUint16(data, pos); + JXL_JPEG_VERIFY_INPUT(marker_len, 2, 65535, MARKER_LEN); + JXL_JPEG_VERIFY_LEN(marker_len - 2); + const uint8_t* com_str_start = data + *pos - 3; + std::vector com_str(com_str_start, com_str_start + marker_len + 1); + *pos += marker_len - 2; + jpg->com_data.push_back(com_str); + return true; +} + +// Helper structure to read bits from the entropy coded data segment. 
+struct BitReaderState { + BitReaderState(const uint8_t* data, const size_t len, size_t pos) + : data_(data), len_(len) { + Reset(pos); + } + + void Reset(size_t pos) { + pos_ = pos; + val_ = 0; + bits_left_ = 0; + next_marker_pos_ = len_ - 2; + FillBitWindow(); + } + + // Returns the next byte and skips the 0xff/0x00 escape sequences. + uint8_t GetNextByte() { + if (pos_ >= next_marker_pos_) { + ++pos_; + return 0; + } + uint8_t c = data_[pos_++]; + if (c == 0xff) { + uint8_t escape = data_[pos_]; + if (escape == 0) { + ++pos_; + } else { + // 0xff was followed by a non-zero byte, which means that we found the + // start of the next marker segment. + next_marker_pos_ = pos_ - 1; + } + } + return c; + } + + void FillBitWindow() { + if (bits_left_ <= 16) { + while (bits_left_ <= 56) { + val_ <<= 8; + val_ |= (uint64_t)GetNextByte(); + bits_left_ += 8; + } + } + } + + int ReadBits(int nbits) { + FillBitWindow(); + uint64_t val = (val_ >> (bits_left_ - nbits)) & ((1ULL << nbits) - 1); + bits_left_ -= nbits; + return val; + } + + // Sets *pos to the next stream position where parsing should continue. + // Enqueue the padding bits seen (0 or 1). + // Returns false if there is inconsistent or invalid padding or the stream + // ended too early. + bool FinishStream(JPEGData* jpg, size_t* pos) { + int npadbits = bits_left_ & 7; + if (npadbits > 0) { + uint64_t padmask = (1ULL << npadbits) - 1; + uint64_t padbits = (val_ >> (bits_left_ - npadbits)) & padmask; + if (padbits != padmask) { + jpg->has_zero_padding_bit = true; + } + for (int i = npadbits - 1; i >= 0; --i) { + jpg->padding_bits.push_back((padbits >> i) & 1); + } + } + // Give back some bytes that we did not use. + int unused_bytes_left = bits_left_ >> 3; + while (unused_bytes_left-- > 0) { + --pos_; + // If we give back a 0 byte, we need to check if it was a 0xff/0x00 escape + // sequence, and if yes, we need to give back one more byte. 
+ if (pos_ < next_marker_pos_ && data_[pos_] == 0 && + data_[pos_ - 1] == 0xff) { + --pos_; + } + } + if (pos_ > next_marker_pos_) { + // Data ran out before the scan was complete. + return JXL_FAILURE("Unexpected end of scan."); + } + *pos = pos_; + return true; + } + + const uint8_t* data_; + const size_t len_; + size_t pos_; + uint64_t val_; + int bits_left_; + size_t next_marker_pos_; +}; + +// Returns the next Huffman-coded symbol. +int ReadSymbol(const HuffmanTableEntry* table, BitReaderState* br) { + int nbits; + br->FillBitWindow(); + int val = (br->val_ >> (br->bits_left_ - 8)) & 0xff; + table += val; + nbits = table->bits - 8; + if (nbits > 0) { + br->bits_left_ -= 8; + table += table->value; + val = (br->val_ >> (br->bits_left_ - nbits)) & ((1 << nbits) - 1); + table += val; + } + br->bits_left_ -= table->bits; + return table->value; +} + +/** + * Returns the DC diff or AC value for extra bits value x and prefix code s. + * + * CCITT Rec. T.81 (1992 E) + * Table F.1 – Difference magnitude categories for DC coding + * SSSS | DIFF values + * ------+-------------------------- + * 0 | 0 + * 1 | –1, 1 + * 2 | –3, –2, 2, 3 + * 3 | –7..–4, 4..7 + * ......|.......................... + * 11 | –2047..–1024, 1024..2047 + * + * CCITT Rec. T.81 (1992 E) + * Table F.2 – Categories assigned to coefficient values + * [ Same as Table F.1, but does not include SSSS equal to 0 and 11] + * + * + * CCITT Rec. T.81 (1992 E) + * F.1.2.1.1 Structure of DC code table + * For each category,... additional bits... appended... to uniquely identify + * which difference... occurred... When DIFF is positive... SSSS... bits of DIFF + * are appended. When DIFF is negative... SSSS... bits of (DIFF – 1) are + * appended... Most significant bit... is 0 for negative differences and 1 for + * positive differences. + * + * In other words the upper half of extra bits range represents DIFF as is. + * The lower half represents the negative DIFFs with an offset. 
+ */ +int HuffExtend(int x, int s) { + JXL_DASSERT(s >= 1); + int half = 1 << (s - 1); + if (x >= half) { + JXL_DASSERT(x < (1 << s)); + return x; + } else { + return x - (1 << s) + 1; + } +} + +// Decodes one 8x8 block of DCT coefficients from the bit stream. +bool DecodeDCTBlock(const HuffmanTableEntry* dc_huff, + const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al, + int* eobrun, bool* reset_state, int* num_zero_runs, + BitReaderState* br, JPEGData* jpg, coeff_t* last_dc_coeff, + coeff_t* coeffs) { + // Nowadays multiplication is even faster than variable shift. + int Am = 1 << Al; + bool eobrun_allowed = Ss > 0; + if (Ss == 0) { + int s = ReadSymbol(dc_huff, br); + if (s >= kJpegDCAlphabetSize) { + return JXL_FAILURE("Invalid Huffman symbol %d for DC coefficient.", s); + } + int diff = 0; + if (s > 0) { + int bits = br->ReadBits(s); + diff = HuffExtend(bits, s); + } + int coeff = diff + *last_dc_coeff; + const int dc_coeff = coeff * Am; + coeffs[0] = dc_coeff; + // TODO(eustas): is there a more elegant / explicit way to check this? 
+ if (dc_coeff != coeffs[0]) { + return JXL_FAILURE("Invalid DC coefficient %d", dc_coeff); + } + *last_dc_coeff = coeff; + ++Ss; + } + if (Ss > Se) { + return true; + } + if (*eobrun > 0) { + --(*eobrun); + return true; + } + *num_zero_runs = 0; + for (int k = Ss; k <= Se; k++) { + int sr = ReadSymbol(ac_huff, br); + if (sr >= kJpegHuffmanAlphabetSize) { + return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d", sr, + k); + } + int r = sr >> 4; + int s = sr & 15; + if (s > 0) { + k += r; + if (k > Se) { + return JXL_FAILURE("Out-of-band coefficient %d band was %d-%d", k, Ss, + Se); + } + if (s + Al >= kJpegDCAlphabetSize) { + return JXL_FAILURE( + "Out of range AC coefficient value: s = %d Al = %d k = %d", s, Al, + k); + } + int bits = br->ReadBits(s); + int coeff = HuffExtend(bits, s); + coeffs[kJPEGNaturalOrder[k]] = coeff * Am; + *num_zero_runs = 0; + } else if (r == 15) { + k += 15; + ++(*num_zero_runs); + } else { + if (eobrun_allowed && k == Ss && *eobrun == 0) { + // We have two end-of-block runs right after each other, so we signal + // the jpeg encoder to force a state reset at this point. + *reset_state = true; + } + *eobrun = 1 << r; + if (r > 0) { + if (!eobrun_allowed) { + return JXL_FAILURE("End-of-block run crossing DC coeff."); + } + *eobrun += br->ReadBits(r); + } + break; + } + } + --(*eobrun); + return true; +} + +bool RefineDCTBlock(const HuffmanTableEntry* ac_huff, int Ss, int Se, int Al, + int* eobrun, bool* reset_state, BitReaderState* br, + JPEGData* jpg, coeff_t* coeffs) { + // Nowadays multiplication is even faster than variable shift. 
+ int Am = 1 << Al; + bool eobrun_allowed = Ss > 0; + if (Ss == 0) { + int s = br->ReadBits(1); + coeff_t dc_coeff = coeffs[0]; + dc_coeff |= s * Am; + coeffs[0] = dc_coeff; + ++Ss; + } + if (Ss > Se) { + return true; + } + int p1 = Am; + int m1 = -Am; + int k = Ss; + int r; + int s; + bool in_zero_run = false; + if (*eobrun <= 0) { + for (; k <= Se; k++) { + s = ReadSymbol(ac_huff, br); + if (s >= kJpegHuffmanAlphabetSize) { + return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d", s, + k); + } + r = s >> 4; + s &= 15; + if (s) { + if (s != 1) { + return JXL_FAILURE("Invalid Huffman symbol %d for AC coefficient %d", + s, k); + } + s = br->ReadBits(1) ? p1 : m1; + in_zero_run = false; + } else { + if (r != 15) { + if (eobrun_allowed && k == Ss && *eobrun == 0) { + // We have two end-of-block runs right after each other, so we + // signal the jpeg encoder to force a state reset at this point. + *reset_state = true; + } + *eobrun = 1 << r; + if (r > 0) { + if (!eobrun_allowed) { + return JXL_FAILURE("End-of-block run crossing DC coeff."); + } + *eobrun += br->ReadBits(r); + } + break; + } + in_zero_run = true; + } + do { + coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; + if (thiscoef != 0) { + if (br->ReadBits(1)) { + if ((thiscoef & p1) == 0) { + if (thiscoef >= 0) { + thiscoef += p1; + } else { + thiscoef += m1; + } + } + } + coeffs[kJPEGNaturalOrder[k]] = thiscoef; + } else { + if (--r < 0) { + break; + } + } + k++; + } while (k <= Se); + if (s) { + if (k > Se) { + return JXL_FAILURE("Out-of-band coefficient %d band was %d-%d", k, Ss, + Se); + } + coeffs[kJPEGNaturalOrder[k]] = s; + } + } + } + if (in_zero_run) { + return JXL_FAILURE("Extra zero run before end-of-block."); + } + if (*eobrun > 0) { + for (; k <= Se; k++) { + coeff_t thiscoef = coeffs[kJPEGNaturalOrder[k]]; + if (thiscoef != 0) { + if (br->ReadBits(1)) { + if ((thiscoef & p1) == 0) { + if (thiscoef >= 0) { + thiscoef += p1; + } else { + thiscoef += m1; + } + } + } + 
coeffs[kJPEGNaturalOrder[k]] = thiscoef; + } + } + } + --(*eobrun); + return true; +} + +bool ProcessRestart(const uint8_t* data, const size_t len, + int* next_restart_marker, BitReaderState* br, + JPEGData* jpg) { + size_t pos = 0; + if (!br->FinishStream(jpg, &pos)) { + return JXL_FAILURE("Invalid scan"); + } + int expected_marker = 0xd0 + *next_restart_marker; + JXL_JPEG_EXPECT_MARKER(); + int marker = data[pos + 1]; + if (marker != expected_marker) { + return JXL_FAILURE("Did not find expected restart marker %d actual %d", + expected_marker, marker); + } + br->Reset(pos + 2); + *next_restart_marker += 1; + *next_restart_marker &= 0x7; + return true; +} + +bool ProcessScan(const uint8_t* data, const size_t len, + const std::vector& dc_huff_lut, + const std::vector& ac_huff_lut, + uint16_t scan_progression[kMaxComponents][kDCTBlockSize], + bool is_progressive, size_t* pos, JPEGData* jpg) { + if (!ProcessSOS(data, len, pos, jpg)) { + return false; + } + JPEGScanInfo* scan_info = &jpg->scan_info.back(); + bool is_interleaved = (scan_info->num_components > 1); + int max_h_samp_factor = 1; + int max_v_samp_factor = 1; + for (size_t i = 0; i < jpg->components.size(); ++i) { + max_h_samp_factor = + std::max(max_h_samp_factor, jpg->components[i].h_samp_factor); + max_v_samp_factor = + std::max(max_v_samp_factor, jpg->components[i].v_samp_factor); + } + + int MCU_rows = DivCeil(jpg->height, max_v_samp_factor * 8); + int MCUs_per_row = DivCeil(jpg->width, max_h_samp_factor * 8); + if (!is_interleaved) { + const JPEGComponent& c = jpg->components[scan_info->components[0].comp_idx]; + MCUs_per_row = DivCeil(jpg->width * c.h_samp_factor, 8 * max_h_samp_factor); + MCU_rows = DivCeil(jpg->height * c.v_samp_factor, 8 * max_v_samp_factor); + } + coeff_t last_dc_coeff[kMaxComponents] = {0}; + BitReaderState br(data, len, *pos); + int restarts_to_go = jpg->restart_interval; + int next_restart_marker = 0; + int eobrun = -1; + int block_scan_index = 0; + const int Al = 
is_progressive ? scan_info->Al : 0; + const int Ah = is_progressive ? scan_info->Ah : 0; + const int Ss = is_progressive ? scan_info->Ss : 0; + const int Se = is_progressive ? scan_info->Se : 63; + const uint16_t scan_bitmask = Ah == 0 ? (0xffff << Al) : (1u << Al); + const uint16_t refinement_bitmask = (1 << Al) - 1; + for (size_t i = 0; i < scan_info->num_components; ++i) { + int comp_idx = scan_info->components[i].comp_idx; + for (int k = Ss; k <= Se; ++k) { + if (scan_progression[comp_idx][k] & scan_bitmask) { + return JXL_FAILURE( + "Overlapping scans: component=%d k=%d prev_mask: %u cur_mask %u", + comp_idx, k, scan_progression[i][k], scan_bitmask); + } + if (scan_progression[comp_idx][k] & refinement_bitmask) { + return JXL_FAILURE( + "Invalid scan order, a more refined scan was already done: " + "component=%d k=%d prev_mask=%u cur_mask=%u", + comp_idx, k, scan_progression[i][k], scan_bitmask); + } + scan_progression[comp_idx][k] |= scan_bitmask; + } + } + if (Al > 10) { + return JXL_FAILURE("Scan parameter Al=%d is not supported.", Al); + } + for (int mcu_y = 0; mcu_y < MCU_rows; ++mcu_y) { + for (int mcu_x = 0; mcu_x < MCUs_per_row; ++mcu_x) { + // Handle the restart intervals. + if (jpg->restart_interval > 0) { + if (restarts_to_go == 0) { + if (ProcessRestart(data, len, &next_restart_marker, &br, jpg)) { + restarts_to_go = jpg->restart_interval; + memset(static_cast(last_dc_coeff), 0, sizeof(last_dc_coeff)); + if (eobrun > 0) { + return JXL_FAILURE("End-of-block run too long."); + } + eobrun = -1; // fresh start + } else { + return JXL_FAILURE("Could not process restart."); + } + } + --restarts_to_go; + } + // Decode one MCU. 
+ for (size_t i = 0; i < scan_info->num_components; ++i) { + JPEGComponentScanInfo* si = &scan_info->components[i]; + JPEGComponent* c = &jpg->components[si->comp_idx]; + const HuffmanTableEntry* dc_lut = + &dc_huff_lut[si->dc_tbl_idx * kJpegHuffmanLutSize]; + const HuffmanTableEntry* ac_lut = + &ac_huff_lut[si->ac_tbl_idx * kJpegHuffmanLutSize]; + int nblocks_y = is_interleaved ? c->v_samp_factor : 1; + int nblocks_x = is_interleaved ? c->h_samp_factor : 1; + for (int iy = 0; iy < nblocks_y; ++iy) { + for (int ix = 0; ix < nblocks_x; ++ix) { + int block_y = mcu_y * nblocks_y + iy; + int block_x = mcu_x * nblocks_x + ix; + int block_idx = block_y * c->width_in_blocks + block_x; + bool reset_state = false; + int num_zero_runs = 0; + coeff_t* coeffs = &c->coeffs[block_idx * kDCTBlockSize]; + if (Ah == 0) { + if (!DecodeDCTBlock(dc_lut, ac_lut, Ss, Se, Al, &eobrun, + &reset_state, &num_zero_runs, &br, jpg, + &last_dc_coeff[si->comp_idx], coeffs)) { + return false; + } + } else { + if (!RefineDCTBlock(ac_lut, Ss, Se, Al, &eobrun, &reset_state, + &br, jpg, coeffs)) { + return false; + } + } + if (reset_state) { + scan_info->reset_points.emplace_back(block_scan_index); + } + if (num_zero_runs > 0) { + JPEGScanInfo::ExtraZeroRunInfo info; + info.block_idx = block_scan_index; + info.num_extra_zero_runs = num_zero_runs; + scan_info->extra_zero_runs.push_back(info); + } + ++block_scan_index; + } + } + } + } + } + if (eobrun > 0) { + return JXL_FAILURE("End-of-block run too long."); + } + if (!br.FinishStream(jpg, pos)) { + return JXL_FAILURE("Invalid scan."); + } + if (*pos > len) { + return JXL_FAILURE("Unexpected end of file during scan. pos=%" PRIuS + " len=%" PRIuS, + *pos, len); + } + return true; +} + +// Changes the quant_idx field of the components to refer to the index of the +// quant table in the jpg->quant array. 
+bool FixupIndexes(JPEGData* jpg) { + for (size_t i = 0; i < jpg->components.size(); ++i) { + JPEGComponent* c = &jpg->components[i]; + bool found_index = false; + for (size_t j = 0; j < jpg->quant.size(); ++j) { + if (jpg->quant[j].index == c->quant_idx) { + c->quant_idx = j; + found_index = true; + break; + } + } + if (!found_index) { + return JXL_FAILURE("Quantization table with index %u not found", + c->quant_idx); + } + } + return true; +} + +size_t FindNextMarker(const uint8_t* data, const size_t len, size_t pos) { + // kIsValidMarker[i] == 1 means (0xc0 + i) is a valid marker. + static const uint8_t kIsValidMarker[] = { + 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + }; + size_t num_skipped = 0; + while (pos + 1 < len && (data[pos] != 0xff || data[pos + 1] < 0xc0 || + !kIsValidMarker[data[pos + 1] - 0xc0])) { + ++pos; + ++num_skipped; + } + return num_skipped; +} + +} // namespace + +bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode, + JPEGData* jpg) { + size_t pos = 0; + // Check SOI marker. + JXL_JPEG_EXPECT_MARKER(); + int marker = data[pos + 1]; + pos += 2; + if (marker != 0xd8) { + return JXL_FAILURE("Did not find expected SOI marker, actual=%d", marker); + } + int lut_size = kMaxHuffmanTables * kJpegHuffmanLutSize; + std::vector dc_huff_lut(lut_size); + std::vector ac_huff_lut(lut_size); + bool found_sof = false; + bool found_dri = false; + uint16_t scan_progression[kMaxComponents][kDCTBlockSize] = {{0}}; + + jpg->padding_bits.resize(0); + bool is_progressive = false; // default + do { + // Read next marker. + size_t num_skipped = FindNextMarker(data, len, pos); + if (num_skipped > 0) { + // Add a fake marker to indicate arbitrary in-between-markers data. 
+ jpg->marker_order.push_back(0xff); + jpg->inter_marker_data.emplace_back(data + pos, data + pos + num_skipped); + pos += num_skipped; + } + JXL_JPEG_EXPECT_MARKER(); + marker = data[pos + 1]; + pos += 2; + bool ok = true; + switch (marker) { + case 0xc0: + case 0xc1: + case 0xc2: + is_progressive = (marker == 0xc2); + ok = ProcessSOF(data, len, mode, &pos, jpg); + found_sof = true; + break; + case 0xc4: + ok = ProcessDHT(data, len, mode, &dc_huff_lut, &ac_huff_lut, &pos, jpg); + break; + case 0xd0: + case 0xd1: + case 0xd2: + case 0xd3: + case 0xd4: + case 0xd5: + case 0xd6: + case 0xd7: + // RST markers do not have any data. + break; + case 0xd9: + // Found end marker. + break; + case 0xda: + if (mode == JpegReadMode::kReadAll) { + ok = ProcessScan(data, len, dc_huff_lut, ac_huff_lut, + scan_progression, is_progressive, &pos, jpg); + } + break; + case 0xdb: + ok = ProcessDQT(data, len, &pos, jpg); + break; + case 0xdd: + ok = ProcessDRI(data, len, &pos, &found_dri, jpg); + break; + case 0xe0: + case 0xe1: + case 0xe2: + case 0xe3: + case 0xe4: + case 0xe5: + case 0xe6: + case 0xe7: + case 0xe8: + case 0xe9: + case 0xea: + case 0xeb: + case 0xec: + case 0xed: + case 0xee: + case 0xef: + if (mode != JpegReadMode::kReadTables) { + ok = ProcessAPP(data, len, &pos, jpg); + } + break; + case 0xfe: + if (mode != JpegReadMode::kReadTables) { + ok = ProcessCOM(data, len, &pos, jpg); + } + break; + default: + return JXL_FAILURE("Unsupported marker: %d pos=%" PRIuS " len=%" PRIuS, + marker, pos, len); + } + if (!ok) { + return false; + } + jpg->marker_order.push_back(marker); + if (mode == JpegReadMode::kReadHeader && found_sof) { + break; + } + } while (marker != 0xd9); + + if (!found_sof) { + return JXL_FAILURE("Missing SOF marker."); + } + + // Supplemental checks. 
+ if (mode == JpegReadMode::kReadAll) { + if (pos < len) { + jpg->tail_data = std::vector(data + pos, data + len); + } + if (!FixupIndexes(jpg)) { + return false; + } + if (jpg->huffman_code.empty()) { + // Section B.2.4.2: "If a table has never been defined for a particular + // destination, then when this destination is specified in a scan header, + // the results are unpredictable." + return JXL_FAILURE("Need at least one Huffman code table."); + } + if (jpg->huffman_code.size() >= kMaxDHTMarkers) { + return JXL_FAILURE("Too many Huffman tables."); + } + } + return true; +} + +} // namespace jpeg +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.h new file mode 100644 index 0000000000..3fad820e9d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_data_reader.h @@ -0,0 +1,36 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Functions for reading a jpeg byte stream into a JPEGData object. + +#ifndef LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_ +#define LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_ + +#include +#include + +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { + +enum class JpegReadMode { + kReadHeader, // only basic headers + kReadTables, // headers and tables (quant, Huffman, ...) + kReadAll, // everything +}; + +// Parses the JPEG stream contained in data[*pos ... len) and fills in *jpg with +// the parsed information. +// If mode is kReadHeader, it fills in only the image dimensions in *jpg. +// Returns false if the data is not valid JPEG, or if it contains an unsupported +// JPEG feature. 
+bool ReadJpeg(const uint8_t* data, const size_t len, JpegReadMode mode, + JPEGData* jpg); + +} // namespace jpeg +} // namespace jxl + +#endif // LIB_JXL_JPEG_ENC_JPEG_DATA_READER_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc new file mode 100644 index 0000000000..38282e640a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.cc @@ -0,0 +1,103 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/jpeg/enc_jpeg_huffman_decode.h" + +#include "lib/jxl/jpeg/jpeg_data.h" + +namespace jxl { +namespace jpeg { + +// Returns the table width of the next 2nd level table, count is the histogram +// of bit lengths for the remaining symbols, len is the code length of the next +// processed symbol. +static inline int NextTableBitSize(const int* count, int len) { + int left = 1 << (len - kJpegHuffmanRootTableBits); + while (len < static_cast(kJpegHuffmanMaxBitLength)) { + left -= count[len]; + if (left <= 0) break; + ++len; + left <<= 1; + } + return len - kJpegHuffmanRootTableBits; +} + +void BuildJpegHuffmanTable(const uint32_t* count, const uint32_t* symbols, + HuffmanTableEntry* lut) { + HuffmanTableEntry code; // current table entry + HuffmanTableEntry* table; // next available space in table + int len; // current code length + int idx; // symbol index + int key; // prefix code + int reps; // number of replicate key values in current table + int low; // low bits for current root entry + int table_bits; // key length of current table + int table_size; // size of current table + + // Make a local copy of the input bit length histogram. 
+ int tmp_count[kJpegHuffmanMaxBitLength + 1] = {0}; + int total_count = 0; + for (len = 1; len <= static_cast(kJpegHuffmanMaxBitLength); ++len) { + tmp_count[len] = count[len]; + total_count += tmp_count[len]; + } + + table = lut; + table_bits = kJpegHuffmanRootTableBits; + table_size = 1 << table_bits; + + // Special case code with only one value. + if (total_count == 1) { + code.bits = 0; + code.value = symbols[0]; + for (key = 0; key < table_size; ++key) { + table[key] = code; + } + return; + } + + // Fill in root table. + key = 0; + idx = 0; + for (len = 1; len <= kJpegHuffmanRootTableBits; ++len) { + for (; tmp_count[len] > 0; --tmp_count[len]) { + code.bits = len; + code.value = symbols[idx++]; + reps = 1 << (kJpegHuffmanRootTableBits - len); + while (reps--) { + table[key++] = code; + } + } + } + + // Fill in 2nd level tables and add pointers to root table. + table += table_size; + table_size = 0; + low = 0; + for (len = kJpegHuffmanRootTableBits + 1; + len <= static_cast(kJpegHuffmanMaxBitLength); ++len) { + for (; tmp_count[len] > 0; --tmp_count[len]) { + // Start a new sub-table if the previous one is full. + if (low >= table_size) { + table += table_size; + table_bits = NextTableBitSize(tmp_count, len); + table_size = 1 << table_bits; + low = 0; + lut[key].bits = table_bits + kJpegHuffmanRootTableBits; + lut[key].value = (table - lut) - key; + ++key; + } + code.bits = len - kJpegHuffmanRootTableBits; + code.value = symbols[idx++]; + reps = 1 << (table_bits - code.bits); + while (reps--) { + table[low++] = code; + } + } + } +} + +} // namespace jpeg +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h new file mode 100644 index 0000000000..b8a60e4107 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/enc_jpeg_huffman_decode.h @@ -0,0 +1,41 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Utility function for building a Huffman lookup table for the jpeg decoder. + +#ifndef LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_ +#define LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_ + +#include + +namespace jxl { +namespace jpeg { + +constexpr int kJpegHuffmanRootTableBits = 8; +// Maximum huffman lookup table size. +// According to zlib/examples/enough.c, 758 entries are always enough for +// an alphabet of 257 symbols (256 + 1 special symbol for the all 1s code) and +// max bit length 16 if the root table has 8 bits. +constexpr int kJpegHuffmanLutSize = 758; + +struct HuffmanTableEntry { + // Initialize the value to an invalid symbol so that we can recognize it + // when reading the bit stream using a Huffman code with space > 0. + HuffmanTableEntry() : bits(0), value(0xffff) {} + + uint8_t bits; // number of bits used for this symbol + uint16_t value; // symbol value or table offset +}; + +// Builds jpeg-style Huffman lookup table from the given symbols. +// The symbols are in order of increasing bit lengths. The number of symbols +// with bit length n is given in counts[n] for each n >= 1. +void BuildJpegHuffmanTable(const uint32_t* counts, const uint32_t* symbols, + HuffmanTableEntry* lut); + +} // namespace jpeg +} // namespace jxl + +#endif // LIB_JXL_JPEG_ENC_JPEG_HUFFMAN_DECODE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.cc b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.cc new file mode 100644 index 0000000000..9b4c778c9c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.cc @@ -0,0 +1,478 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/jpeg/jpeg_data.h" + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" + +namespace jxl { +namespace jpeg { + +#if JPEGXL_ENABLE_TRANSCODE_JPEG + +namespace { +enum JPEGComponentType : uint32_t { + kGray = 0, + kYCbCr = 1, + kRGB = 2, + kCustom = 3, +}; + +struct JPEGInfo { + size_t num_app_markers = 0; + size_t num_com_markers = 0; + size_t num_scans = 0; + size_t num_intermarker = 0; + bool has_dri = false; +}; + +Status VisitMarker(uint8_t* marker, Visitor* visitor, JPEGInfo* info) { + uint32_t marker32 = *marker - 0xc0; + JXL_RETURN_IF_ERROR(visitor->Bits(6, 0x00, &marker32)); + *marker = marker32 + 0xc0; + if ((*marker & 0xf0) == 0xe0) { + info->num_app_markers++; + } + if (*marker == 0xfe) { + info->num_com_markers++; + } + if (*marker == 0xda) { + info->num_scans++; + } + // We use a fake 0xff marker to signal intermarker data. + if (*marker == 0xff) { + info->num_intermarker++; + } + if (*marker == 0xdd) { + info->has_dri = true; + } + return true; +} + +} // namespace + +Status JPEGData::VisitFields(Visitor* visitor) { + bool is_gray = components.size() == 1; + JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_gray)); + if (visitor->IsReading()) { + components.resize(is_gray ? 1 : 3); + } + JPEGInfo info; + if (visitor->IsReading()) { + uint8_t marker = 0xc0; + do { + JXL_RETURN_IF_ERROR(VisitMarker(&marker, visitor, &info)); + marker_order.push_back(marker); + if (marker_order.size() > 16384) { + return JXL_FAILURE("Too many markers: %" PRIuS "\n", + marker_order.size()); + } + } while (marker != 0xd9); + } else { + if (marker_order.size() > 16384) { + return JXL_FAILURE("Too many markers: %" PRIuS "\n", marker_order.size()); + } + for (size_t i = 0; i < marker_order.size(); i++) { + JXL_RETURN_IF_ERROR(VisitMarker(&marker_order[i], visitor, &info)); + } + if (!marker_order.empty()) { + // Last marker should always be EOI marker. 
+ JXL_CHECK(marker_order.back() == 0xd9); + } + } + + // Size of the APP and COM markers. + if (visitor->IsReading()) { + app_data.resize(info.num_app_markers); + app_marker_type.resize(info.num_app_markers); + com_data.resize(info.num_com_markers); + scan_info.resize(info.num_scans); + } + JXL_ASSERT(app_data.size() == info.num_app_markers); + JXL_ASSERT(app_marker_type.size() == info.num_app_markers); + JXL_ASSERT(com_data.size() == info.num_com_markers); + JXL_ASSERT(scan_info.size() == info.num_scans); + for (size_t i = 0; i < app_data.size(); i++) { + auto& app = app_data[i]; + // Encodes up to 8 different values. + JXL_RETURN_IF_ERROR( + visitor->U32(Val(0), Val(1), BitsOffset(1, 2), BitsOffset(2, 4), 0, + reinterpret_cast(&app_marker_type[i]))); + if (app_marker_type[i] != AppMarkerType::kUnknown && + app_marker_type[i] != AppMarkerType::kICC && + app_marker_type[i] != AppMarkerType::kExif && + app_marker_type[i] != AppMarkerType::kXMP) { + return JXL_FAILURE("Unknown app marker type %u", + static_cast(app_marker_type[i])); + } + uint32_t len = app.size() - 1; + JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len)); + if (visitor->IsReading()) app.resize(len + 1); + if (app.size() < 3) { + return JXL_FAILURE("Invalid marker size: %" PRIuS "\n", app.size()); + } + } + for (auto& com : com_data) { + uint32_t len = com.size() - 1; + JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len)); + if (visitor->IsReading()) com.resize(len + 1); + if (com.size() < 3) { + return JXL_FAILURE("Invalid marker size: %" PRIuS "\n", com.size()); + } + } + + uint32_t num_quant_tables = quant.size(); + JXL_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(3), Val(4), 2, &num_quant_tables)); + if (num_quant_tables == 4) { + return JXL_FAILURE("Invalid number of quant tables"); + } + if (visitor->IsReading()) { + quant.resize(num_quant_tables); + } + for (size_t i = 0; i < num_quant_tables; i++) { + if (quant[i].precision > 1) { + return JXL_FAILURE( + "Quant tables with more than 16 bits are 
not supported"); + } + JXL_RETURN_IF_ERROR(visitor->Bits(1, 0, &quant[i].precision)); + JXL_RETURN_IF_ERROR(visitor->Bits(2, i, &quant[i].index)); + JXL_RETURN_IF_ERROR(visitor->Bool(true, &quant[i].is_last)); + } + + JPEGComponentType component_type = + components.size() == 1 && components[0].id == 1 ? JPEGComponentType::kGray + : components.size() == 3 && components[0].id == 1 && + components[1].id == 2 && components[2].id == 3 + ? JPEGComponentType::kYCbCr + : components.size() == 3 && components[0].id == 'R' && + components[1].id == 'G' && components[2].id == 'B' + ? JPEGComponentType::kRGB + : JPEGComponentType::kCustom; + JXL_RETURN_IF_ERROR( + visitor->Bits(2, JPEGComponentType::kYCbCr, + reinterpret_cast(&component_type))); + uint32_t num_components; + if (component_type == JPEGComponentType::kGray) { + num_components = 1; + } else if (component_type != JPEGComponentType::kCustom) { + num_components = 3; + } else { + num_components = components.size(); + JXL_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(3), Val(4), 3, &num_components)); + if (num_components != 1 && num_components != 3) { + return JXL_FAILURE("Invalid number of components: %u", num_components); + } + } + if (visitor->IsReading()) { + components.resize(num_components); + } + if (component_type == JPEGComponentType::kCustom) { + for (size_t i = 0; i < components.size(); i++) { + JXL_RETURN_IF_ERROR(visitor->Bits(8, 0, &components[i].id)); + } + } else if (component_type == JPEGComponentType::kGray) { + components[0].id = 1; + } else if (component_type == JPEGComponentType::kRGB) { + components[0].id = 'R'; + components[1].id = 'G'; + components[2].id = 'B'; + } else { + components[0].id = 1; + components[1].id = 2; + components[2].id = 3; + } + size_t used_tables = 0; + for (size_t i = 0; i < components.size(); i++) { + JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &components[i].quant_idx)); + if (components[i].quant_idx >= quant.size()) { + return JXL_FAILURE("Invalid quant table for 
component %" PRIuS ": %u\n", + i, components[i].quant_idx); + } + used_tables |= 1U << components[i].quant_idx; + } + for (size_t i = 0; i < quant.size(); i++) { + if (used_tables & (1 << i)) continue; + if (i == 0) return JXL_FAILURE("First quant table unused."); + // Unused quant table has to be set to copy of previous quant table + for (size_t j = 0; j < 64; j++) { + if (quant[i].values[j] != quant[i - 1].values[j]) { + return JXL_FAILURE("Non-trivial unused quant table"); + } + } + } + + uint32_t num_huff = huffman_code.size(); + JXL_RETURN_IF_ERROR(visitor->U32(Val(4), BitsOffset(3, 2), BitsOffset(4, 10), + BitsOffset(6, 26), 4, &num_huff)); + if (visitor->IsReading()) { + huffman_code.resize(num_huff); + } + for (JPEGHuffmanCode& hc : huffman_code) { + bool is_ac = hc.slot_id >> 4; + uint32_t id = hc.slot_id & 0xF; + JXL_RETURN_IF_ERROR(visitor->Bool(false, &is_ac)); + JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &id)); + hc.slot_id = (static_cast(is_ac) << 4) | id; + JXL_RETURN_IF_ERROR(visitor->Bool(true, &hc.is_last)); + size_t num_symbols = 0; + for (size_t i = 0; i <= 16; i++) { + JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(3, 2), + Bits(8), 0, &hc.counts[i])); + num_symbols += hc.counts[i]; + } + if (num_symbols < 1) { + // Actually, at least 2 symbols are required, since one of them is EOI. + return JXL_FAILURE("Empty Huffman table"); + } + if (num_symbols > hc.values.size()) { + return JXL_FAILURE("Huffman code too large (%" PRIuS ")", num_symbols); + } + // Presence flags for 4 * 64 + 1 values. + uint64_t value_slots[5] = {}; + for (size_t i = 0; i < num_symbols; i++) { + // Goes up to 256, included. Might have the same symbol appear twice... 
+ JXL_RETURN_IF_ERROR(visitor->U32(Bits(2), BitsOffset(2, 4), + BitsOffset(4, 8), BitsOffset(8, 1), 0, + &hc.values[i])); + value_slots[hc.values[i] >> 6] |= (uint64_t)1 << (hc.values[i] & 0x3F); + } + if (hc.values[num_symbols - 1] != kJpegHuffmanAlphabetSize) { + return JXL_FAILURE("Missing EOI symbol"); + } + // Last element, denoting EOI, has to be 1 after the loop. + JXL_ASSERT(value_slots[4] == 1); + size_t num_values = 1; + for (size_t i = 0; i < 4; ++i) num_values += hwy::PopCount(value_slots[i]); + if (num_values != num_symbols) { + return JXL_FAILURE("Duplicate Huffman symbols"); + } + if (!is_ac) { + bool only_dc = ((value_slots[0] >> kJpegDCAlphabetSize) | value_slots[1] | + value_slots[2] | value_slots[3]) == 0; + if (!only_dc) return JXL_FAILURE("Huffman symbols out of DC range"); + } + } + + for (auto& scan : scan_info) { + JXL_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(3), Val(4), 1, &scan.num_components)); + if (scan.num_components >= 4) { + return JXL_FAILURE("Invalid number of components in SOS marker"); + } + JXL_RETURN_IF_ERROR(visitor->Bits(6, 0, &scan.Ss)); + JXL_RETURN_IF_ERROR(visitor->Bits(6, 63, &scan.Se)); + JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Al)); + JXL_RETURN_IF_ERROR(visitor->Bits(4, 0, &scan.Ah)); + for (size_t i = 0; i < scan.num_components; i++) { + JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].comp_idx)); + if (scan.components[i].comp_idx >= components.size()) { + return JXL_FAILURE("Invalid component idx in SOS marker"); + } + JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].ac_tbl_idx)); + JXL_RETURN_IF_ERROR(visitor->Bits(2, 0, &scan.components[i].dc_tbl_idx)); + } + // TODO(veluca): actually set and use this value. + JXL_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), Val(2), BitsOffset(3, 3), + kMaxNumPasses - 1, + &scan.last_needed_pass)); + } + + // From here on, this is data that is not strictly necessary to get a valid + // JPEG, but necessary for bit-exact JPEG reconstruction.
+ if (info.has_dri) { + JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &restart_interval)); + } + + for (auto& scan : scan_info) { + uint32_t num_reset_points = scan.reset_points.size(); + JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4), + BitsOffset(16, 20), 0, &num_reset_points)); + if (visitor->IsReading()) { + scan.reset_points.resize(num_reset_points); + } + int last_block_idx = -1; + for (auto& block_idx : scan.reset_points) { + block_idx -= last_block_idx + 1; + JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1), + BitsOffset(5, 9), BitsOffset(28, 41), 0, + &block_idx)); + block_idx += last_block_idx + 1; + if (block_idx >= (3u << 26)) { + // At most 8K x 8K x num_channels blocks are possible in a JPEG. + // So valid block indices are below 3 * 2^26. + return JXL_FAILURE("Invalid block ID: %u", block_idx); + } + last_block_idx = block_idx; + } + + uint32_t num_extra_zero_runs = scan.extra_zero_runs.size(); + JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(2, 1), BitsOffset(4, 4), + BitsOffset(16, 20), 0, + &num_extra_zero_runs)); + if (visitor->IsReading()) { + scan.extra_zero_runs.resize(num_extra_zero_runs); + } + last_block_idx = -1; + for (size_t i = 0; i < scan.extra_zero_runs.size(); ++i) { + uint32_t& block_idx = scan.extra_zero_runs[i].block_idx; + JXL_RETURN_IF_ERROR(visitor->U32( + Val(1), BitsOffset(2, 2), BitsOffset(4, 5), BitsOffset(8, 20), 1, + &scan.extra_zero_runs[i].num_extra_zero_runs)); + block_idx -= last_block_idx + 1; + JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(3, 1), + BitsOffset(5, 9), BitsOffset(28, 41), 0, + &block_idx)); + block_idx += last_block_idx + 1; + if (block_idx > (3u << 26)) { + return JXL_FAILURE("Invalid block ID: %u", block_idx); + } + last_block_idx = block_idx; + } + } + std::vector inter_marker_data_sizes; + inter_marker_data_sizes.reserve(info.num_intermarker); + for (size_t i = 0; i < info.num_intermarker; ++i) { + uint32_t len = visitor->IsReading() ? 
0 : inter_marker_data[i].size(); + JXL_RETURN_IF_ERROR(visitor->Bits(16, 0, &len)); + if (visitor->IsReading()) inter_marker_data_sizes.emplace_back(len); + } + uint32_t tail_data_len = tail_data.size(); + if (!visitor->IsReading() && tail_data_len > 4260096) { + return JXL_FAILURE("Tail data too large (max size = 4260096, size = %u)", + tail_data_len); + } + JXL_RETURN_IF_ERROR(visitor->U32(Val(0), BitsOffset(8, 1), + BitsOffset(16, 257), BitsOffset(22, 65793), + 0, &tail_data_len)); + + JXL_RETURN_IF_ERROR(visitor->Bool(false, &has_zero_padding_bit)); + if (has_zero_padding_bit) { + uint32_t nbit = padding_bits.size(); + JXL_RETURN_IF_ERROR(visitor->Bits(24, 0, &nbit)); + if (visitor->IsReading()) { + JXL_RETURN_IF_ERROR(CheckHasEnoughBits(visitor, nbit)); + padding_bits.reserve(std::min(1024u, nbit)); + for (uint32_t i = 0; i < nbit; i++) { + bool bbit = false; + JXL_RETURN_IF_ERROR(visitor->Bool(false, &bbit)); + padding_bits.push_back(bbit); + } + } else { + for (uint8_t& bit : padding_bits) { + bool bbit = bit; + JXL_RETURN_IF_ERROR(visitor->Bool(false, &bbit)); + bit = bbit; + } + } + } + + { + size_t dht_index = 0; + size_t scan_index = 0; + bool is_progressive = false; + bool ac_ok[kMaxHuffmanTables] = {false}; + bool dc_ok[kMaxHuffmanTables] = {false}; + for (uint8_t marker : marker_order) { + if (marker == 0xC2) { + is_progressive = true; + } else if (marker == 0xC4) { + for (; dht_index < huffman_code.size();) { + const JPEGHuffmanCode& huff = huffman_code[dht_index++]; + size_t index = huff.slot_id; + if (index & 0x10) { + index -= 0x10; + ac_ok[index] = true; + } else { + dc_ok[index] = true; + } + if (huff.is_last) break; + } + } else if (marker == 0xDA) { + const JPEGScanInfo& si = scan_info[scan_index++]; + for (size_t i = 0; i < si.num_components; ++i) { + const JPEGComponentScanInfo& csi = si.components[i]; + size_t dc_tbl_idx = csi.dc_tbl_idx; + size_t ac_tbl_idx = csi.ac_tbl_idx; + bool want_dc = !is_progressive || (si.Ss == 0); + if (want_dc 
&& !dc_ok[dc_tbl_idx]) { + return JXL_FAILURE("DC Huffman table used before defined"); + } + bool want_ac = !is_progressive || (si.Ss != 0) || (si.Se != 0); + if (want_ac && !ac_ok[ac_tbl_idx]) { + return JXL_FAILURE("AC Huffman table used before defined"); + } + } + } + } + } + + // Apply postponed actions. + if (visitor->IsReading()) { + tail_data.resize(tail_data_len); + JXL_ASSERT(inter_marker_data_sizes.size() == info.num_intermarker); + inter_marker_data.reserve(info.num_intermarker); + for (size_t i = 0; i < info.num_intermarker; ++i) { + inter_marker_data.emplace_back(inter_marker_data_sizes[i]); + } + } + + return true; +} + +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + +void JPEGData::CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row, + int* MCU_rows) const { + const bool is_interleaved = (scan.num_components > 1); + const JPEGComponent& base_component = components[scan.components[0].comp_idx]; + // h_group / v_group act as numerators for converting number of blocks to + // number of MCU. In interleaved mode it is 1, so MCU is represented with + // max_*_samp_factor blocks. In non-interleaved mode we choose numerator to + // be the sampling factor, consequently MCU is always represented with single + // block. + const int h_group = is_interleaved ? 1 : base_component.h_samp_factor; + const int v_group = is_interleaved ?
1 : base_component.v_samp_factor; + int max_h_samp_factor = 1; + int max_v_samp_factor = 1; + for (const auto& c : components) { + max_h_samp_factor = std::max(c.h_samp_factor, max_h_samp_factor); + max_v_samp_factor = std::max(c.v_samp_factor, max_v_samp_factor); + } + *MCUs_per_row = DivCeil(width * h_group, 8 * max_h_samp_factor); + *MCU_rows = DivCeil(height * v_group, 8 * max_v_samp_factor); +} + +#if JPEGXL_ENABLE_TRANSCODE_JPEG + +Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data) { + size_t icc_pos = 0; + for (size_t i = 0; i < jpeg_data->app_data.size(); i++) { + if (jpeg_data->app_marker_type[i] != jpeg::AppMarkerType::kICC) { + continue; + } + size_t len = jpeg_data->app_data[i].size() - 17; + if (icc_pos + len > icc.size()) { + return JXL_FAILURE( + "ICC length is less than APP markers: requested %" PRIuS + " more bytes, " + "%" PRIuS " available", + len, icc.size() - icc_pos); + } + memcpy(&jpeg_data->app_data[i][17], icc.data() + icc_pos, len); + icc_pos += len; + } + if (icc_pos != icc.size() && icc_pos != 0) { + return JXL_FAILURE("ICC length is more than APP markers"); + } + return true; +} + +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + +} // namespace jpeg +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.h b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.h new file mode 100644 index 0000000000..a9c86ac139 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jpeg/jpeg_data.h @@ -0,0 +1,216 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Data structures that represent the non-pixel contents of a jpeg file. 
+ +#ifndef LIB_JXL_JPEG_JPEG_DATA_H_ +#define LIB_JXL_JPEG_JPEG_DATA_H_ + +#include +#include + +#include +#include + +#include "lib/jxl/common.h" // JPEGXL_ENABLE_TRANSCODE_JPEG +#include "lib/jxl/fields.h" + +namespace jxl { +namespace jpeg { + +constexpr int kMaxComponents = 4; +constexpr int kMaxQuantTables = 4; +constexpr int kMaxHuffmanTables = 4; +constexpr size_t kJpegHuffmanMaxBitLength = 16; +constexpr int kJpegHuffmanAlphabetSize = 256; +constexpr int kJpegDCAlphabetSize = 12; +constexpr int kMaxDHTMarkers = 512; +constexpr int kMaxDimPixels = 65535; +constexpr uint8_t kApp1 = 0xE1; +constexpr uint8_t kApp2 = 0xE2; +const uint8_t kIccProfileTag[12] = "ICC_PROFILE"; +const uint8_t kExifTag[6] = "Exif\0"; +const uint8_t kXMPTag[29] = "http://ns.adobe.com/xap/1.0/"; + +/* clang-format off */ +constexpr uint32_t kJPEGNaturalOrder[80] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // extra entries for safety in decoder + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63, 63 +}; + +constexpr uint32_t kJPEGZigZagOrder[64] = { + 0, 1, 5, 6, 14, 15, 27, 28, + 2, 4, 7, 13, 16, 26, 29, 42, + 3, 8, 12, 17, 25, 30, 41, 43, + 9, 11, 18, 24, 31, 40, 44, 53, + 10, 19, 23, 32, 39, 45, 52, 54, + 20, 22, 33, 38, 46, 51, 55, 60, + 21, 34, 37, 47, 50, 56, 59, 61, + 35, 36, 48, 49, 57, 58, 62, 63 +}; +/* clang-format on */ + +// Quantization values for an 8x8 pixel block. +struct JPEGQuantTable { + std::array values; + uint32_t precision = 0; + // The index of this quantization table as it was parsed from the input JPEG. + // Each DQT marker segment contains an 'index' field, and we save this index + // here. Valid values are 0 to 3. + uint32_t index = 0; + // Set to true if this table is the last one within its marker segment. 
+ bool is_last = true; +}; + +// Huffman code and decoding lookup table used for DC and AC coefficients. +struct JPEGHuffmanCode { + // Bit length histogram. + std::array counts = {}; + // Symbol values sorted by increasing bit lengths. + std::array values = {}; + // The index of the Huffman code in the current set of Huffman codes. For AC + // component Huffman codes, 0x10 is added to the index. + int slot_id = 0; + // Set to true if this Huffman code is the last one within its marker segment. + bool is_last = true; +}; + +// Huffman table indexes used for one component of one scan. +struct JPEGComponentScanInfo { + uint32_t comp_idx; + uint32_t dc_tbl_idx; + uint32_t ac_tbl_idx; +}; + +// Contains information that is used in one scan. +struct JPEGScanInfo { + // Parameters used for progressive scans (named the same way as in the spec): + // Ss : Start of spectral band in zig-zag sequence. + // Se : End of spectral band in zig-zag sequence. + // Ah : Successive approximation bit position, high. + // Al : Successive approximation bit position, low. + uint32_t Ss; + uint32_t Se; + uint32_t Ah; + uint32_t Al; + uint32_t num_components = 0; + std::array components; + // Last codestream pass that is needed to write this scan. + uint32_t last_needed_pass = 0; + + // Extra information required for bit-precise JPEG file reconstruction. + + // Set of block indexes where the JPEG encoder has to flush the end-of-block + // runs and refinement bits. + std::vector reset_points; + // The number of extra zero runs (Huffman symbol 0xf0) before the end of + // block (if nonzero), indexed by block index. + // All of these symbols can be omitted without changing the pixel values, but + // some jpeg encoders put these at the end of blocks. + typedef struct { + uint32_t block_idx; + uint32_t num_extra_zero_runs; + } ExtraZeroRunInfo; + std::vector extra_zero_runs; +}; + +typedef int16_t coeff_t; + +// Represents one component of a jpeg file. 
+struct JPEGComponent { + JPEGComponent() + : id(0), + h_samp_factor(1), + v_samp_factor(1), + quant_idx(0), + width_in_blocks(0), + height_in_blocks(0) {} + + // One-byte id of the component. + uint32_t id; + // Horizontal and vertical sampling factors. + // In interleaved mode, each minimal coded unit (MCU) has + // h_samp_factor x v_samp_factor DCT blocks from this component. + int h_samp_factor; + int v_samp_factor; + // The index of the quantization table used for this component. + uint32_t quant_idx; + // The dimensions of the component measured in 8x8 blocks. + uint32_t width_in_blocks; + uint32_t height_in_blocks; + // The DCT coefficients of this component, laid out block-by-block, divided + // through the quantization matrix values. + std::vector coeffs; +}; + +enum class AppMarkerType : uint32_t { + kUnknown = 0, + kICC = 1, + kExif = 2, + kXMP = 3, +}; + +// Represents a parsed jpeg file. +struct JPEGData : public Fields { + JPEGData() + : width(0), height(0), restart_interval(0), has_zero_padding_bit(false) {} + + JXL_FIELDS_NAME(JPEGData) +#if JPEGXL_ENABLE_TRANSCODE_JPEG + // Doesn't serialize everything - skips brotli-encoded data and what is + // already encoded in the codestream. + Status VisitFields(Visitor* visitor) override; +#else + Status VisitFields(Visitor* /* visitor */) override { + JXL_UNREACHABLE("JPEG transcoding support not enabled"); + } +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + + void CalculateMcuSize(const JPEGScanInfo& scan, int* MCUs_per_row, + int* MCU_rows) const; + + int width; + int height; + uint32_t restart_interval; + std::vector> app_data; + std::vector app_marker_type; + std::vector> com_data; + std::vector quant; + std::vector huffman_code; + std::vector components; + std::vector scan_info; + std::vector marker_order; + std::vector> inter_marker_data; + std::vector tail_data; + + // Extra information required for bit-precise JPEG file reconstruction. 
+ + bool has_zero_padding_bit; + std::vector padding_bits; +}; + +#if JPEGXL_ENABLE_TRANSCODE_JPEG +// Set ICC profile in jpeg_data. +Status SetJPEGDataFromICC(const PaddedBytes& icc, jpeg::JPEGData* jpeg_data); +#else +static JXL_INLINE Status SetJPEGDataFromICC(const PaddedBytes& /* icc */, + jpeg::JPEGData* /* jpeg_data */) { + JXL_UNREACHABLE("JPEG transcoding support not enabled"); +} +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + +} // namespace jpeg +} // namespace jxl + +#endif // LIB_JXL_JPEG_JPEG_DATA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl.syms b/third-party/libjxl/libjxl/lib/jxl/jxl.syms new file mode 100644 index 0000000000..0f398d7151 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jxl.syms @@ -0,0 +1,5 @@ +{ + extern "C" { + jpegxl_*; + }; +}; diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl.version b/third-party/libjxl/libjxl/lib/jxl/jxl.version new file mode 100644 index 0000000000..26b0e9e54d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jxl.version @@ -0,0 +1,17 @@ +JXL_0 { + global: + Jxl*; + + local: + # Hide all the std namespace symbols. std namespace is explicitly marked + # as visibility(default) and header-only functions or methods (such as those + # from templates) should be exposed in shared libraries as weak symbols but + # this is only needed when we expose those types in the shared library API + # in any way. We don't use C++ std types in the API and we also don't + # support exceptions in the library. + # See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36022 for a discussion + # about this. 
+ extern "C++" { + *std::*; + }; +}; diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl_osx.syms b/third-party/libjxl/libjxl/lib/jxl/jxl_osx.syms new file mode 100644 index 0000000000..96bc568025 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jxl_osx.syms @@ -0,0 +1 @@ +_Jxl* diff --git a/third-party/libjxl/libjxl/lib/jxl/jxl_test.cc b/third-party/libjxl/libjxl/lib/jxl/jxl_test.cc new file mode 100644 index 0000000000..c43942a939 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/jxl_test.cc @@ -0,0 +1,1569 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/extras/dec/jxl.h" + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "lib/extras/codec.h" +#include "lib/extras/dec/decode.h" +#include "lib/extras/enc/encode.h" +#include "lib/extras/packed_image.h" +#include "lib/jxl/alpha.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_file.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/fake_parallel_runner_testonly.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/jpeg/dec_jpeg_data.h" +#include "lib/jxl/jpeg/dec_jpeg_data_writer.h" +#include "lib/jxl/jpeg/enc_jpeg_data.h" +#include "lib/jxl/jpeg/jpeg_data.h" +#include "lib/jxl/modular/options.h" +#include "lib/jxl/test_image.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" +#include "tools/box/box.h" + +namespace jxl { + 
+struct AuxOut; + +namespace { +using extras::JXLCompressParams; +using extras::JXLDecompressParams; +using extras::PackedPixelFile; +using test::ButteraugliDistance; +using test::ComputeDistance2; +using test::Roundtrip; +using test::TestImage; +using test::ThreadPoolForTests; + +#define JXL_TEST_NL 0 // Disabled in code + +TEST(JxlTest, RoundtripSinglePixel) { + TestImage t; + t.SetDimensions(1, 1).AddFrame().ZeroFill(); + PackedPixelFile ppf_out; + EXPECT_EQ(Roundtrip(t.ppf(), {}, {}, nullptr, &ppf_out), 55); +} + +TEST(JxlTest, RoundtripSinglePixelWithAlpha) { + TestImage t; + t.SetDimensions(1, 1).SetChannels(4).AddFrame().ZeroFill(); + PackedPixelFile ppf_out; + EXPECT_EQ(Roundtrip(t.ppf(), {}, {}, nullptr, &ppf_out), 59); +} + +// Changing serialized signature causes Decode to fail. +#ifndef JXL_CRASH_ON_ERROR +TEST(JxlTest, RoundtripMarker) { + TestImage t; + t.SetDimensions(1, 1).AddFrame().ZeroFill(); + for (size_t i = 0; i < 2; ++i) { + std::vector compressed; + EXPECT_TRUE(extras::EncodeImageJXL({}, t.ppf(), /*jpeg_bytes=*/nullptr, + &compressed)); + compressed[i] ^= 0xFF; + PackedPixelFile ppf_out; + EXPECT_FALSE(extras::DecodeImageJXL(compressed.data(), compressed.size(), + {}, /*decodec_bytes=*/nullptr, + &ppf_out)); + } +} +#endif + +TEST(JxlTest, RoundtripTinyFast) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(32, 32); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); + cparams.distance = 4.0f; + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 181, 15); +} + +TEST(JxlTest, RoundtripSmallD1) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + size_t xsize = 
t.ppf().info.xsize / 8; + size_t ysize = t.ppf().info.ysize / 8; + t.SetDimensions(xsize, ysize); + + { + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 816, 40); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.888)); + } + + // With a lower intensity target than the default, the bitrate should be + // smaller. + t.ppf().info.intensity_target = 100.0f; + + { + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 659, 20); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.3)); + EXPECT_EQ(ppf_out.info.intensity_target, t.ppf().info.intensity_target); + } +} +TEST(JxlTest, RoundtripResample2) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 18500, 200); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(90)); +} + +TEST(JxlTest, RoundtripResample2Slow) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 9); // kTortoise + cparams.distance = 10.0; + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 3888, 200); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(250)); +} + +TEST(JxlTest, RoundtripResample2MT) { + ThreadPoolForTests pool(4); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + // 
image has to be large enough to have multiple groups after downsampling + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 223310, 2000); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(340)); +} + +// Roundtrip the image using a parallel runner that executes single-threaded but +// in random order. +TEST(JxlTest, RoundtripOutOfOrderProcessing) { + FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8); + ThreadPool pool(&JxlFakeParallelRunner, &fake_pool); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + // Image size is selected so that the block border needed is larger than the + // amount of pixels available on the next block. + t.SetDimensions(513, 515); + + JXLCompressParams cparams; + // Force epf so we end up needing a lot of border. + cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 3); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 22999, 400); + EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), 1.35); +} + +TEST(JxlTest, RoundtripOutOfOrderProcessingBorder) { + FakeParallelRunner fake_pool(/*order_seed=*/47, /*num_threads=*/8); + ThreadPool pool(&JxlFakeParallelRunner, &fake_pool); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + // Image size is selected so that the block border needed is larger than the + // amount of pixels available on the next block. + t.SetDimensions(513, 515); + + JXLCompressParams cparams; + // Force epf so we end up needing a lot of border. 
+ cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 3); + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 11015, 200); + EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), 2.9); +} + +TEST(JxlTest, RoundtripResample4) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 4); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 5758, 100); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(22)); +} + +TEST(JxlTest, RoundtripResample8) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 8); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 2036, 50); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(50)); +} + +TEST(JxlTest, RoundtripUnalignedD2) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + size_t xsize = t.ppf().info.xsize / 12; + size_t ysize = t.ppf().info.ysize / 7; + t.SetDimensions(xsize, ysize); + + JXLCompressParams cparams; + cparams.distance = 2.0; + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 506, 30); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.72)); +} + +TEST(JxlTest, RoundtripMultiGroup) { + const PaddedBytes orig = 
jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(600, 1024); + + auto test = [&](jxl::SpeedTier speed_tier, float target_distance, + size_t expected_size, float expected_distance) { + ThreadPoolForTests pool(4); + JXLCompressParams cparams; + int64_t effort = 10 - static_cast(speed_tier); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, effort); + cparams.distance = target_distance; + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), expected_size, + 700); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), + IsSlightlyBelow(expected_distance)); + }; + + auto run_kitten = std::async(std::launch::async, test, SpeedTier::kKitten, + 1.0f, 55602u, 11.7); + auto run_wombat = std::async(std::launch::async, test, SpeedTier::kWombat, + 2.0f, 33624u, 20.0); +} + +TEST(JxlTest, RoundtripRGBToGrayscale) { + ThreadPoolForTests pool(4); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io, &pool)); + io.ShrinkTo(600, 1024); + + CompressParams cparams; + cparams.butteraugli_distance = 1.0f; + cparams.speed_tier = SpeedTier::kFalcon; + + JXLDecompressParams dparams; + dparams.color_space = "Gra_D65_Rel_SRG"; + + CodecInOut io2; + EXPECT_FALSE(io.Main().IsGray()); + size_t compressed_size; + JXL_EXPECT_OK( + Roundtrip(&io, cparams, dparams, &io2, _, &compressed_size, &pool)); + EXPECT_LE(compressed_size, 65000u); + EXPECT_TRUE(io2.Main().IsGray()); + + // Convert original to grayscale here, because TransformTo refuses to + // convert between grayscale and RGB. 
+ ColorEncoding srgb_lin = ColorEncoding::LinearSRGB(/*is_gray=*/false); + ASSERT_TRUE(io.frames[0].TransformTo(srgb_lin, GetJxlCms())); + Image3F* color = io.Main().color(); + for (size_t y = 0; y < color->ysize(); ++y) { + float* row_r = color->PlaneRow(0, y); + float* row_g = color->PlaneRow(1, y); + float* row_b = color->PlaneRow(2, y); + for (size_t x = 0; x < color->xsize(); ++x) { + float luma = 0.2126 * row_r[x] + 0.7152 * row_g[x] + 0.0722 * row_b[x]; + row_r[x] = row_g[x] = row_b[x] = luma; + } + } + ColorEncoding srgb_gamma = ColorEncoding::SRGB(/*is_gray=*/false); + ASSERT_TRUE(io.frames[0].TransformTo(srgb_gamma, GetJxlCms())); + io.metadata.m.color_encoding = io2.Main().c_current(); + io.Main().OverrideProfile(io2.Main().c_current()); + EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr, &pool), + IsSlightlyBelow(1.36)); +} + +TEST(JxlTest, RoundtripLargeFast) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 445555, 5000); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(100)); +} + +TEST(JxlTest, RoundtripDotsForceEpf) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/cvo9xd_keong_macan_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 2); + cparams.AddOption(JXL_ENC_FRAME_SETTING_DOTS, 1); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 41777, 300); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), 
IsSlightlyBelow(18)); +} + +// Checks for differing size/distance in two consecutive runs of distance 2, +// which involves additional processing including adaptive reconstruction. +// Failing this may be a sign of race conditions or invalid memory accesses. +TEST(JxlTest, RoundtripD2Consistent) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + cparams.distance = 2.0; + + // Try each xsize mod kBlockDim to verify right border handling. + for (size_t xsize = 48; xsize > 40; --xsize) { + t.SetDimensions(xsize, 15); + + PackedPixelFile ppf2; + const size_t size2 = Roundtrip(t.ppf(), cparams, {}, &pool, &ppf2); + + PackedPixelFile ppf3; + const size_t size3 = Roundtrip(t.ppf(), cparams, {}, &pool, &ppf3); + + // Exact same compressed size. + EXPECT_EQ(size2, size3); + + // Exact same distance. + const float dist2 = ComputeDistance2(t.ppf(), ppf2); + const float dist3 = ComputeDistance2(t.ppf(), ppf3); + EXPECT_EQ(dist2, dist3); + } +} + +// Same as above, but for full image, testing multiple groups. +TEST(JxlTest, RoundtripLargeConsistent) { + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + cparams.distance = 2.0; + + auto roundtrip_and_compare = [&]() { + ThreadPoolForTests pool(8); + PackedPixelFile ppf2; + size_t size = Roundtrip(t.ppf(), cparams, {}, &pool, &ppf2); + double dist = ComputeDistance2(t.ppf(), ppf2); + return std::tuple(size, dist); + }; + + // Run the identical roundtrip twice, concurrently, and compare the results.
+ auto future2 = std::async(std::launch::async, roundtrip_and_compare); + auto future3 = std::async(std::launch::async, roundtrip_and_compare); + + const auto result2 = future2.get(); + const auto result3 = future3.get(); + + // Exact same compressed size. + EXPECT_EQ(std::get<0>(result2), std::get<0>(result3)); + + // Exact same distance. + EXPECT_EQ(std::get<1>(result2), std::get<1>(result3)); +} + +TEST(JxlTest, RoundtripSmallNL) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + size_t xsize = t.ppf().info.xsize / 8; + size_t ysize = t.ppf().info.ysize / 8; + t.SetDimensions(xsize, ysize); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 801, 45); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.1)); +} + +TEST(JxlTest, RoundtripNoGaborishNoAR) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EPF, 0); + cparams.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 38900, 200); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.8)); +} + +TEST(JxlTest, RoundtripSmallNoGaborish) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + size_t xsize = t.ppf().info.xsize / 8; + size_t ysize = t.ppf().info.ysize / 8; + t.SetDimensions(xsize, ysize); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_GABORISH, 0); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), 
cparams, {}, pool, &ppf_out), 811, 20); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.1)); +} + +TEST(JxlTest, RoundtripSmallPatchesAlpha) { + ThreadPool* pool = nullptr; + TestImage t; + t.SetDimensions(256, 256).SetChannels(4); + t.SetColorEncoding("RGB_D65_SRG_Rel_Lin"); + TestImage::Frame frame = t.AddFrame(); + frame.ZeroFill(); + // This pattern should be picked up by the patch detection heuristics. + for (size_t y = 0; y < t.ppf().info.ysize; ++y) { + for (size_t x = 0; x < t.ppf().info.xsize; ++x) { + if (x % 4 == 0 && (y / 32) % 4 == 0) { + frame.SetValue(y, x, 1, 127.0f / 255.0f); + } + frame.SetValue(y, x, 3, 1.0f); + } + } + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + cparams.distance = 0.1f; + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 597, 100); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.018f)); +} + +TEST(JxlTest, RoundtripSmallPatches) { + ThreadPool* pool = nullptr; + TestImage t; + t.SetDimensions(256, 256); + t.SetColorEncoding("RGB_D65_SRG_Rel_Lin"); + TestImage::Frame frame = t.AddFrame(); + frame.ZeroFill(); + // This pattern should be picked up by the patch detection heuristics. 
+ for (size_t y = 0; y < t.ppf().info.ysize; ++y) { + for (size_t x = 0; x < t.ppf().info.xsize; ++x) { + if (x % 4 == 0 && (y / 32) % 4 == 0) { + frame.SetValue(y, x, 1, 127.0f / 255.0f); + } + } + } + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + cparams.distance = 0.1f; + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 486, 100); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.018f)); +} + +// TODO(szabadka) Add encoder and decoder API functions that accept frame +// buffers in arbitrary unsigned and floating point formats, and then roundtrip +// test the lossless codepath to make sure the exact binary representations +// are preserved. +#if 0 +TEST(JxlTest, RoundtripImageBundleOriginalBits) { + // Image does not matter, only io.metadata.m and io2.metadata.m are tested. + Image3F image(1, 1); + ZeroFillImage(&image); + CodecInOut io; + io.metadata.m.color_encoding = ColorEncoding::LinearSRGB(); + io.SetFromImage(std::move(image), ColorEncoding::LinearSRGB()); + + CompressParams cparams; + + // Test unsigned integers from 1 to 32 bits + for (uint32_t bit_depth = 1; bit_depth <= 32; bit_depth++) { + if (bit_depth == 32) { + // TODO(lode): allow testing 32, however the code below ends up in + // enc_modular which does not support 32. We only want to test the header + // encoding though, so try without modular. 
+ break; + } + + io.metadata.m.SetUintSamples(bit_depth); + CodecInOut io2; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _)); + + EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample); + EXPECT_FALSE(io2.metadata.m.bit_depth.floating_point_sample); + EXPECT_EQ(0u, io2.metadata.m.bit_depth.exponent_bits_per_sample); + EXPECT_EQ(0u, io2.metadata.m.GetAlphaBits()); + } + + // Test various existing and non-existing floating point formats + for (uint32_t bit_depth = 8; bit_depth <= 32; bit_depth++) { + if (bit_depth != 32) { + // TODO: test other float types once they work + break; + } + + uint32_t exponent_bit_depth; + if (bit_depth < 10) { + exponent_bit_depth = 2; + } else if (bit_depth < 12) { + exponent_bit_depth = 3; + } else if (bit_depth < 16) { + exponent_bit_depth = 4; + } else if (bit_depth < 20) { + exponent_bit_depth = 5; + } else if (bit_depth < 24) { + exponent_bit_depth = 6; + } else if (bit_depth < 28) { + exponent_bit_depth = 7; + } else { + exponent_bit_depth = 8; + } + + io.metadata.m.bit_depth.bits_per_sample = bit_depth; + io.metadata.m.bit_depth.floating_point_sample = true; + io.metadata.m.bit_depth.exponent_bits_per_sample = exponent_bit_depth; + + CodecInOut io2; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2)); + + EXPECT_EQ(bit_depth, io2.metadata.m.bit_depth.bits_per_sample); + EXPECT_TRUE(io2.metadata.m.bit_depth.floating_point_sample); + EXPECT_EQ(exponent_bit_depth, + io2.metadata.m.bit_depth.exponent_bits_per_sample); + EXPECT_EQ(0u, io2.metadata.m.GetAlphaBits()); + } +} +#endif + +TEST(JxlTest, RoundtripGrayscale) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png"); + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + ASSERT_NE(io.xsize(), 0u); + io.ShrinkTo(128, 128); + EXPECT_TRUE(io.Main().IsGray()); + EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample); + EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample); + EXPECT_EQ(0u, 
io.metadata.m.bit_depth.exponent_bits_per_sample); + EXPECT_TRUE(io.metadata.m.color_encoding.tf.IsSRGB()); + + PassesEncoderState enc_state; + AuxOut* aux_out = nullptr; + + { + CompressParams cparams; + cparams.butteraugli_distance = 1.0; + + PaddedBytes compressed; + EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), + aux_out)); + CodecInOut io2; + EXPECT_TRUE(test::DecodeFile({}, Span(compressed), &io2)); + EXPECT_TRUE(io2.Main().IsGray()); + + EXPECT_LE(compressed.size(), 7000u); + EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(1.6)); + } + + // Test with larger butteraugli distance and other settings enabled so + // different jxl codepaths trigger. + { + CompressParams cparams; + cparams.butteraugli_distance = 8.0; + + PaddedBytes compressed; + EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), + aux_out)); + CodecInOut io2; + EXPECT_TRUE(test::DecodeFile({}, Span(compressed), &io2)); + EXPECT_TRUE(io2.Main().IsGray()); + + EXPECT_LE(compressed.size(), 1300u); + EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(6.0)); + } + + { + CompressParams cparams; + cparams.butteraugli_distance = 1.0; + + PaddedBytes compressed; + EXPECT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), + aux_out)); + + CodecInOut io2; + JXLDecompressParams dparams; + dparams.color_space = "RGB_D65_SRG_Rel_SRG"; + EXPECT_TRUE( + test::DecodeFile(dparams, Span(compressed), &io2)); + EXPECT_FALSE(io2.Main().IsGray()); + + EXPECT_LE(compressed.size(), 7000u); + EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(1.6)); + } +} + +TEST(JxlTest, RoundtripAlpha) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_alpha.png"); + 
CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + + ASSERT_NE(io.xsize(), 0u); + ASSERT_TRUE(io.metadata.m.HasAlpha()); + ASSERT_TRUE(io.Main().HasAlpha()); + io.ShrinkTo(300, 300); + + CompressParams cparams; + cparams.butteraugli_distance = 1.0; + + EXPECT_EQ(8u, io.metadata.m.bit_depth.bits_per_sample); + EXPECT_FALSE(io.metadata.m.bit_depth.floating_point_sample); + EXPECT_EQ(0u, io.metadata.m.bit_depth.exponent_bits_per_sample); + EXPECT_TRUE(io.metadata.m.color_encoding.tf.IsSRGB()); + PassesEncoderState enc_state; + AuxOut* aux_out = nullptr; + PaddedBytes compressed; + EXPECT_TRUE( + EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), aux_out)); + + EXPECT_LE(compressed.size(), 10077u); + + for (bool use_image_callback : {false, true}) { + for (bool unpremul_alpha : {false, true}) { + CodecInOut io2; + JXLDecompressParams dparams; + dparams.use_image_callback = use_image_callback; + dparams.unpremultiply_alpha = unpremul_alpha; + EXPECT_TRUE( + test::DecodeFile(dparams, Span(compressed), &io2)); + EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, + ButteraugliParams(), GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(1.15)); + } + } +} + +namespace { +// Performs "PremultiplyAlpha" for each ImageBundle (preview/frames). 
+bool PremultiplyAlpha(CodecInOut& io) { + const auto doPremultiplyAlpha = [](ImageBundle& bundle) { + if (!bundle.HasAlpha()) return; + if (!bundle.HasColor()) return; + auto* color = bundle.color(); + const auto* alpha = bundle.alpha(); + JXL_CHECK(color->ysize() == alpha->ysize()); + JXL_CHECK(color->xsize() == alpha->xsize()); + for (size_t y = 0; y < color->ysize(); y++) { + ::jxl::PremultiplyAlpha(color->PlaneRow(0, y), color->PlaneRow(1, y), + color->PlaneRow(2, y), alpha->Row(y), + color->xsize()); + } + }; + ExtraChannelInfo* eci = io.metadata.m.Find(ExtraChannel::kAlpha); + if (eci == nullptr || eci->alpha_associated) return false; + if (io.metadata.m.have_preview) { + doPremultiplyAlpha(io.preview_frame); + } + for (ImageBundle& ib : io.frames) { + doPremultiplyAlpha(ib); + } + eci->alpha_associated = true; + return true; +} + +bool UnpremultiplyAlpha(CodecInOut& io) { + const auto doUnpremultiplyAlpha = [](ImageBundle& bundle) { + if (!bundle.HasAlpha()) return; + if (!bundle.HasColor()) return; + auto* color = bundle.color(); + const auto* alpha = bundle.alpha(); + JXL_CHECK(color->ysize() == alpha->ysize()); + JXL_CHECK(color->xsize() == alpha->xsize()); + for (size_t y = 0; y < color->ysize(); y++) { + ::jxl::UnpremultiplyAlpha(color->PlaneRow(0, y), color->PlaneRow(1, y), + color->PlaneRow(2, y), alpha->Row(y), + color->xsize()); + } + }; + ExtraChannelInfo* eci = io.metadata.m.Find(ExtraChannel::kAlpha); + if (eci == nullptr || !eci->alpha_associated) return false; + if (io.metadata.m.have_preview) { + doUnpremultiplyAlpha(io.preview_frame); + } + for (ImageBundle& ib : io.frames) { + doUnpremultiplyAlpha(ib); + } + eci->alpha_associated = false; + return true; +} +} // namespace + +TEST(JxlTest, RoundtripAlphaPremultiplied) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_alpha.png"); + CodecInOut io, io_nopremul; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + 
ASSERT_TRUE(SetFromBytes(Span(orig), &io_nopremul)); + + ASSERT_NE(io.xsize(), 0u); + ASSERT_TRUE(io.metadata.m.HasAlpha()); + ASSERT_TRUE(io.Main().HasAlpha()); + io.ShrinkTo(300, 300); + io_nopremul.ShrinkTo(300, 300); + + CompressParams cparams; + cparams.butteraugli_distance = 1.0; + cparams.SetCms(GetJxlCms()); + + EXPECT_FALSE(io.Main().AlphaIsPremultiplied()); + EXPECT_TRUE(PremultiplyAlpha(io)); + EXPECT_TRUE(io.Main().AlphaIsPremultiplied()); + + EXPECT_FALSE(io_nopremul.Main().AlphaIsPremultiplied()); + + PassesEncoderState enc_state; + AuxOut* aux_out = nullptr; + PaddedBytes compressed; + EXPECT_TRUE( + EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(), aux_out)); + EXPECT_LE(compressed.size(), 10000u); + + for (bool use_image_callback : {false, true}) { + for (bool unpremul_alpha : {false, true}) { + for (bool use_uint8 : {false, true}) { + printf( + "Testing premultiplied alpha using %s %s requesting " + "%spremultiplied output.\n", + use_uint8 ? "uint8" : "float", + use_image_callback ? "image callback" : "image_buffer", + unpremul_alpha ? 
"un" : ""); + CodecInOut io2; + JXLDecompressParams dparams; + dparams.use_image_callback = use_image_callback; + dparams.unpremultiply_alpha = unpremul_alpha; + if (use_uint8) { + dparams.accepted_formats = { + {4, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}}; + } + EXPECT_TRUE( + test::DecodeFile(dparams, Span(compressed), &io2)); + + EXPECT_EQ(unpremul_alpha, !io2.Main().AlphaIsPremultiplied()); + if (!unpremul_alpha) { + EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, + ButteraugliParams(), GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(1.111)); + EXPECT_TRUE(UnpremultiplyAlpha(io2)); + EXPECT_FALSE(io2.Main().AlphaIsPremultiplied()); + } + EXPECT_THAT(ButteraugliDistance(io_nopremul.frames, io2.frames, + ButteraugliParams(), GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(1.55)); + } + } + } +} + +TEST(JxlTest, RoundtripAlphaResampling) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_alpha.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + ASSERT_NE(t.ppf().info.xsize, 0); + ASSERT_TRUE(t.ppf().info.alpha_bits > 0); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 5); // kHare + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESAMPLING, 2); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, 2); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 13155, 130); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(5.2)); +} + +TEST(JxlTest, RoundtripAlphaResamplingOnlyAlpha) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_alpha.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + ASSERT_NE(t.ppf().info.xsize, 0); + ASSERT_TRUE(t.ppf().info.alpha_bits > 0); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon + 
cparams.AddOption(JXL_ENC_FRAME_SETTING_EXTRA_CHANNEL_RESAMPLING, 2); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 33571, 400); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.49)); +} + +TEST(JxlTest, RoundtripAlphaNonMultipleOf8) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_alpha.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(12, 12); + ASSERT_NE(t.ppf().info.xsize, 0); + ASSERT_TRUE(t.ppf().info.alpha_bits > 0); + EXPECT_EQ(t.ppf().frames[0].color.format.data_type, JXL_TYPE_UINT8); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), {}, {}, pool, &ppf_out), 107, 10); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.95)); +} + +TEST(JxlTest, RoundtripAlpha16) { + ThreadPoolForTests pool(4); + // The image is wider than 512 pixels to ensure multiple groups are tested. + size_t xsize = 1200, ysize = 160; + TestImage t; + t.SetDimensions(xsize, ysize).SetChannels(4).SetAllBitDepths(16); + TestImage::Frame frame = t.AddFrame(); + // Generate 16-bit pattern that uses various colors and alpha values. + const float mul = 1.0f / 65535; + for (size_t y = 0; y < ysize; y++) { + for (size_t x = 0; x < xsize; x++) { + uint16_t r = y * 65535 / ysize; + uint16_t g = x * 65535 / xsize; + uint16_t b = (y + x) * 65535 / (xsize + ysize); + frame.SetValue(y, x, 0, r * mul); + frame.SetValue(y, x, 1, g * mul); + frame.SetValue(y, x, 2, b * mul); + frame.SetValue(y, x, 3, g * mul); + } + } + + ASSERT_NE(t.ppf().info.xsize, 0); + ASSERT_EQ(t.ppf().info.alpha_bits, 16); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 6); // kWombat + cparams.distance = 0.5; + + PackedPixelFile ppf_out; + // TODO(szabadka) Investigate big size difference on i686 + // This still keeps happening (2023-04-18). 
+ EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 3466, 120); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.65)); +} + +namespace { +JXLCompressParams CompressParamsForLossless() { + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR, 1); + cparams.AddOption(JXL_ENC_FRAME_SETTING_COLOR_TRANSFORM, 1); + cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 6); // Weighted + cparams.distance = 0; + return cparams; +} +} // namespace + +TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams = CompressParamsForLossless(); + + PackedPixelFile ppf_out; + EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 223058); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); +} + +TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8ThunderGradient)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams = CompressParamsForLossless(); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 2); // kThunder + cparams.AddOption(JXL_ENC_FRAME_SETTING_MODULAR_PREDICTOR, 5); // Gradient + + PackedPixelFile ppf_out; + EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 261684); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); +} + +TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8LightningGradient)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams = CompressParamsForLossless(); + 
cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1); // kLightning + + PackedPixelFile ppf_out; + // Lax comparison because different SIMD will cause different compression. + EXPECT_THAT(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), + IsSlightlyBelow(286848u)); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); +} + +TEST(JxlTest, JXL_SLOW_TEST(RoundtripLossless8Falcon)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + + JXLCompressParams cparams = CompressParamsForLossless(); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 3); // kFalcon + + PackedPixelFile ppf_out; + EXPECT_EQ(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 230766); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); +} + +TEST(JxlTest, RoundtripLossless8Alpha) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_alpha.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + ASSERT_EQ(t.ppf().info.alpha_bits, 8); + EXPECT_EQ(t.ppf().frames[0].color.format.data_type, JXL_TYPE_UINT8); + + JXLCompressParams cparams = CompressParamsForLossless(); + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + + PackedPixelFile ppf_out; + EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 251470); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); + EXPECT_EQ(ppf_out.info.alpha_bits, 8); + EXPECT_TRUE(test::SameAlpha(t.ppf(), ppf_out)); +} + +TEST(JxlTest, RoundtripLossless16Alpha) { + ThreadPool* pool = nullptr; + size_t xsize = 1200, ysize = 160; + TestImage t; + t.SetDimensions(xsize, ysize).SetChannels(4).SetAllBitDepths(16); + TestImage::Frame frame = t.AddFrame(); + // Generate 16-bit pattern that uses various colors and alpha values. 
+ const float mul = 1.0f / 65535; + for (size_t y = 0; y < ysize; y++) { + for (size_t x = 0; x < xsize; x++) { + uint16_t r = y * 65535 / ysize; + uint16_t g = x * 65535 / xsize + 37; + uint16_t b = (y + x) * 65535 / (xsize + ysize); + frame.SetValue(y, x, 0, r * mul); + frame.SetValue(y, x, 1, g * mul); + frame.SetValue(y, x, 2, b * mul); + frame.SetValue(y, x, 3, g * mul); + } + } + ASSERT_EQ(t.ppf().info.bits_per_sample, 16); + ASSERT_EQ(t.ppf().info.alpha_bits, 16); + + JXLCompressParams cparams = CompressParamsForLossless(); + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + + PackedPixelFile ppf_out; + // TODO(szabadka) Investigate big size difference on i686 + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 4884, 100); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); + EXPECT_EQ(ppf_out.info.alpha_bits, 16); + EXPECT_TRUE(test::SameAlpha(t.ppf(), ppf_out)); +} + +TEST(JxlTest, RoundtripLossless16AlphaNotMisdetectedAs8Bit) { + ThreadPool* pool = nullptr; + size_t xsize = 128, ysize = 128; + TestImage t; + t.SetDimensions(xsize, ysize).SetChannels(4).SetAllBitDepths(16); + TestImage::Frame frame = t.AddFrame(); + // All 16-bit values, both color and alpha, of this image are below 64. + // This allows testing if a code path wrongly concludes it's an 8-bit instead + // of 16-bit image (or even 6-bit). 
+ const float mul = 1.0f / 65535; + for (size_t y = 0; y < ysize; y++) { + for (size_t x = 0; x < xsize; x++) { + uint16_t r = y * 64 / ysize; + uint16_t g = x * 64 / xsize + 37; + uint16_t b = (y + x) * 64 / (xsize + ysize); + frame.SetValue(y, x, 0, r * mul); + frame.SetValue(y, x, 1, g * mul); + frame.SetValue(y, x, 2, b * mul); + frame.SetValue(y, x, 3, g * mul); + } + } + ASSERT_EQ(t.ppf().info.bits_per_sample, 16); + ASSERT_EQ(t.ppf().info.alpha_bits, 16); + + JXLCompressParams cparams = CompressParamsForLossless(); + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 591, 50); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); + EXPECT_EQ(ppf_out.info.bits_per_sample, 16); + EXPECT_EQ(ppf_out.info.alpha_bits, 16); + EXPECT_TRUE(test::SameAlpha(t.ppf(), ppf_out)); +} + +TEST(JxlTest, RoundtripDots) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/cvo9xd_keong_macan_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + ASSERT_NE(t.ppf().info.xsize, 0); + EXPECT_EQ(t.ppf().info.bits_per_sample, 8); + EXPECT_EQ(t.ppf().color_encoding.transfer_function, + JXL_TRANSFER_FUNCTION_SRGB); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + cparams.AddOption(JXL_ENC_FRAME_SETTING_DOTS, 1); + cparams.distance = 0.04; + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 273333, 4000); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(0.35)); +} + +TEST(JxlTest, RoundtripNoise) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + ASSERT_NE(t.ppf().info.xsize, 0); + 
EXPECT_EQ(t.ppf().info.bits_per_sample, 8); + EXPECT_EQ(t.ppf().color_encoding.transfer_function, + JXL_TRANSFER_FUNCTION_SRGB); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 7); // kSquirrel + cparams.AddOption(JXL_ENC_FRAME_SETTING_NOISE, 1); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, pool, &ppf_out), 39261, 750); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.35)); +} + +TEST(JxlTest, RoundtripLossless8Gray) { + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/cvo9xd_keong_macan_grayscale.png"); + TestImage t; + t.SetColorEncoding("Gra_D65_Rel_SRG").DecodeFromBytes(orig).ClearMetadata(); + EXPECT_EQ(t.ppf().color_encoding.color_space, JXL_COLOR_SPACE_GRAY); + EXPECT_EQ(t.ppf().info.bits_per_sample, 8); + + JXLCompressParams cparams = CompressParamsForLossless(); + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + + PackedPixelFile ppf_out; + EXPECT_EQ(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), 92185); + EXPECT_EQ(ComputeDistance2(t.ppf(), ppf_out), 0.0); + EXPECT_EQ(ppf_out.color_encoding.color_space, JXL_COLOR_SPACE_GRAY); + EXPECT_EQ(ppf_out.info.bits_per_sample, 8); +} + +TEST(JxlTest, RoundtripAnimation) { + if (!jxl::extras::CanDecode(jxl::extras::Codec::kGIF)) { + fprintf(stderr, "Skipping test because of missing GIF decoder.\n"); + return; + } + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData("jxl/traffic_light.gif"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + EXPECT_EQ(4, t.ppf().frames.size()); + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + + PackedPixelFile ppf_out; + EXPECT_THAT(Roundtrip(t.ppf(), {}, dparams, pool, &ppf_out), + IsSlightlyBelow(2600)); + + t.CoalesceGIFAnimationWithAlpha(); + ASSERT_EQ(ppf_out.frames.size(), 
t.ppf().frames.size()); + EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), +#if JXL_HIGH_PRECISION + 1.55); +#else + 1.75); +#endif +} + +TEST(JxlTest, RoundtripLosslessAnimation) { + if (!jxl::extras::CanDecode(jxl::extras::Codec::kGIF)) { + fprintf(stderr, "Skipping test because of missing GIF decoder.\n"); + return; + } + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData("jxl/traffic_light.gif"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + EXPECT_EQ(4, t.ppf().frames.size()); + + JXLCompressParams cparams = CompressParamsForLossless(); + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + + PackedPixelFile ppf_out; + EXPECT_THAT(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), + IsSlightlyBelow(958)); + + t.CoalesceGIFAnimationWithAlpha(); + ASSERT_EQ(ppf_out.frames.size(), t.ppf().frames.size()); + EXPECT_LE(ButteraugliDistance(t.ppf(), ppf_out), 5e-4); +} + +TEST(JxlTest, RoundtripAnimationPatches) { + if (!jxl::extras::CanDecode(jxl::extras::Codec::kGIF)) { + fprintf(stderr, "Skipping test because of missing GIF decoder.\n"); + return; + } + ThreadPool* pool = nullptr; + const PaddedBytes orig = jxl::test::ReadTestData("jxl/animation_patches.gif"); + + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata(); + ASSERT_EQ(2u, t.ppf().frames.size()); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_PATCHES, 1); + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + + PackedPixelFile ppf_out; + // 40k with no patches, 27k with patch frames encoded multiple times. 
+ EXPECT_THAT(Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out), + IsSlightlyBelow(16789)); + EXPECT_EQ(ppf_out.frames.size(), t.ppf().frames.size()); + // >10 with broken patches + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.0999)); +} + +size_t RoundtripJpeg(const PaddedBytes& jpeg_in, ThreadPool* pool) { + std::vector jpeg_bytes(jpeg_in.data(), + jpeg_in.data() + jpeg_in.size()); + std::vector compressed; + EXPECT_TRUE(extras::EncodeImageJXL({}, extras::PackedPixelFile(), &jpeg_bytes, + &compressed)); + + jxl::JXLDecompressParams dparams; + test::DefaultAcceptedFormats(dparams); + test::SetThreadParallelRunner(dparams, pool); + std::vector out; + jxl::PackedPixelFile ppf; + EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams, + nullptr, &ppf, &out)); + EXPECT_EQ(out.size(), jpeg_in.size()); + size_t failures = 0; + for (size_t i = 0; i < std::min(out.size(), jpeg_in.size()); i++) { + if (out[i] != jpeg_in[i]) { + EXPECT_EQ(out[i], jpeg_in[i]) + << "byte mismatch " << i << " " << out[i] << " != " << jpeg_in[i]; + if (++failures > 4) { + return compressed.size(); + } + } + } + return compressed.size(); +} + +void RoundtripJpegToPixels(const PaddedBytes& jpeg_in, + JXLDecompressParams dparams, ThreadPool* pool, + PackedPixelFile* ppf_out) { + std::vector jpeg_bytes(jpeg_in.data(), + jpeg_in.data() + jpeg_in.size()); + std::vector compressed; + EXPECT_TRUE(extras::EncodeImageJXL({}, extras::PackedPixelFile(), &jpeg_bytes, + &compressed)); + + test::DefaultAcceptedFormats(dparams); + test::SetThreadParallelRunner(dparams, pool); + EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), dparams, + nullptr, ppf_out, nullptr)); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444.jpg"); + // JPEG size is 696,659 bytes. 
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 568940u, 10); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels)) { + TEST_LIBJPEG_SUPPORT(); + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444.jpg"); + TestImage t; + t.DecodeFromBytes(orig); + + PackedPixelFile ppf_out; + RoundtripJpegToPixels(orig, {}, &pool, &ppf_out); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(12)); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420)) { + TEST_LIBJPEG_SUPPORT(); + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg"); + TestImage t; + t.DecodeFromBytes(orig); + + PackedPixelFile ppf_out; + RoundtripJpegToPixels(orig, {}, &pool, &ppf_out); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(11)); +} + +TEST(JxlTest, + JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420EarlyFlush)) { + TEST_LIBJPEG_SUPPORT(); + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg"); + TestImage t; + t.DecodeFromBytes(orig); + + JXLDecompressParams dparams; + dparams.max_downsampling = 8; + + PackedPixelFile ppf_out; + RoundtripJpegToPixels(orig, dparams, &pool, &ppf_out); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(4410)); +} + +TEST(JxlTest, + JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels420Mul16)) { + TEST_LIBJPEG_SUPPORT(); + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower_cropped.jpg"); + TestImage t; + t.DecodeFromBytes(orig); + + PackedPixelFile ppf_out; + RoundtripJpegToPixels(orig, {}, &pool, &ppf_out); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(4)); +} + +TEST(JxlTest, + JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionToPixels_asymmetric)) { + TEST_LIBJPEG_SUPPORT(); + 
ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_asymmetric.jpg"); + TestImage t; + t.DecodeFromBytes(orig); + + PackedPixelFile ppf_out; + RoundtripJpegToPixels(orig, {}, &pool, &ppf_out); + EXPECT_THAT(ComputeDistance2(t.ppf(), ppf_out), IsSlightlyBelow(10)); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionGray)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_gray.jpg"); + // JPEG size is 456,528 bytes. + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 387496u, 200); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420.jpg"); + // JPEG size is 546,797 bytes. + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 455560u, 10); +} + +TEST(JxlTest, + JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_luma_subsample)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData( + "jxl/flower/flower.png.im_q85_luma_subsample.jpg"); + // JPEG size is 400,724 bytes. + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 325354u, 10); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression444_12)) { + // 444 JPEG that has an interesting sampling-factor (1x2, 1x2, 1x2). + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_444_1x2.jpg"); + // JPEG size is 703,874 bytes. + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 569679u, 10); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression422)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_422.jpg"); + // JPEG size is 522,057 bytes. 
+ EXPECT_NEAR(RoundtripJpeg(orig, &pool), 499282u, 10); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression440)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_440.jpg"); + // JPEG size is 603,623 bytes. + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 501151u, 10); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression_asymmetric)) { + // 2x vertical downsample of one chroma channel, 2x horizontal downsample of + // the other. + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_asymmetric.jpg"); + // JPEG size is 604,601 bytes. + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 500602u, 10); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompression420Progr)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/flower/flower.png.im_q85_420_progr.jpg"); + // JPEG size is 522,057 bytes. + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 455499u, 10); +} + +TEST(JxlTest, JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionMetadata)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/jpeg_reconstruction/1x1_exif_xmp.jpg"); + // JPEG size is 4290 bytes + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 1400u, 30); +} + +TEST(JxlTest, + JXL_TRANSCODE_JPEG_TEST(RoundtripJpegRecompressionOrientationICC)) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = + jxl::test::ReadTestData("jxl/jpeg_reconstruction/sideways_bench.jpg"); + // JPEG size is 15252 bytes + EXPECT_NEAR(RoundtripJpeg(orig, &pool), 12000u, 470); + // TODO(jon): investigate why 'Cross-compiling i686-linux-gnu' produces a + // larger result +} + +TEST(JxlTest, RoundtripProgressive) { + ThreadPoolForTests pool(4); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(600, 1024); + + 
JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 1); + cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1); + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 62160, 750); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.4)); +} + +TEST(JxlTest, RoundtripProgressiveLevel2Slow) { + ThreadPoolForTests pool(8); + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + TestImage t; + t.DecodeFromBytes(orig).ClearMetadata().SetDimensions(600, 1024); + + JXLCompressParams cparams; + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 9); // kTortoise + cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_DC, 2); + cparams.AddOption(JXL_ENC_FRAME_SETTING_PROGRESSIVE_AC, 1); + cparams.AddOption(JXL_ENC_FRAME_SETTING_RESPONSIVE, 1); + + PackedPixelFile ppf_out; + EXPECT_NEAR(Roundtrip(t.ppf(), cparams, {}, &pool, &ppf_out), 71111, 1000); + EXPECT_THAT(ButteraugliDistance(t.ppf(), ppf_out), IsSlightlyBelow(1.17)); +} + +TEST(JxlTest, RoundtripUnsignedCustomBitdepthLossless) { + ThreadPool* pool = nullptr; + for (uint32_t num_channels = 1; num_channels < 6; ++num_channels) { + for (JxlEndianness endianness : {JXL_LITTLE_ENDIAN, JXL_BIG_ENDIAN}) { + for (uint32_t bitdepth = 3; bitdepth <= 16; ++bitdepth) { + if (bitdepth <= 8 && endianness == JXL_BIG_ENDIAN) continue; + printf("Testing %u channel unsigned %u bit %s endian lossless.\n", + num_channels, bitdepth, + endianness == JXL_LITTLE_ENDIAN ? 
"little" : "big"); + TestImage t; + t.SetDimensions(256, 256).SetChannels(num_channels); + t.SetAllBitDepths(bitdepth).SetEndianness(endianness); + TestImage::Frame frame = t.AddFrame(); + frame.RandomFill(); + + JXLCompressParams cparams = CompressParamsForLossless(); + cparams.input_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM; + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + dparams.output_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM; + + PackedPixelFile ppf_out; + Roundtrip(t.ppf(), cparams, dparams, pool, &ppf_out); + + ASSERT_TRUE(test::SamePixels(t.ppf(), ppf_out)); + } + } + } +} + +TEST(JxlTest, LosslessPNMRoundtrip) { + static const char* kChannels[] = {"", "g", "ga", "rgb", "rgba"}; + static const char* kExtension[] = {"", ".pgm", ".pam", ".ppm", ".pam"}; + for (size_t bit_depth = 1; bit_depth <= 16; ++bit_depth) { + for (size_t channels = 1; channels <= 4; ++channels) { + if (bit_depth == 1 && (channels == 2 || channels == 4)) continue; + std::string extension(kExtension[channels]); + std::string filename = "jxl/flower/flower_small." 
+ + std::string(kChannels[channels]) + ".depth" + + std::to_string(bit_depth) + extension; + const PaddedBytes orig = jxl::test::ReadTestData(filename); + test::TestImage t; + if (channels < 3) t.SetColorEncoding("Gra_D65_Rel_SRG"); + t.DecodeFromBytes(orig); + + JXLCompressParams cparams = CompressParamsForLossless(); + cparams.AddOption(JXL_ENC_FRAME_SETTING_EFFORT, 1); // kLightning + cparams.input_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM; + + JXLDecompressParams dparams; + dparams.accepted_formats.push_back(t.ppf().frames[0].color.format); + dparams.output_bitdepth.type = JXL_BIT_DEPTH_FROM_CODESTREAM; + + PackedPixelFile ppf_out; + Roundtrip(t.ppf(), cparams, dparams, nullptr, &ppf_out); + + extras::EncodedImage encoded; + auto encoder = extras::Encoder::FromExtension(extension); + ASSERT_TRUE(encoder.get()); + ASSERT_TRUE(encoder->Encode(ppf_out, &encoded, nullptr)); + ASSERT_EQ(encoded.bitstreams.size(), 1); + ASSERT_EQ(orig.size(), encoded.bitstreams[0].size()); + EXPECT_EQ(0, + memcmp(orig.data(), encoded.bitstreams[0].data(), orig.size())); + } + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/lehmer_code.h b/third-party/libjxl/libjxl/lib/jxl/lehmer_code.h new file mode 100644 index 0000000000..dd1d21c6f7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/lehmer_code.h @@ -0,0 +1,102 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_LEHMER_CODE_H_ +#define LIB_JXL_LEHMER_CODE_H_ + +#include +#include + +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" + +namespace jxl { + +// Permutation <=> factorial base representation (Lehmer code). 
+
+using LehmerT = uint32_t;
+
+template <typename T>
+constexpr T ValueOfLowest1Bit(T t) {
+  return t & -t;  // Isolates the least-significant set bit; 0 stays 0.
+}
+
+// Computes the Lehmer (factorial basis) code of permutation, an array of n
+// unique indices in [0..n), and stores it in code[0..n). N*logN time.
+// temp must have n + 1 elements but need not be initialized.
+template <typename PermutationT>
+void ComputeLehmerCode(const PermutationT* JXL_RESTRICT permutation,
+                       uint32_t* JXL_RESTRICT temp, const size_t n,
+                       LehmerT* JXL_RESTRICT code) {
+  for (size_t idx = 0; idx < n + 1; ++idx) temp[idx] = 0;
+
+  for (size_t idx = 0; idx < n; ++idx) {
+    const PermutationT s = permutation[idx];
+
+    // Compute sum in Fenwick tree
+    uint32_t penalty = 0;
+    uint32_t i = s + 1;
+    while (i != 0) {
+      penalty += temp[i];
+      i &= i - 1;  // clear lowest bit
+    }
+    JXL_DASSERT(s >= penalty);
+    code[idx] = s - penalty;
+    i = s + 1;
+    // Add operation in Fenwick tree
+    while (i < n + 1) {
+      temp[i] += 1;
+      i += ValueOfLowest1Bit(i);
+    }
+  }
+}
+
+// Decodes the Lehmer code in code[0..n) into permutation[0..n).
+// temp must have 1 << CeilLog2(n) elements but need not be initialized.
+template <typename PermutationT>
+void DecodeLehmerCode(const LehmerT* JXL_RESTRICT code,
+                      uint32_t* JXL_RESTRICT temp, size_t n,
+                      PermutationT* JXL_RESTRICT permutation) {
+  JXL_DASSERT(n != 0);
+  const size_t log2n = CeilLog2Nonzero(n);
+  const size_t padded_n = 1ull << log2n;
+
+  for (size_t i = 0; i < padded_n; i++) {
+    const int32_t i1 = static_cast<int32_t>(i + 1);
+    temp[i] = static_cast<uint32_t>(ValueOfLowest1Bit(i1));
+  }
+
+  for (size_t i = 0; i < n; i++) {
+    JXL_DASSERT(code[i] + i < n);
+    uint32_t rank = code[i] + 1;
+
+    // Extract i-th unused element via implicit order-statistics tree.
+    size_t bit = padded_n;
+    size_t next = 0;
+    for (size_t level = 0; level <= log2n; level++) {
+      const size_t cand = next + bit;
+      JXL_DASSERT(cand >= 1);
+      bit >>= 1;
+      if (temp[cand - 1] < rank) {
+        next = cand;
+        rank -= temp[cand - 1];
+      }
+    }
+
+    permutation[i] = next;
+
+    // Mark as used
+    next += 1;
+    while (next <= padded_n) {
+      temp[next - 1] -= 1;
+      next += ValueOfLowest1Bit(next);
+    }
+  }
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LEHMER_CODE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/lehmer_code_test.cc b/third-party/libjxl/libjxl/lib/jxl/lehmer_code_test.cc
new file mode 100644
index 0000000000..acda762545
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/lehmer_code_test.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/lehmer_code.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <algorithm>
+#include <numeric>
+#include <vector>
+
+#include "lib/jxl/base/bits.h"
+#include "lib/jxl/base/data_parallel.h"
+#include "lib/jxl/base/random.h"
+#include "lib/jxl/test_utils.h"
+#include "lib/jxl/testing.h"
+
+namespace jxl {
+namespace {
+
+template <typename PermutationT>
+struct WorkingSet {
+  explicit WorkingSet(size_t max_n)
+      : padded_n(1ull << CeilLog2Nonzero(max_n + 1)),
+        permutation(max_n),
+        temp(padded_n),
+        lehmer(max_n),
+        decoded(max_n) {}
+
+  size_t padded_n;
+  std::vector<PermutationT> permutation;
+  std::vector<uint32_t> temp;
+  std::vector<LehmerT> lehmer;
+  std::vector<PermutationT> decoded;
+};
+
+template <typename PermutationT>
+void Roundtrip(size_t n, WorkingSet<PermutationT>* ws) {
+  JXL_ASSERT(n != 0);
+  const size_t padded_n = 1ull << CeilLog2Nonzero(n);
+
+  Rng rng(n * 65537 + 13);
+
+  // Ensure indices fit into PermutationT
+  EXPECT_LE(n, 1ULL << (sizeof(PermutationT) * 8));
+
+  std::iota(ws->permutation.begin(), ws->permutation.begin() + n, 0);
+
+  // For various random permutations:
+  for (size_t rep = 0; rep < 3; ++rep) {
+    rng.Shuffle(ws->permutation.data(), n);
+
+    // Must decode to the same permutation
+    ComputeLehmerCode(ws->permutation.data(), ws->temp.data(), n,
+                      ws->lehmer.data());
+    memset(ws->temp.data(), 0, padded_n * sizeof(ws->temp[0]));
+    DecodeLehmerCode(ws->lehmer.data(), ws->temp.data(), n, ws->decoded.data());
+
+    for (size_t i = 0; i < n; ++i) {
+      EXPECT_EQ(ws->permutation[i], ws->decoded[i]);
+    }
+  }
+}
+
+// Preallocates arrays and tests n = [begin, end).
+template <typename PermutationT>
+void RoundtripSizeRange(ThreadPool* pool, uint32_t begin, uint32_t end) {
+  ASSERT_NE(0u, begin);  // n = 0 not allowed.
+  std::vector<WorkingSet<PermutationT>> working_sets;
+
+  JXL_CHECK(RunOnPool(
+      pool, begin, end,
+      [&working_sets, end](const size_t num_threads) {
+        for (size_t i = 0; i < num_threads; i++) {
+          working_sets.emplace_back(end - 1);
+        }
+        return true;
+      },
+      [&working_sets](const uint32_t n, const size_t thread) {
+        Roundtrip(n, &working_sets[thread]);
+      },
+      "lehmer test"));
+}
+
+TEST(LehmerCodeTest, TestRoundtrips) {
+  test::ThreadPoolForTests pool(8);
+
+  RoundtripSizeRange<uint16_t>(&pool, 1, 1026);
+
+  // Ensures PermutationT can fit > 16 bit values.
+  RoundtripSizeRange<uint32_t>(&pool, 65536, 65540);
+}
+
+}  // namespace
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/libjxl.pc.in b/third-party/libjxl/libjxl/lib/jxl/libjxl.pc.in
new file mode 100644
index 0000000000..4a7af65b7c
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/libjxl.pc.in
@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=@PKGCONFIG_TARGET_LIBS@
+includedir=@PKGCONFIG_TARGET_INCLUDES@
+
+Name: libjxl
+Description: Loads and saves JPEG XL files
+Version: @JPEGXL_LIBRARY_VERSION@
+Requires.private: @JPEGXL_LIBRARY_REQUIRES@
+Libs: -L${libdir} -ljxl
+Libs.private: -lm
+Cflags: -I${includedir}
+Cflags.private: -DJXL_STATIC_DEFINE
diff --git a/third-party/libjxl/libjxl/lib/jxl/loop_filter.cc b/third-party/libjxl/libjxl/lib/jxl/loop_filter.cc
new file mode 100644
index 0000000000..5afe87617d
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/loop_filter.cc
@@ -0,0 +1,98 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/loop_filter.h"
+
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/fields.h"
+
+namespace jxl {
+
+LoopFilter::LoopFilter() { Bundle::Init(this); }
+Status LoopFilter::VisitFields(Visitor* JXL_RESTRICT visitor) {
+  // Must come before AllDefault.
+
+  if (visitor->AllDefault(*this, &all_default)) {
+    // Overwrite all serialized fields, but not any nonserialized_*.
+    visitor->SetDefault(this);
+    return true;
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(true, &gab));
+  if (visitor->Conditional(gab)) {
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &gab_custom));
+    if (visitor->Conditional(gab_custom)) {
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_x_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_x_weight2));
+      if (std::abs(1.0f + (gab_x_weight1 + gab_x_weight2) * 4) < 1e-8) {
+        return JXL_FAILURE(
+            "Gaborish x weights lead to near 0 unnormalized kernel");
+      }
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_y_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_y_weight2));
+      if (std::abs(1.0f + (gab_y_weight1 + gab_y_weight2) * 4) < 1e-8) {
+        return JXL_FAILURE(
+            "Gaborish y weights lead to near 0 unnormalized kernel");
+      }
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.104699568f, &gab_b_weight1));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(1.1 * 0.055680538f, &gab_b_weight2));
+      if (std::abs(1.0f + (gab_b_weight1 + gab_b_weight2) * 4) < 1e-8) {
+        return JXL_FAILURE(
+            "Gaborish b weights lead to near 0 unnormalized kernel");
+      }
+    }
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(2, 2, &epf_iters));
+  if (visitor->Conditional(epf_iters > 0)) {
+    if (visitor->Conditional(!nonserialized_is_modular)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sharp_custom));
+      if (visitor->Conditional(epf_sharp_custom)) {
+        for (size_t i = 0; i < kEpfSharpEntries; ++i) {
+          JXL_QUIET_RETURN_IF_ERROR(visitor->F16(
+              float(i) / float(kEpfSharpEntries - 1), &epf_sharp_lut[i]));
+        }
+      }
+    }
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_weight_custom));
+    if (visitor->Conditional(epf_weight_custom)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(40.0f, &epf_channel_scale[0]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(5.0f, &epf_channel_scale[1]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(3.5f, &epf_channel_scale[2]));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.45f, &epf_pass1_zeroflush));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.6f, &epf_pass2_zeroflush));
+    }
+
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &epf_sigma_custom));
+    if (visitor->Conditional(epf_sigma_custom)) {
+      if (visitor->Conditional(!nonserialized_is_modular)) {
+        JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.46f, &epf_quant_mul));
+      }
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(0.9f, &epf_pass0_sigma_scale));
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(6.5f, &epf_pass2_sigma_scale));
+      JXL_QUIET_RETURN_IF_ERROR(
+          visitor->F16(0.6666666666666666f, &epf_border_sad_mul));
+    }
+    if (visitor->Conditional(nonserialized_is_modular)) {
+      JXL_QUIET_RETURN_IF_ERROR(visitor->F16(1.0f, &epf_sigma_for_modular));
+      if (epf_sigma_for_modular < 1e-8) {
+        return JXL_FAILURE("EPF: sigma for modular is too small");
+      }
+    }
+  }
+
+  JXL_QUIET_RETURN_IF_ERROR(visitor->BeginExtensions(&extensions));
+  // Extensions: in chronological order of being added to the format.
+  return visitor->EndExtensions();
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/loop_filter.h b/third-party/libjxl/libjxl/lib/jxl/loop_filter.h
new file mode 100644
index 0000000000..e4b418ba2b
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/loop_filter.h
@@ -0,0 +1,76 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LOOP_FILTER_H_
+#define LIB_JXL_LOOP_FILTER_H_
+
+// Parameters for loop filter(s), stored in each frame.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+#include "lib/jxl/dec_bit_reader.h"
+#include "lib/jxl/field_encodings.h"
+
+namespace jxl {
+
+struct LoopFilter : public Fields {
+  LoopFilter();
+  JXL_FIELDS_NAME(LoopFilter)
+
+  Status VisitFields(Visitor* JXL_RESTRICT visitor) override;
+
+  size_t Padding() const {
+    static const size_t padding_per_epf_iter[4] = {0, 2, 3, 6};  // by epf_iters
+    return padding_per_epf_iter[epf_iters] + (gab ? 1 : 0);  // +1 when gab is on
+  }
+
+  mutable bool all_default;
+
+  // --- Gaborish convolution
+  bool gab;
+
+  bool gab_custom;
+  float gab_x_weight1;
+  float gab_x_weight2;
+  float gab_y_weight1;
+  float gab_y_weight2;
+  float gab_b_weight1;
+  float gab_b_weight2;
+
+  // --- Edge-preserving filter
+
+  // Number of EPF stages to apply. 0 means EPF disabled. 1 applies only the
+  // first stage, 2 applies both stages and 3 applies the first stage twice and
+  // the second stage once.
+  uint32_t epf_iters;
+
+  bool epf_sharp_custom;
+  enum { kEpfSharpEntries = 8 };
+  float epf_sharp_lut[kEpfSharpEntries];
+
+  bool epf_weight_custom;      // Custom weight params
+  float epf_channel_scale[3];  // Relative weight of each channel
+  float epf_pass1_zeroflush;   // Minimum weight for first pass
+  float epf_pass2_zeroflush;   // Minimum weight for second pass
+
+  bool epf_sigma_custom;        // Custom sigma parameters
+  float epf_quant_mul;          // Sigma is ~ this * quant
+  float epf_pass0_sigma_scale;  // Multiplier for sigma in pass 0
+  float epf_pass2_sigma_scale;  // Multiplier for sigma in the second pass
+  float epf_border_sad_mul;     // (inverse) multiplier for sigma on borders
+
+  float epf_sigma_for_modular;
+
+  uint64_t extensions;
+
+  bool nonserialized_is_modular = false;
+};
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LOOP_FILTER_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/luminance.cc b/third-party/libjxl/libjxl/lib/jxl/luminance.cc
new file mode 100644
index 0000000000..10151f4267
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/luminance.cc
@@ -0,0 +1,26 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/luminance.h"
+
+#include "lib/jxl/image_metadata.h"
+
+namespace jxl {
+
+void SetIntensityTarget(ImageMetadata* m) {
+  if (m->color_encoding.tf.IsPQ()) {
+    // Peak luminance of PQ as defined by SMPTE ST 2084:2014.
+    m->SetIntensityTarget(10000);
+  } else if (m->color_encoding.tf.IsHLG()) {
+    // Nominal display peak luminance used as a reference by
+    // Rec. ITU-R BT.2100-2.
+    m->SetIntensityTarget(1000);
+  } else {
+    // SDR
+    m->SetIntensityTarget(kDefaultIntensityTarget);
+  }
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/luminance.h b/third-party/libjxl/libjxl/lib/jxl/luminance.h
new file mode 100644
index 0000000000..3181576823
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/luminance.h
@@ -0,0 +1,22 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_LUMINANCE_H_
+#define LIB_JXL_LUMINANCE_H_
+
+namespace jxl {
+
+// Sets a default intensity target on the metadata, chosen from the image's
+// transfer function, if known. SDR images, or images not known to be HDR, get
+// kDefaultIntensityTarget; images known to have a PQ or HLG transfer function
+// get a higher value (10000 for PQ, 1000 for HLG). Nothing is returned.
+
+struct ImageMetadata;
+// TODO(eustas): rename
+void SetIntensityTarget(ImageMetadata* m);
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_LUMINANCE_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/matrix_ops.h b/third-party/libjxl/libjxl/lib/jxl/matrix_ops.h
new file mode 100644
index 0000000000..1a969bd4f0
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/matrix_ops.h
@@ -0,0 +1,84 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MATRIX_OPS_H_
+#define LIB_JXL_MATRIX_OPS_H_
+
+// 3x3 matrix operations.
+
+#include <cmath>  // abs
+#include <cstddef>
+
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Computes C = A * B for row-major 3x3 matrices. c must not alias a or b.
+template <typename T>
+void Mul3x3Matrix(const T* a, const T* b, T* c) {
+  alignas(16) T temp[3];  // For transposed column
+  for (size_t x = 0; x < 3; x++) {
+    for (size_t z = 0; z < 3; z++) {
+      temp[z] = b[z * 3 + x];
+    }
+    for (size_t y = 0; y < 3; y++) {
+      double e = 0;
+      for (size_t z = 0; z < 3; z++) {
+        e += a[y * 3 + z] * temp[z];
+      }
+      c[y * 3 + x] = e;
+    }
+  }
+}
+
+// Computes C = A * B, A a row-major 3x3 matrix, B a 3-vector. c must not alias b.
+template <typename T>
+void Mul3x3Vector(const T* a, const T* b, T* c) {
+  for (size_t y = 0; y < 3; y++) {
+    double e = 0;
+    for (size_t x = 0; x < 3; x++) {
+      e += a[y * 3 + x] * b[x];
+    }
+    c[y] = e;
+  }
+}
+
+// Inverts a 3x3 matrix in place. On failure the matrix is left unmodified.
+template <typename T>
+Status Inv3x3Matrix(T* matrix) {
+  // Intermediate computation is done in double precision.
+  double temp[9];
+  temp[0] = static_cast<double>(matrix[4]) * matrix[8] -
+            static_cast<double>(matrix[5]) * matrix[7];
+  temp[1] = static_cast<double>(matrix[2]) * matrix[7] -
+            static_cast<double>(matrix[1]) * matrix[8];
+  temp[2] = static_cast<double>(matrix[1]) * matrix[5] -
+            static_cast<double>(matrix[2]) * matrix[4];
+  temp[3] = static_cast<double>(matrix[5]) * matrix[6] -
+            static_cast<double>(matrix[3]) * matrix[8];
+  temp[4] = static_cast<double>(matrix[0]) * matrix[8] -
+            static_cast<double>(matrix[2]) * matrix[6];
+  temp[5] = static_cast<double>(matrix[2]) * matrix[3] -
+            static_cast<double>(matrix[0]) * matrix[5];
+  temp[6] = static_cast<double>(matrix[3]) * matrix[7] -
+            static_cast<double>(matrix[4]) * matrix[6];
+  temp[7] = static_cast<double>(matrix[1]) * matrix[6] -
+            static_cast<double>(matrix[0]) * matrix[7];
+  temp[8] = static_cast<double>(matrix[0]) * matrix[4] -
+            static_cast<double>(matrix[1]) * matrix[3];
+  double det = matrix[0] * temp[0] + matrix[1] * temp[3] + matrix[2] * temp[6];
+  if (std::abs(det) < 1e-10) {
+    return JXL_FAILURE("Matrix determinant is too close to 0");
+  }
+  double idet = 1.0 / det;
+  for (size_t i = 0; i < 9; i++) {
+    matrix[i] = temp[i] * idet;
+  }
+  return true;
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MATRIX_OPS_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.cc b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.cc
new file mode 100644
index 0000000000..87727e75cd
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.cc
@@ -0,0 +1,18 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "lib/jxl/memory_manager_internal.h"
+
+#include <stdlib.h>
+
+namespace jxl {
+
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size) {
+  return malloc(size);  // opaque is not used by the default allocator.
+}
+
+void MemoryManagerDefaultFree(void* opaque, void* address) { free(address); }
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.h b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.h
new file mode 100644
index 0000000000..f8a5cd8d59
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/memory_manager_internal.h
@@ -0,0 +1,101 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+#define LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
+
+// Memory allocator with support for alignment + misalignment.
+
+#include <jxl/memory_manager.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>  // memcpy
+
+#include <atomic>
+#include <memory>
+
+#include "lib/jxl/base/compiler_specific.h"
+#include "lib/jxl/base/status.h"
+
+namespace jxl {
+
+// Default alloc and free functions.
+void* MemoryManagerDefaultAlloc(void* opaque, size_t size);
+void MemoryManagerDefaultFree(void* opaque, void* address);
+
+// Initializes the memory manager instance with the passed one. The
+// MemoryManager passed in |memory_manager| may be NULL or contain NULL
+// functions which will be initialized with the default ones. If either alloc
+// or free are NULL, then both must be NULL, otherwise this function returns an
+// error.
+static JXL_INLINE Status MemoryManagerInit(
+    JxlMemoryManager* self, const JxlMemoryManager* memory_manager) {
+  if (memory_manager) {
+    *self = *memory_manager;
+  } else {
+    memset(self, 0, sizeof(*self));
+  }
+  if (!self->alloc != !self->free) {
+    return false;
+  }
+  if (!self->alloc) self->alloc = jxl::MemoryManagerDefaultAlloc;
+  if (!self->free) self->free = jxl::MemoryManagerDefaultFree;
+
+  return true;
+}
+
+static JXL_INLINE void* MemoryManagerAlloc(
+    const JxlMemoryManager* memory_manager, size_t size) {
+  return memory_manager->alloc(memory_manager->opaque, size);
+}
+
+static JXL_INLINE void MemoryManagerFree(const JxlMemoryManager* memory_manager,
+                                         void* address) {
+  return memory_manager->free(memory_manager->opaque, address);
+}
+
+// Helper class to be used as a deleter in a unique_ptr call.
+class MemoryManagerDeleteHelper {
+ public:
+  explicit MemoryManagerDeleteHelper(const JxlMemoryManager* memory_manager)
+      : memory_manager_(memory_manager) {}
+
+  // Delete and free the passed pointer using the memory_manager.
+  template <typename T>
+  void operator()(T* address) const {
+    if (!address) {
+      return;
+    }
+    address->~T();
+    return memory_manager_->free(memory_manager_->opaque, address);
+  }
+
+ private:
+  const JxlMemoryManager* memory_manager_;
+};
+
+template <typename T>
+using MemoryManagerUniquePtr = std::unique_ptr<T, MemoryManagerDeleteHelper>;
+
+// Creates a new object T allocating it with the memory allocator into a
+// unique_ptr.
+template <typename T, typename... Args>
+JXL_INLINE MemoryManagerUniquePtr<T> MemoryManagerMakeUnique(
+    const JxlMemoryManager* memory_manager, Args&&... args) {
+  T* mem =
+      static_cast<T*>(memory_manager->alloc(memory_manager->opaque, sizeof(T)));
+  if (!mem) {
+    // Allocation error case.
+    return MemoryManagerUniquePtr<T>(nullptr,
+                                     MemoryManagerDeleteHelper(memory_manager));
+  }
+  return MemoryManagerUniquePtr<T>(new (mem) T(std::forward<Args>(args)...),
+                                   MemoryManagerDeleteHelper(memory_manager));
+}
+
+}  // namespace jxl
+
+#endif  // LIB_JXL_MEMORY_MANAGER_INTERNAL_H_
diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/context_predict.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/context_predict.h
new file mode 100644
index 0000000000..4c3a33a52a
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/context_predict.h
@@ -0,0 +1,672 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+#define LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_
+
+#include <utility>
+#include <vector>
+
+#include "lib/jxl/fields.h"
+#include "lib/jxl/modular/modular_image.h"
+#include "lib/jxl/modular/options.h"
+
+namespace jxl {
+
+namespace weighted {
+constexpr static size_t kNumPredictors = 4;
+constexpr static int64_t kPredExtraBits = 3;
+constexpr static int64_t kPredictionRound = ((1 << kPredExtraBits) >> 1) - 1;
+constexpr static size_t kNumProperties = 1;
+
+struct Header : public Fields {
+  JXL_FIELDS_NAME(WeightedPredictorHeader)
+  // TODO(janwas): move to cc file, avoid including fields.h.
+  Header() { Bundle::Init(this); }
+
+  Status VisitFields(Visitor *JXL_RESTRICT visitor) override {
+    if (visitor->AllDefault(*this, &all_default)) {
+      // Overwrite all serialized fields, but not any nonserialized_*.
+      visitor->SetDefault(this);
+      return true;
+    }
+    auto visit_p = [visitor](pixel_type val, pixel_type *p) {
+      uint32_t up = *p;
+      JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(5, val, &up));
+      *p = up;
+      return Status(true);
+    };
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(16, &p1C));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(10, &p2C));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Ca));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cb));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(7, &p3Cc));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Cd));
+    JXL_QUIET_RETURN_IF_ERROR(visit_p(0, &p3Ce));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xd, &w[0]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[1]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[2]));
+    JXL_QUIET_RETURN_IF_ERROR(visitor->Bits(4, 0xc, &w[3]));
+    return true;
+  }
+
+  bool all_default;
+  pixel_type p1C = 0, p2C = 0, p3Ca = 0, p3Cb = 0, p3Cc = 0, p3Cd = 0, p3Ce = 0;
+  uint32_t w[kNumPredictors] = {};
+};
+
+struct State {
+  pixel_type_w prediction[kNumPredictors] = {};
+  pixel_type_w pred = 0;  // *before* removing the added bits.
+  std::vector<uint32_t> pred_errors[kNumPredictors];
+  std::vector<int32_t> error;
+  const Header header;
+
+  // Allows to approximate division by a number from 1 to 64.
+  // for (int i = 0; i < 64; i++) divlookup[i] = (1 << 24) / (i + 1);
+
+  const uint32_t divlookup[64] = {
+      16777216, 8388608, 5592405, 4194304, 3355443, 2796202, 2396745, 2097152,
+      1864135,  1677721, 1525201, 1398101, 1290555, 1198372, 1118481, 1048576,
+      986895,   932067,  883011,  838860,  798915,  762600,  729444,  699050,
+      671088,   645277,  621378,  599186,  578524,  559240,  541200,  524288,
+      508400,   493447,  479349,  466033,  453438,  441505,  430185,  419430,
+      409200,   399457,  390167,  381300,  372827,  364722,  356962,  349525,
+      342392,   335544,  328965,  322638,  316551,  310689,  305040,  299593,
+      294337,   289262,  284359,  279620,  275036,  270600,  266305,  262144};
+
+  constexpr static pixel_type_w AddBits(pixel_type_w x) {
+    return uint64_t(x) << kPredExtraBits;
+  }
+
+  State(Header header, size_t xsize, size_t ysize) : header(header) {
+    // Extra margin to avoid out-of-bounds writes.
+    // All have space for two rows of data.
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      pred_errors[i].resize((xsize + 2) * 2);
+    }
+    error.resize((xsize + 2) * 2);
+  }
+
+  // Approximates 4+(maxweight<<24)/(x+1), avoiding division
+  JXL_INLINE uint32_t ErrorWeight(uint64_t x, uint32_t maxweight) const {
+    int shift = static_cast<int>(FloorLog2Nonzero(x + 1)) - 5;
+    if (shift < 0) shift = 0;
+    return 4 + ((maxweight * divlookup[x >> shift]) >> shift);
+  }
+
+  // Approximates the weighted average of the input values with the given
+  // weights, avoiding division. Weights must sum to at least 16.
+  JXL_INLINE pixel_type_w
+  WeightedAverage(const pixel_type_w *JXL_RESTRICT p,
+                  std::array<uint32_t, kNumPredictors> w) const {
+    uint32_t weight_sum = 0;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      weight_sum += w[i];
+    }
+    JXL_DASSERT(weight_sum > 15);
+    uint32_t log_weight = FloorLog2Nonzero(weight_sum);  // at least 4.
+    weight_sum = 0;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      w[i] >>= log_weight - 4;
+      weight_sum += w[i];
+    }
+    // for rounding.
+    pixel_type_w sum = (weight_sum >> 1) - 1;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      sum += p[i] * w[i];
+    }
+    return (sum * divlookup[weight_sum - 1]) >> 24;
+  }
+
+  template <bool compute_properties>
+  JXL_INLINE pixel_type_w Predict(size_t x, size_t y, size_t xsize,
+                                  pixel_type_w N, pixel_type_w W,
+                                  pixel_type_w NE, pixel_type_w NW,
+                                  pixel_type_w NN, Properties *properties,
+                                  size_t offset) {
+    size_t cur_row = y & 1 ? 0 : (xsize + 2);
+    size_t prev_row = y & 1 ? (xsize + 2) : 0;
+    size_t pos_N = prev_row + x;
+    size_t pos_NE = x < xsize - 1 ? pos_N + 1 : pos_N;
+    size_t pos_NW = x > 0 ? pos_N - 1 : pos_N;
+    std::array<uint32_t, kNumPredictors> weights;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      // pred_errors[pos_N] also contains the error of pixel W.
+      // pred_errors[pos_NW] also contains the error of pixel WW.
+      weights[i] = pred_errors[i][pos_N] + pred_errors[i][pos_NE] +
+                   pred_errors[i][pos_NW];
+      weights[i] = ErrorWeight(weights[i], header.w[i]);
+    }
+
+    N = AddBits(N);
+    W = AddBits(W);
+    NE = AddBits(NE);
+    NW = AddBits(NW);
+    NN = AddBits(NN);
+
+    pixel_type_w teW = x == 0 ? 0 : error[cur_row + x - 1];
+    pixel_type_w teN = error[pos_N];
+    pixel_type_w teNW = error[pos_NW];
+    pixel_type_w sumWN = teN + teW;
+    pixel_type_w teNE = error[pos_NE];
+
+    if (compute_properties) {
+      pixel_type_w p = teW;
+      if (std::abs(teN) > std::abs(p)) p = teN;
+      if (std::abs(teNW) > std::abs(p)) p = teNW;
+      if (std::abs(teNE) > std::abs(p)) p = teNE;
+      (*properties)[offset++] = p;
+    }
+
+    prediction[0] = W + NE - N;
+    prediction[1] = N - (((sumWN + teNE) * header.p1C) >> 5);
+    prediction[2] = W - (((sumWN + teNW) * header.p2C) >> 5);
+    prediction[3] =
+        N - ((teNW * header.p3Ca + teN * header.p3Cb + teNE * header.p3Cc +
+              (NN - N) * header.p3Cd + (NW - W) * header.p3Ce) >>
+             5);
+
+    pred = WeightedAverage(prediction, weights);
+
+    // If all three have the same sign, skip clamping.
+    if (((teN ^ teW) | (teN ^ teNW)) > 0) {
+      return (pred + kPredictionRound) >> kPredExtraBits;
+    }
+
+    // Otherwise, clamp to min/max of neighbouring pixels (just W, NE, N).
+    pixel_type_w mx = std::max(W, std::max(NE, N));
+    pixel_type_w mn = std::min(W, std::min(NE, N));
+    pred = std::max(mn, std::min(mx, pred));
+    return (pred + kPredictionRound) >> kPredExtraBits;
+  }
+
+  JXL_INLINE void UpdateErrors(pixel_type_w val, size_t x, size_t y,
+                               size_t xsize) {
+    size_t cur_row = y & 1 ? 0 : (xsize + 2);
+    size_t prev_row = y & 1 ? (xsize + 2) : 0;
+    val = AddBits(val);
+    error[cur_row + x] = pred - val;
+    for (size_t i = 0; i < kNumPredictors; i++) {
+      pixel_type_w err =
+          (std::abs(prediction[i] - val) + kPredictionRound) >> kPredExtraBits;
+      // For predicting in the next row.
+      pred_errors[i][cur_row + x] = err;
+      // Add the error on this pixel to the error on the NE pixel. This has the
+      // effect of adding the error on this pixel to the E and EE pixels.
+      pred_errors[i][prev_row + x + 1] += err;
+    }
+  }
+};
+
+// Encoder helper function to set the parameters to some presets.
+inline void PredictorMode(int i, Header *header) { + switch (i) { + case 0: + // ~ lossless16 predictor + header->w[0] = 0xd; + header->w[1] = 0xc; + header->w[2] = 0xc; + header->w[3] = 0xc; + header->p1C = 16; + header->p2C = 10; + header->p3Ca = 7; + header->p3Cb = 7; + header->p3Cc = 7; + header->p3Cd = 0; + header->p3Ce = 0; + break; + case 1: + // ~ default lossless8 predictor + header->w[0] = 0xd; + header->w[1] = 0xc; + header->w[2] = 0xc; + header->w[3] = 0xb; + header->p1C = 8; + header->p2C = 8; + header->p3Ca = 4; + header->p3Cb = 0; + header->p3Cc = 3; + header->p3Cd = 23; + header->p3Ce = 2; + break; + case 2: + // ~ west lossless8 predictor + header->w[0] = 0xd; + header->w[1] = 0xc; + header->w[2] = 0xd; + header->w[3] = 0xc; + header->p1C = 10; + header->p2C = 9; + header->p3Ca = 7; + header->p3Cb = 0; + header->p3Cc = 0; + header->p3Cd = 16; + header->p3Ce = 9; + break; + case 3: + // ~ north lossless8 predictor + header->w[0] = 0xd; + header->w[1] = 0xd; + header->w[2] = 0xc; + header->w[3] = 0xc; + header->p1C = 16; + header->p2C = 8; + header->p3Ca = 0; + header->p3Cb = 16; + header->p3Cc = 0; + header->p3Cd = 23; + header->p3Ce = 0; + break; + case 4: + default: + // something else, because why not + header->w[0] = 0xd; + header->w[1] = 0xc; + header->w[2] = 0xc; + header->w[3] = 0xc; + header->p1C = 10; + header->p2C = 10; + header->p3Ca = 5; + header->p3Cb = 5; + header->p3Cc = 5; + header->p3Cd = 12; + header->p3Ce = 4; + break; + } +} +} // namespace weighted + +// Stores a node and its two children at the same time. This significantly +// reduces the number of branches needed during decoding. +struct FlatDecisionNode { + // Property + splitval of the top node. + int32_t property0; // -1 if leaf. + union { + PropertyVal splitval0; + Predictor predictor; + }; + // Property+splitval of the two child nodes. + union { + PropertyVal splitvals[2]; + int32_t multiplier; + }; + uint32_t childID; // childID is ctx id if leaf. 
+ union { + int16_t properties[2]; + int32_t predictor_offset; + }; +}; +using FlatTree = std::vector; + +class MATreeLookup { + public: + explicit MATreeLookup(const FlatTree &tree) : nodes_(tree) {} + struct LookupResult { + uint32_t context; + Predictor predictor; + int32_t offset; + int32_t multiplier; + }; + JXL_INLINE LookupResult Lookup(const Properties &properties) const { + uint32_t pos = 0; + while (true) { +#define TRAVERSE_THE_TREE \ + { \ + const FlatDecisionNode &node = nodes_[pos]; \ + if (node.property0 < 0) { \ + return {node.childID, node.predictor, node.predictor_offset, \ + node.multiplier}; \ + } \ + bool p0 = properties[node.property0] <= node.splitval0; \ + uint32_t off0 = properties[node.properties[0]] <= node.splitvals[0]; \ + uint32_t off1 = 2 | (properties[node.properties[1]] <= node.splitvals[1]); \ + pos = node.childID + (p0 ? off1 : off0); \ + } + + TRAVERSE_THE_TREE; + TRAVERSE_THE_TREE; + } + } + + private: + const FlatTree &nodes_; +}; + +static constexpr size_t kExtraPropsPerChannel = 4; +static constexpr size_t kNumNonrefProperties = + kNumStaticProperties + 13 + weighted::kNumProperties; + +constexpr size_t kWPProp = kNumNonrefProperties - weighted::kNumProperties; +constexpr size_t kGradientProp = 9; + +// Clamps gradient to the min/max of n, w (and l, implicitly). +static JXL_INLINE int32_t ClampedGradient(const int32_t n, const int32_t w, + const int32_t l) { + const int32_t m = std::min(n, w); + const int32_t M = std::max(n, w); + // The end result of this operation doesn't overflow or underflow if the + // result is between m and M, but the intermediate value may overflow, so we + // do the intermediate operations in uint32_t and check later if we had an + // overflow or underflow condition comparing m, M and l directly. 
+ // grad = M + m - l = n + w - l + const int32_t grad = + static_cast(static_cast(n) + static_cast(w) - + static_cast(l)); + // We use two sets of ternary operators to force the evaluation of them in + // any case, allowing the compiler to avoid branches and use cmovl/cmovg in + // x86. + const int32_t grad_clamp_M = (l < m) ? M : grad; + return (l > M) ? m : grad_clamp_M; +} + +inline pixel_type_w Select(pixel_type_w a, pixel_type_w b, pixel_type_w c) { + pixel_type_w p = a + b - c; + pixel_type_w pa = std::abs(p - a); + pixel_type_w pb = std::abs(p - b); + return pa < pb ? a : b; +} + +inline void PrecomputeReferences(const Channel &ch, size_t y, + const Image &image, uint32_t i, + Channel *references) { + ZeroFillImage(&references->plane); + uint32_t offset = 0; + size_t num_extra_props = references->w; + intptr_t onerow = references->plane.PixelsPerRow(); + for (int32_t j = static_cast(i) - 1; + j >= 0 && offset < num_extra_props; j--) { + if (image.channel[j].w != image.channel[i].w || + image.channel[j].h != image.channel[i].h) { + continue; + } + if (image.channel[j].hshift != image.channel[i].hshift) continue; + if (image.channel[j].vshift != image.channel[i].vshift) continue; + pixel_type *JXL_RESTRICT rp = references->Row(0) + offset; + const pixel_type *JXL_RESTRICT rpp = image.channel[j].Row(y); + const pixel_type *JXL_RESTRICT rpprev = image.channel[j].Row(y ? y - 1 : 0); + for (size_t x = 0; x < ch.w; x++, rp += onerow) { + pixel_type_w v = rpp[x]; + rp[0] = std::abs(v); + rp[1] = v; + pixel_type_w vleft = (x ? rpp[x - 1] : 0); + pixel_type_w vtop = (y ? rpprev[x] : vleft); + pixel_type_w vtopleft = (x && y ? 
rpprev[x - 1] : vleft); + pixel_type_w vpredicted = ClampedGradient(vleft, vtop, vtopleft); + rp[2] = std::abs(v - vpredicted); + rp[3] = v - vpredicted; + } + + offset += kExtraPropsPerChannel; + } +} + +struct PredictionResult { + int context = 0; + pixel_type_w guess = 0; + Predictor predictor; + int32_t multiplier; +}; + +inline void InitPropsRow( + Properties *p, + const std::array &static_props, + const int y) { + for (size_t i = 0; i < kNumStaticProperties; i++) { + (*p)[i] = static_props[i]; + } + (*p)[2] = y; + (*p)[9] = 0; // local gradient. +} + +namespace detail { +enum PredictorMode { + kUseTree = 1, + kUseWP = 2, + kForceComputeProperties = 4, + kAllPredictions = 8, + kNoEdgeCases = 16 +}; + +JXL_INLINE pixel_type_w PredictOne(Predictor p, pixel_type_w left, + pixel_type_w top, pixel_type_w toptop, + pixel_type_w topleft, pixel_type_w topright, + pixel_type_w leftleft, + pixel_type_w toprightright, + pixel_type_w wp_pred) { + switch (p) { + case Predictor::Zero: + return pixel_type_w{0}; + case Predictor::Left: + return left; + case Predictor::Top: + return top; + case Predictor::Select: + return Select(left, top, topleft); + case Predictor::Weighted: + return wp_pred; + case Predictor::Gradient: + return pixel_type_w{ClampedGradient(left, top, topleft)}; + case Predictor::TopLeft: + return topleft; + case Predictor::TopRight: + return topright; + case Predictor::LeftLeft: + return leftleft; + case Predictor::Average0: + return (left + top) / 2; + case Predictor::Average1: + return (left + topleft) / 2; + case Predictor::Average2: + return (topleft + top) / 2; + case Predictor::Average3: + return (top + topright) / 2; + case Predictor::Average4: + return (6 * top - 2 * toptop + 7 * left + 1 * leftleft + + 1 * toprightright + 3 * topright + 8) / + 16; + default: + return pixel_type_w{0}; + } +} + +template +JXL_INLINE PredictionResult Predict( + Properties *p, size_t w, const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const size_t x, const 
size_t y, Predictor predictor, + const MATreeLookup *lookup, const Channel *references, + weighted::State *wp_state, pixel_type_w *predictions) { + // We start in position 3 because of 2 static properties + y. + size_t offset = 3; + constexpr bool compute_properties = + mode & kUseTree || mode & kForceComputeProperties; + constexpr bool nec = mode & kNoEdgeCases; + pixel_type_w left = (nec || x ? pp[-1] : (y ? pp[-onerow] : 0)); + pixel_type_w top = (nec || y ? pp[-onerow] : left); + pixel_type_w topleft = (nec || (x && y) ? pp[-1 - onerow] : left); + pixel_type_w topright = (nec || (x + 1 < w && y) ? pp[1 - onerow] : top); + pixel_type_w leftleft = (nec || x > 1 ? pp[-2] : left); + pixel_type_w toptop = (nec || y > 1 ? pp[-onerow - onerow] : top); + pixel_type_w toprightright = + (nec || (x + 2 < w && y) ? pp[2 - onerow] : topright); + + if (compute_properties) { + // location + (*p)[offset++] = x; + // neighbors + (*p)[offset++] = top > 0 ? top : -top; + (*p)[offset++] = left > 0 ? left : -left; + (*p)[offset++] = top; + (*p)[offset++] = left; + + // local gradient + (*p)[offset] = left - (*p)[offset + 1]; + offset++; + // local gradient + (*p)[offset++] = left + top - topleft; + + // FFV1 context properties + (*p)[offset++] = left - topleft; + (*p)[offset++] = topleft - top; + (*p)[offset++] = top - topright; + (*p)[offset++] = top - toptop; + (*p)[offset++] = left - leftleft; + } + + pixel_type_w wp_pred = 0; + if (mode & kUseWP) { + wp_pred = wp_state->Predict( + x, y, w, top, left, topright, topleft, toptop, p, offset); + } + if (!nec && compute_properties) { + offset += weighted::kNumProperties; + // Extra properties. 
+ const pixel_type *JXL_RESTRICT rp = references->Row(x); + for (size_t i = 0; i < references->w; i++) { + (*p)[offset++] = rp[i]; + } + } + PredictionResult result; + if (mode & kUseTree) { + MATreeLookup::LookupResult lr = lookup->Lookup(*p); + result.context = lr.context; + result.guess = lr.offset; + result.multiplier = lr.multiplier; + predictor = lr.predictor; + } + if (mode & kAllPredictions) { + for (size_t i = 0; i < kNumModularPredictors; i++) { + predictions[i] = PredictOne((Predictor)i, left, top, toptop, topleft, + topright, leftleft, toprightright, wp_pred); + } + } + result.guess += PredictOne(predictor, left, top, toptop, topleft, topright, + leftleft, toprightright, wp_pred); + result.predictor = predictor; + + return result; +} +} // namespace detail + +inline PredictionResult PredictNoTreeNoWP(size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, + const int y, Predictor predictor) { + return detail::Predict( + /*p=*/nullptr, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr, + /*references=*/nullptr, /*wp_state=*/nullptr, /*predictions=*/nullptr); +} + +inline PredictionResult PredictNoTreeWP(size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, + const int y, Predictor predictor, + weighted::State *wp_state) { + return detail::Predict( + /*p=*/nullptr, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr, + /*references=*/nullptr, wp_state, /*predictions=*/nullptr); +} + +inline PredictionResult PredictTreeNoWP(Properties *p, size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, + const int y, + const MATreeLookup &tree_lookup, + const Channel &references) { + return detail::Predict( + p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references, + /*wp_state=*/nullptr, /*predictions=*/nullptr); +} +// Only use for y > 1, x > 1, x < w-2, and empty references +JXL_INLINE PredictionResult +PredictTreeNoWPNEC(Properties *p, size_t w, const pixel_type 
*JXL_RESTRICT pp, + const intptr_t onerow, const int x, const int y, + const MATreeLookup &tree_lookup, const Channel &references) { + return detail::Predict( + p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references, + /*wp_state=*/nullptr, /*predictions=*/nullptr); +} + +inline PredictionResult PredictTreeWP(Properties *p, size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, + const int y, + const MATreeLookup &tree_lookup, + const Channel &references, + weighted::State *wp_state) { + return detail::Predict( + p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references, + wp_state, /*predictions=*/nullptr); +} +JXL_INLINE PredictionResult PredictTreeWPNEC(Properties *p, size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, + const int y, + const MATreeLookup &tree_lookup, + const Channel &references, + weighted::State *wp_state) { + return detail::Predict( + p, w, pp, onerow, x, y, Predictor::Zero, &tree_lookup, &references, + wp_state, /*predictions=*/nullptr); +} + +inline PredictionResult PredictLearn(Properties *p, size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, + const int y, Predictor predictor, + const Channel &references, + weighted::State *wp_state) { + return detail::Predict( + p, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr, &references, + wp_state, /*predictions=*/nullptr); +} + +inline void PredictLearnAll(Properties *p, size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, const int y, + const Channel &references, + weighted::State *wp_state, + pixel_type_w *predictions) { + detail::Predict( + p, w, pp, onerow, x, y, Predictor::Zero, + /*lookup=*/nullptr, &references, wp_state, predictions); +} +inline PredictionResult PredictLearnNEC(Properties *p, size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, + const int y, Predictor predictor, + const Channel &references, 
+ weighted::State *wp_state) { + return detail::Predict( + p, w, pp, onerow, x, y, predictor, /*lookup=*/nullptr, &references, + wp_state, /*predictions=*/nullptr); +} + +inline void PredictLearnAllNEC(Properties *p, size_t w, + const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, const int y, + const Channel &references, + weighted::State *wp_state, + pixel_type_w *predictions) { + detail::Predict( + p, w, pp, onerow, x, y, Predictor::Zero, + /*lookup=*/nullptr, &references, wp_state, predictions); +} + +inline void PredictAllNoWP(size_t w, const pixel_type *JXL_RESTRICT pp, + const intptr_t onerow, const int x, const int y, + pixel_type_w *predictions) { + detail::Predict( + /*p=*/nullptr, w, pp, onerow, x, y, Predictor::Zero, + /*lookup=*/nullptr, + /*references=*/nullptr, /*wp_state=*/nullptr, predictions); +} +} // namespace jxl + +#endif // LIB_JXL_MODULAR_ENCODING_CONTEXT_PREDICT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.cc new file mode 100644 index 0000000000..66562f7dfd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.cc @@ -0,0 +1,107 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/encoding/dec_ma.h" + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/modular/encoding/ma_common.h" +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +namespace { + +Status ValidateTree( + const Tree &tree, + const std::vector> &prop_bounds, + size_t root) { + if (tree[root].property == -1) return true; + size_t p = tree[root].property; + int val = tree[root].splitval; + if (prop_bounds[p].first > val) return JXL_FAILURE("Invalid tree"); + // Splitting at max value makes no sense: left range will be exactly same + // as parent, right range will be invalid (min > max). + if (prop_bounds[p].second <= val) return JXL_FAILURE("Invalid tree"); + auto new_bounds = prop_bounds; + new_bounds[p].first = val + 1; + JXL_RETURN_IF_ERROR(ValidateTree(tree, new_bounds, tree[root].lchild)); + new_bounds[p] = prop_bounds[p]; + new_bounds[p].second = val; + return ValidateTree(tree, new_bounds, tree[root].rchild); +} + +Status DecodeTree(BitReader *br, ANSSymbolReader *reader, + const std::vector &context_map, Tree *tree, + size_t tree_size_limit) { + size_t leaf_id = 0; + size_t to_decode = 1; + tree->clear(); + while (to_decode > 0) { + JXL_RETURN_IF_ERROR(br->AllReadsWithinBounds()); + if (tree->size() > tree_size_limit) { + return JXL_FAILURE("Tree is too large: %" PRIuS " nodes vs %" PRIuS + " max nodes", + tree->size(), tree_size_limit); + } + to_decode--; + uint32_t prop1 = reader->ReadHybridUint(kPropertyContext, br, context_map); + if (prop1 > 256) return JXL_FAILURE("Invalid tree property value"); + int property = prop1 - 1; + if (property == -1) { + size_t predictor = + reader->ReadHybridUint(kPredictorContext, br, context_map); + if (predictor >= kNumModularPredictors) { + return JXL_FAILURE("Invalid predictor"); + } + int64_t predictor_offset = + UnpackSigned(reader->ReadHybridUint(kOffsetContext, br, context_map)); + uint32_t mul_log = + reader->ReadHybridUint(kMultiplierLogContext, 
br, context_map); + if (mul_log >= 31) { + return JXL_FAILURE("Invalid multiplier logarithm"); + } + uint32_t mul_bits = + reader->ReadHybridUint(kMultiplierBitsContext, br, context_map); + if (mul_bits + 1 >= 1u << (31u - mul_log)) { + return JXL_FAILURE("Invalid multiplier"); + } + uint32_t multiplier = (mul_bits + 1U) << mul_log; + tree->emplace_back(-1, 0, leaf_id++, 0, static_cast(predictor), + predictor_offset, multiplier); + continue; + } + int splitval = + UnpackSigned(reader->ReadHybridUint(kSplitValContext, br, context_map)); + tree->emplace_back(property, splitval, tree->size() + to_decode + 1, + tree->size() + to_decode + 2, Predictor::Zero, 0, 1); + to_decode += 2; + } + std::vector> prop_bounds; + prop_bounds.resize(256, {std::numeric_limits::min(), + std::numeric_limits::max()}); + return ValidateTree(*tree, prop_bounds, 0); +} +} // namespace + +Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit) { + std::vector tree_context_map; + ANSCode tree_code; + JXL_RETURN_IF_ERROR( + DecodeHistograms(br, kNumTreeContexts, &tree_code, &tree_context_map)); + // TODO(eustas): investigate more infinite tree cases. + if (tree_code.degenerate_symbols[tree_context_map[kPropertyContext]] > 0) { + return JXL_FAILURE("Infinite tree"); + } + ANSSymbolReader reader(&tree_code, br); + JXL_RETURN_IF_ERROR(DecodeTree(br, &reader, tree_context_map, tree, + std::min(tree_size_limit, kMaxTreeSize))); + if (!reader.CheckANSFinalState()) { + return JXL_FAILURE("ANS decode final state failed"); + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.h new file mode 100644 index 0000000000..a910c4deb1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/dec_ma.h @@ -0,0 +1,66 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_ENCODING_DEC_MA_H_ +#define LIB_JXL_MODULAR_ENCODING_DEC_MA_H_ + +#include +#include + +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/modular/options.h" + +namespace jxl { + +// inner nodes +struct PropertyDecisionNode { + PropertyVal splitval; + int16_t property; // -1: leaf node, lchild points to leaf node + uint32_t lchild; + uint32_t rchild; + Predictor predictor; + int64_t predictor_offset; + uint32_t multiplier; + + PropertyDecisionNode(int p, int split_val, int lchild, int rchild, + Predictor predictor, int64_t predictor_offset, + uint32_t multiplier) + : splitval(split_val), + property(p), + lchild(lchild), + rchild(rchild), + predictor(predictor), + predictor_offset(predictor_offset), + multiplier(multiplier) {} + PropertyDecisionNode() + : splitval(0), + property(-1), + lchild(0), + rchild(0), + predictor(Predictor::Zero), + predictor_offset(0), + multiplier(1) {} + static PropertyDecisionNode Leaf(Predictor predictor, int64_t offset = 0, + uint32_t multiplier = 1) { + return PropertyDecisionNode(-1, 0, 0, 0, predictor, offset, multiplier); + } + static PropertyDecisionNode Split(int p, int split_val, int lchild, + int rchild = -1) { + if (rchild == -1) rchild = lchild + 1; + return PropertyDecisionNode(p, split_val, lchild, rchild, Predictor::Zero, + 0, 1); + } +}; + +using Tree = std::vector; + +Status DecodeTree(BitReader *br, Tree *tree, size_t tree_size_limit); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_ENCODING_DEC_MA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.cc new file mode 100644 index 0000000000..f2a1705e4b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.cc @@ -0,0 +1,124 @@ +// Copyright (c) the JPEG 
XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/encoding/enc_debug_tree.h" + +#include +#include + +#include "lib/jxl/base/os_macros.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/encoding/dec_ma.h" +#include "lib/jxl/modular/options.h" + +#if JXL_OS_IOS +#define JXL_ENABLE_DOT 0 +#else +#define JXL_ENABLE_DOT 1 // iOS lacks C89 system() +#endif + +namespace jxl { + +const char *PredictorName(Predictor p) { + switch (p) { + case Predictor::Zero: + return "Zero"; + case Predictor::Left: + return "Left"; + case Predictor::Top: + return "Top"; + case Predictor::Average0: + return "Avg0"; + case Predictor::Average1: + return "Avg1"; + case Predictor::Average2: + return "Avg2"; + case Predictor::Average3: + return "Avg3"; + case Predictor::Average4: + return "Avg4"; + case Predictor::Select: + return "Sel"; + case Predictor::Gradient: + return "Grd"; + case Predictor::Weighted: + return "Wgh"; + case Predictor::TopLeft: + return "TopL"; + case Predictor::TopRight: + return "TopR"; + case Predictor::LeftLeft: + return "LL"; + default: + return "INVALID"; + }; +} + +std::string PropertyName(size_t i) { + static_assert(kNumNonrefProperties == 16, "Update this function"); + switch (i) { + case 0: + return "c"; + case 1: + return "g"; + case 2: + return "y"; + case 3: + return "x"; + case 4: + return "|N|"; + case 5: + return "|W|"; + case 6: + return "N"; + case 7: + return "W"; + case 8: + return "W-WW-NW+NWW"; + case 9: + return "W+N-NW"; + case 10: + return "W-NW"; + case 11: + return "NW-N"; + case 12: + return "N-NE"; + case 13: + return "N-NN"; + case 14: + return "W-WW"; + case 15: + return "WGH"; + default: + return "ch[" + ToString(15 - (int)i) + "]"; + } +} + +void PrintTree(const Tree &tree, const std::string &path) { + 
FILE *f = fopen((path + ".dot").c_str(), "w"); + fprintf(f, "graph{\n"); + for (size_t cur = 0; cur < tree.size(); cur++) { + if (tree[cur].property < 0) { + fprintf(f, "n%05" PRIuS " [label=\"%s%+" PRId64 " (x%u)\"];\n", cur, + PredictorName(tree[cur].predictor), tree[cur].predictor_offset, + tree[cur].multiplier); + } else { + fprintf(f, "n%05" PRIuS " [label=\"%s>%d\"];\n", cur, + PropertyName(tree[cur].property).c_str(), tree[cur].splitval); + fprintf(f, "n%05" PRIuS " -- n%05d;\n", cur, tree[cur].lchild); + fprintf(f, "n%05" PRIuS " -- n%05d;\n", cur, tree[cur].rchild); + } + } + fprintf(f, "}\n"); + fclose(f); +#if JXL_ENABLE_DOT + JXL_ASSERT( + system(("dot " + path + ".dot -T svg -o " + path + ".svg").c_str()) == 0); +#endif +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.h new file mode 100644 index 0000000000..78deaab1b8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_debug_tree.h @@ -0,0 +1,27 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_ +#define LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_ + +#include +#include + +#include +#include + +#include "lib/jxl/modular/encoding/dec_ma.h" +#include "lib/jxl/modular/options.h" + +namespace jxl { + +const char *PredictorName(Predictor p); +std::string PropertyName(size_t i); + +void PrintTree(const Tree &tree, const std::string &path); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_ENCODING_ENC_DEBUG_TREE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.cc new file mode 100644 index 0000000000..12a9774b81 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.cc @@ -0,0 +1,599 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/encoding/enc_debug_tree.h" +#include "lib/jxl/modular/encoding/enc_ma.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/encoding/ma_common.h" +#include "lib/jxl/modular/options.h" +#include "lib/jxl/modular/transform/transform.h" +#include "lib/jxl/toc.h" + +namespace jxl { + +namespace { +// Plot tree (if enabled) and predictor usage map. 
+constexpr bool kWantDebug = true; +// constexpr bool kPrintTree = false; + +inline std::array PredictorColor(Predictor p) { + switch (p) { + case Predictor::Zero: + return {{0, 0, 0}}; + case Predictor::Left: + return {{255, 0, 0}}; + case Predictor::Top: + return {{0, 255, 0}}; + case Predictor::Average0: + return {{0, 0, 255}}; + case Predictor::Average4: + return {{192, 128, 128}}; + case Predictor::Select: + return {{255, 255, 0}}; + case Predictor::Gradient: + return {{255, 0, 255}}; + case Predictor::Weighted: + return {{0, 255, 255}}; + // TODO + default: + return {{255, 255, 255}}; + }; +} + +} // namespace + +void GatherTreeData(const Image &image, pixel_type chan, size_t group_id, + const weighted::Header &wp_header, + const ModularOptions &options, TreeSamples &tree_samples, + size_t *total_pixels) { + const Channel &channel = image.channel[chan]; + + JXL_DEBUG_V(7, "Learning %" PRIuS "x%" PRIuS " channel %d", channel.w, + channel.h, chan); + + std::array static_props = { + {chan, (int)group_id}}; + Properties properties(kNumNonrefProperties + + kExtraPropsPerChannel * options.max_properties); + double pixel_fraction = std::min(1.0f, options.nb_repeats); + // a fraction of 0 is used to disable learning entirely. 
+ if (pixel_fraction > 0) { + pixel_fraction = std::max(pixel_fraction, + std::min(1.0, 1024.0 / (channel.w * channel.h))); + } + uint64_t threshold = + (std::numeric_limits::max() >> 32) * pixel_fraction; + uint64_t s[2] = {static_cast(0x94D049BB133111EBull), + static_cast(0xBF58476D1CE4E5B9ull)}; + // Xorshift128+ adapted from xorshift128+-inl.h + auto use_sample = [&]() { + auto s1 = s[0]; + const auto s0 = s[1]; + const auto bits = s1 + s0; // b, c + s[0] = s0; + s1 ^= s1 << 23; + s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); + s[1] = s1; + return (bits >> 32) <= threshold; + }; + + const intptr_t onerow = channel.plane.PixelsPerRow(); + Channel references(properties.size() - kNumNonrefProperties, channel.w); + weighted::State wp_state(wp_header, channel.w, channel.h); + tree_samples.PrepareForSamples(pixel_fraction * channel.h * channel.w + 64); + const bool multiple_predictors = tree_samples.NumPredictors() != 1; + auto compute_sample = [&](const pixel_type *p, size_t x, size_t y) { + pixel_type_w pred[kNumModularPredictors]; + if (multiple_predictors) { + PredictLearnAll(&properties, channel.w, p + x, onerow, x, y, references, + &wp_state, pred); + } else { + pred[static_cast(tree_samples.PredictorFromIndex(0))] = + PredictLearn(&properties, channel.w, p + x, onerow, x, y, + tree_samples.PredictorFromIndex(0), references, + &wp_state) + .guess; + } + (*total_pixels)++; + if (use_sample()) { + tree_samples.AddSample(p[x], properties, pred); + } + wp_state.UpdateErrors(p[x], x, y, channel.w); + }; + + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT p = channel.Row(y); + PrecomputeReferences(channel, y, image, chan, &references); + InitPropsRow(&properties, static_props, y); + + // TODO(veluca): avoid computing WP if we don't use its property or + // predictions. 
+ if (y > 1 && channel.w > 8 && references.w == 0) { + for (size_t x = 0; x < 2; x++) { + compute_sample(p, x, y); + } + for (size_t x = 2; x < channel.w - 2; x++) { + pixel_type_w pred[kNumModularPredictors]; + if (multiple_predictors) { + PredictLearnAllNEC(&properties, channel.w, p + x, onerow, x, y, + references, &wp_state, pred); + } else { + pred[static_cast(tree_samples.PredictorFromIndex(0))] = + PredictLearnNEC(&properties, channel.w, p + x, onerow, x, y, + tree_samples.PredictorFromIndex(0), references, + &wp_state) + .guess; + } + (*total_pixels)++; + if (use_sample()) { + tree_samples.AddSample(p[x], properties, pred); + } + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + for (size_t x = channel.w - 2; x < channel.w; x++) { + compute_sample(p, x, y); + } + } else { + for (size_t x = 0; x < channel.w; x++) { + compute_sample(p, x, y); + } + } + } +} + +Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels, + const ModularOptions &options, + const std::vector &multiplier_info = {}, + StaticPropRange static_prop_range = {}) { + for (size_t i = 0; i < kNumStaticProperties; i++) { + if (static_prop_range[i][1] == 0) { + static_prop_range[i][1] = std::numeric_limits::max(); + } + } + if (!tree_samples.HasSamples()) { + Tree tree; + tree.emplace_back(); + tree.back().predictor = tree_samples.PredictorFromIndex(0); + tree.back().property = -1; + tree.back().predictor_offset = 0; + tree.back().multiplier = 1; + return tree; + } + float pixel_fraction = tree_samples.NumSamples() * 1.0f / total_pixels; + float required_cost = pixel_fraction * 0.9 + 0.1; + tree_samples.AllSamplesDone(); + Tree tree; + ComputeBestTree(tree_samples, + options.splitting_heuristics_node_threshold * required_cost, + multiplier_info, static_prop_range, + options.fast_decode_multiplier, &tree); + return tree; +} + +Status EncodeModularChannelMAANS(const Image &image, pixel_type chan, + const weighted::Header &wp_header, + const Tree &global_tree, Token **tokenpp, + AuxOut 
*aux_out, size_t group_id, + bool skip_encoder_fast_path) { + const Channel &channel = image.channel[chan]; + Token *tokenp = *tokenpp; + JXL_ASSERT(channel.w != 0 && channel.h != 0); + + Image3F predictor_img; + if (kWantDebug) predictor_img = Image3F(channel.w, channel.h); + + JXL_DEBUG_V(6, + "Encoding %" PRIuS "x%" PRIuS + " channel %d, " + "(shift=%i,%i)", + channel.w, channel.h, chan, channel.hshift, channel.vshift); + + std::array static_props = { + {chan, (int)group_id}}; + bool use_wp, is_wp_only; + bool is_gradient_only; + size_t num_props; + FlatTree tree = FilterTree(global_tree, static_props, &num_props, &use_wp, + &is_wp_only, &is_gradient_only); + Properties properties(num_props); + MATreeLookup tree_lookup(tree); + JXL_DEBUG_V(3, "Encoding using a MA tree with %" PRIuS " nodes", tree.size()); + + // Check if this tree is a WP-only tree with a small enough property value + // range. + // Initialized to avoid clang-tidy complaining. + auto tree_lut = jxl::make_unique>(); + if (is_wp_only) { + is_wp_only = TreeToLookupTable(tree, *tree_lut); + } + if (is_gradient_only) { + is_gradient_only = TreeToLookupTable(tree, *tree_lut); + } + + if (is_wp_only && !skip_encoder_fast_path) { + for (size_t c = 0; c < 3; c++) { + FillImage(static_cast(PredictorColor(Predictor::Weighted)[c]), + &predictor_img.Plane(c)); + } + const intptr_t onerow = channel.plane.PixelsPerRow(); + weighted::State wp_state(wp_header, channel.w, channel.h); + Properties properties(1); + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + size_t offset = 0; + pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); + pixel_type_w top = (y ? *(r + x - onerow) : left); + pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); + pixel_type_w topright = + (x + 1 < channel.w && y ? *(r + x + 1 - onerow) : top); + pixel_type_w toptop = (y > 1 ? 
*(r + x - onerow - onerow) : top); + int32_t guess = wp_state.Predict( + x, y, channel.w, top, left, topright, topleft, toptop, &properties, + offset); + uint32_t pos = + kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]), + kPropRangeFast - 1); + uint32_t ctx_id = tree_lut->context_lookup[pos]; + int32_t residual = r[x] - guess - tree_lut->offsets[pos]; + *tokenp++ = Token(ctx_id, PackSigned(residual)); + wp_state.UpdateErrors(r[x], x, y, channel.w); + } + } + } else if (tree.size() == 1 && tree[0].predictor == Predictor::Gradient && + tree[0].multiplier == 1 && tree[0].predictor_offset == 0 && + !skip_encoder_fast_path) { + for (size_t c = 0; c < 3; c++) { + FillImage(static_cast(PredictorColor(Predictor::Gradient)[c]), + &predictor_img.Plane(c)); + } + const intptr_t onerow = channel.plane.PixelsPerRow(); + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); + pixel_type_w top = (y ? *(r + x - onerow) : left); + pixel_type_w topleft = (x && y ? *(r + x - 1 - onerow) : left); + int32_t guess = ClampedGradient(top, left, topleft); + int32_t residual = r[x] - guess; + *tokenp++ = Token(tree[0].childID, PackSigned(residual)); + } + } + } else if (is_gradient_only && !skip_encoder_fast_path) { + for (size_t c = 0; c < 3; c++) { + FillImage(static_cast(PredictorColor(Predictor::Gradient)[c]), + &predictor_img.Plane(c)); + } + const intptr_t onerow = channel.plane.PixelsPerRow(); + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); + pixel_type_w top = (y ? *(r + x - onerow) : left); + pixel_type_w topleft = (x && y ? 
*(r + x - 1 - onerow) : left); + int32_t guess = ClampedGradient(top, left, topleft); + uint32_t pos = + kPropRangeFast + + std::min( + std::max(-kPropRangeFast, top + left - topleft), + kPropRangeFast - 1); + uint32_t ctx_id = tree_lut->context_lookup[pos]; + int32_t residual = r[x] - guess - tree_lut->offsets[pos]; + *tokenp++ = Token(ctx_id, PackSigned(residual)); + } + } + } else if (tree.size() == 1 && tree[0].predictor == Predictor::Zero && + tree[0].multiplier == 1 && tree[0].predictor_offset == 0 && + !skip_encoder_fast_path) { + for (size_t c = 0; c < 3; c++) { + FillImage(static_cast(PredictorColor(Predictor::Zero)[c]), + &predictor_img.Plane(c)); + } + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT p = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + *tokenp++ = Token(tree[0].childID, PackSigned(p[x])); + } + } + } else if (tree.size() == 1 && tree[0].predictor != Predictor::Weighted && + (tree[0].multiplier & (tree[0].multiplier - 1)) == 0 && + tree[0].predictor_offset == 0 && !skip_encoder_fast_path) { + // multiplier is a power of 2. 
+ for (size_t c = 0; c < 3; c++) { + FillImage(static_cast(PredictorColor(tree[0].predictor)[c]), + &predictor_img.Plane(c)); + } + uint32_t mul_shift = FloorLog2Nonzero((uint32_t)tree[0].multiplier); + const intptr_t onerow = channel.plane.PixelsPerRow(); + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + PredictionResult pred = PredictNoTreeNoWP(channel.w, r + x, onerow, x, + y, tree[0].predictor); + pixel_type_w residual = r[x] - pred.guess; + JXL_DASSERT((residual >> mul_shift) * tree[0].multiplier == residual); + *tokenp++ = Token(tree[0].childID, PackSigned(residual >> mul_shift)); + } + } + + } else if (!use_wp && !skip_encoder_fast_path) { + const intptr_t onerow = channel.plane.PixelsPerRow(); + Channel references(properties.size() - kNumNonrefProperties, channel.w); + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT p = channel.Row(y); + PrecomputeReferences(channel, y, image, chan, &references); + float *pred_img_row[3]; + if (kWantDebug) { + for (size_t c = 0; c < 3; c++) { + pred_img_row[c] = predictor_img.PlaneRow(c, y); + } + } + InitPropsRow(&properties, static_props, y); + for (size_t x = 0; x < channel.w; x++) { + PredictionResult res = + PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references); + if (kWantDebug) { + for (size_t i = 0; i < 3; i++) { + pred_img_row[i][x] = PredictorColor(res.predictor)[i]; + } + } + pixel_type_w residual = p[x] - res.guess; + JXL_DASSERT(residual % res.multiplier == 0); + *tokenp++ = Token(res.context, PackSigned(residual / res.multiplier)); + } + } + } else { + const intptr_t onerow = channel.plane.PixelsPerRow(); + Channel references(properties.size() - kNumNonrefProperties, channel.w); + weighted::State wp_state(wp_header, channel.w, channel.h); + for (size_t y = 0; y < channel.h; y++) { + const pixel_type *JXL_RESTRICT p = channel.Row(y); + 
PrecomputeReferences(channel, y, image, chan, &references); + float *pred_img_row[3]; + if (kWantDebug) { + for (size_t c = 0; c < 3; c++) { + pred_img_row[c] = predictor_img.PlaneRow(c, y); + } + } + InitPropsRow(&properties, static_props, y); + for (size_t x = 0; x < channel.w; x++) { + PredictionResult res = + PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references, &wp_state); + if (kWantDebug) { + for (size_t i = 0; i < 3; i++) { + pred_img_row[i][x] = PredictorColor(res.predictor)[i]; + } + } + pixel_type_w residual = p[x] - res.guess; + JXL_DASSERT(residual % res.multiplier == 0); + *tokenp++ = Token(res.context, PackSigned(residual / res.multiplier)); + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + } + } + /* TODO(szabadka): Add cparams to the call stack here. + if (kWantDebug && WantDebugOutput(cparams)) { + DumpImage( + cparams, + ("pred_" + ToString(group_id) + "_" + ToString(chan)).c_str(), + predictor_img); + } + */ + *tokenpp = tokenp; + return true; +} + +Status ModularEncode(const Image &image, const ModularOptions &options, + BitWriter *writer, AuxOut *aux_out, size_t layer, + size_t group_id, TreeSamples *tree_samples, + size_t *total_pixels, const Tree *tree, + GroupHeader *header, std::vector *tokens, + size_t *width) { + if (image.error) return JXL_FAILURE("Invalid image"); + size_t nb_channels = image.channel.size(); + JXL_DEBUG_V( + 2, "Encoding %" PRIuS "-channel, %i-bit, %" PRIuS "x%" PRIuS " image.", + nb_channels, image.bitdepth, image.w, image.h); + + if (nb_channels < 1) { + return true; // is there any use for a zero-channel image? 
+ } + + // encode transforms + GroupHeader header_storage; + if (header == nullptr) header = &header_storage; + Bundle::Init(header); + if (options.predictor == Predictor::Weighted) { + weighted::PredictorMode(options.wp_mode, &header->wp_header); + } + header->transforms = image.transform; + // This doesn't actually work + if (tree != nullptr) { + header->use_global_tree = true; + } + if (tree_samples == nullptr && tree == nullptr) { + JXL_RETURN_IF_ERROR(Bundle::Write(*header, writer, layer, aux_out)); + } + + TreeSamples tree_samples_storage; + size_t total_pixels_storage = 0; + if (!total_pixels) total_pixels = &total_pixels_storage; + // If there's no tree, compute one (or gather data to). + if (tree == nullptr) { + bool gather_data = tree_samples != nullptr; + if (tree_samples == nullptr) { + JXL_RETURN_IF_ERROR(tree_samples_storage.SetPredictor( + options.predictor, options.wp_tree_mode)); + JXL_RETURN_IF_ERROR(tree_samples_storage.SetProperties( + options.splitting_heuristics_properties, options.wp_tree_mode)); + std::vector pixel_samples; + std::vector diff_samples; + std::vector group_pixel_count; + std::vector channel_pixel_count; + CollectPixelSamples(image, options, 0, group_pixel_count, + channel_pixel_count, pixel_samples, diff_samples); + std::vector dummy_multiplier_info; + StaticPropRange range; + tree_samples_storage.PreQuantizeProperties( + range, dummy_multiplier_info, group_pixel_count, channel_pixel_count, + pixel_samples, diff_samples, options.max_property_values); + } + for (size_t i = 0; i < nb_channels; i++) { + if (!image.channel[i].w || !image.channel[i].h) { + continue; // skip empty channels + } + if (i >= image.nb_meta_channels && + (image.channel[i].w > options.max_chan_size || + image.channel[i].h > options.max_chan_size)) { + break; + } + GatherTreeData(image, i, group_id, header->wp_header, options, + gather_data ? 
*tree_samples : tree_samples_storage, + total_pixels); + } + if (gather_data) return true; + } + + JXL_ASSERT((tree == nullptr) == (tokens == nullptr)); + + Tree tree_storage; + std::vector> tokens_storage(1); + // Compute tree. + if (tree == nullptr) { + EntropyEncodingData code; + std::vector context_map; + + std::vector> tree_tokens(1); + tree_storage = + LearnTree(std::move(tree_samples_storage), *total_pixels, options); + tree = &tree_storage; + tokens = &tokens_storage[0]; + + Tree decoded_tree; + TokenizeTree(*tree, &tree_tokens[0], &decoded_tree); + JXL_ASSERT(tree->size() == decoded_tree.size()); + tree_storage = std::move(decoded_tree); + + /* TODO(szabadka) Add text output callback + if (kWantDebug && kPrintTree && WantDebugOutput(aux_out)) { + PrintTree(*tree, aux_out->debug_prefix + "/tree_" + ToString(group_id)); + } */ + + // Write tree + BuildAndEncodeHistograms(HistogramParams(), kNumTreeContexts, tree_tokens, + &code, &context_map, writer, kLayerModularTree, + aux_out); + WriteTokens(tree_tokens[0], code, context_map, writer, kLayerModularTree, + aux_out); + } + + size_t image_width = 0; + size_t total_tokens = 0; + for (size_t i = 0; i < nb_channels; i++) { + if (i >= image.nb_meta_channels && + (image.channel[i].w > options.max_chan_size || + image.channel[i].h > options.max_chan_size)) { + break; + } + if (image.channel[i].w > image_width) image_width = image.channel[i].w; + total_tokens += image.channel[i].w * image.channel[i].h; + } + if (options.zero_tokens) { + tokens->resize(tokens->size() + total_tokens, {0, 0}); + } else { + // Do one big allocation for all the tokens we'll need, + // to avoid reallocs that might require copying. 
+ size_t pos = tokens->size(); + tokens->resize(pos + total_tokens); + Token *tokenp = tokens->data() + pos; + for (size_t i = 0; i < nb_channels; i++) { + if (!image.channel[i].w || !image.channel[i].h) { + continue; // skip empty channels + } + if (i >= image.nb_meta_channels && + (image.channel[i].w > options.max_chan_size || + image.channel[i].h > options.max_chan_size)) { + break; + } + JXL_RETURN_IF_ERROR(EncodeModularChannelMAANS( + image, i, header->wp_header, *tree, &tokenp, aux_out, group_id, + options.skip_encoder_fast_path)); + } + // Make sure we actually wrote all tokens + JXL_CHECK(tokenp == tokens->data() + tokens->size()); + } + + // Write data if not using a global tree/ANS stream. + if (!header->use_global_tree) { + EntropyEncodingData code; + std::vector context_map; + HistogramParams histo_params; + histo_params.image_widths.push_back(image_width); + BuildAndEncodeHistograms(histo_params, (tree->size() + 1) / 2, + tokens_storage, &code, &context_map, writer, layer, + aux_out); + WriteTokens(tokens_storage[0], code, context_map, writer, layer, aux_out); + } else { + *width = image_width; + } + return true; +} + +Status ModularGenericCompress(Image &image, const ModularOptions &opts, + BitWriter *writer, AuxOut *aux_out, size_t layer, + size_t group_id, TreeSamples *tree_samples, + size_t *total_pixels, const Tree *tree, + GroupHeader *header, std::vector *tokens, + size_t *width) { + if (image.w == 0 || image.h == 0) return true; + ModularOptions options = opts; // Make a copy to modify it. + + if (options.predictor == static_cast(-1)) { + options.predictor = Predictor::Gradient; + } + + size_t bits = writer ? writer->BitsWritten() : 0; + JXL_RETURN_IF_ERROR(ModularEncode(image, options, writer, aux_out, layer, + group_id, tree_samples, total_pixels, tree, + header, tokens, width)); + bits = writer ? 
writer->BitsWritten() - bits : 0; + if (writer) { + JXL_DEBUG_V(4, + "Modular-encoded a %" PRIuS "x%" PRIuS + " bitdepth=%i nbchans=%" PRIuS " image in %" PRIuS " bytes", + image.w, image.h, image.bitdepth, image.channel.size(), + bits / 8); + } + (void)bits; + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.h new file mode 100644 index 0000000000..04df504750 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_encoding.h @@ -0,0 +1,47 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_ +#define LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_ + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/enc_bit_writer.h" +#include "lib/jxl/image.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/encoding/enc_ma.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/options.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +Tree LearnTree(TreeSamples &&tree_samples, size_t total_pixels, + const ModularOptions &options, + const std::vector &multiplier_info = {}, + StaticPropRange static_prop_range = {}); + +// TODO(veluca): make cleaner interfaces. + +Status ModularGenericCompress( + Image &image, const ModularOptions &opts, BitWriter *writer, + AuxOut *aux_out = nullptr, size_t layer = 0, size_t group_id = 0, + // For gathering data for producing a global tree. 
+ TreeSamples *tree_samples = nullptr, size_t *total_pixels = nullptr, + // For encoding with global tree. + const Tree *tree = nullptr, GroupHeader *header = nullptr, + std::vector *tokens = nullptr, size_t *widths = nullptr); +} // namespace jxl + +#endif // LIB_JXL_MODULAR_ENCODING_ENC_ENCODING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.cc new file mode 100644 index 0000000000..72b027906d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.cc @@ -0,0 +1,1011 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/encoding/enc_ma.h" + +#include +#include +#include +#include +#include +#include + +#include "lib/jxl/modular/encoding/ma_common.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/modular/encoding/enc_ma.cc" +#include +#include + +#include "lib/jxl/base/random.h" +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/fast_math-inl.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/options.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Eq; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::Lt; +using hwy::HWY_NAMESPACE::Max; + +const HWY_FULL(float) df; +const HWY_FULL(int32_t) di; +size_t Padded(size_t x) { return RoundUpTo(x, Lanes(df)); } + +// Compute entropy of the histogram, taking into account the minimum probability +// for symbols with non-zero counts. 
+float EstimateBits(const int32_t *counts, size_t num_symbols) { + int32_t total = std::accumulate(counts, counts + num_symbols, 0); + const auto zero = Zero(df); + const auto minprob = Set(df, 1.0f / ANS_TAB_SIZE); + const auto inv_total = Set(df, 1.0f / total); + auto bits_lanes = Zero(df); + auto total_v = Set(di, total); + for (size_t i = 0; i < num_symbols; i += Lanes(df)) { + const auto counts_iv = LoadU(di, &counts[i]); + const auto counts_fv = ConvertTo(df, counts_iv); + const auto probs = Mul(counts_fv, inv_total); + const auto mprobs = Max(probs, minprob); + const auto nbps = IfThenElse(Eq(counts_iv, total_v), BitCast(di, zero), + BitCast(di, FastLog2f(df, mprobs))); + bits_lanes = Sub(bits_lanes, Mul(counts_fv, BitCast(df, nbps))); + } + return GetLane(SumOfLanes(df, bits_lanes)); +} + +void MakeSplitNode(size_t pos, int property, int splitval, Predictor lpred, + int64_t loff, Predictor rpred, int64_t roff, Tree *tree) { + // Note that the tree splits on *strictly greater*. + (*tree)[pos].lchild = tree->size(); + (*tree)[pos].rchild = tree->size() + 1; + (*tree)[pos].splitval = splitval; + (*tree)[pos].property = property; + tree->emplace_back(); + tree->back().property = -1; + tree->back().predictor = rpred; + tree->back().predictor_offset = roff; + tree->back().multiplier = 1; + tree->emplace_back(); + tree->back().property = -1; + tree->back().predictor = lpred; + tree->back().predictor_offset = loff; + tree->back().multiplier = 1; +} + +enum class IntersectionType { kNone, kPartial, kInside }; +IntersectionType BoxIntersects(StaticPropRange needle, StaticPropRange haystack, + uint32_t &partial_axis, uint32_t &partial_val) { + bool partial = false; + for (size_t i = 0; i < kNumStaticProperties; i++) { + if (haystack[i][0] >= needle[i][1]) { + return IntersectionType::kNone; + } + if (haystack[i][1] <= needle[i][0]) { + return IntersectionType::kNone; + } + if (haystack[i][0] <= needle[i][0] && haystack[i][1] >= needle[i][1]) { + continue; + } + 
partial = true; + partial_axis = i; + if (haystack[i][0] > needle[i][0] && haystack[i][0] < needle[i][1]) { + partial_val = haystack[i][0] - 1; + } else { + JXL_DASSERT(haystack[i][1] > needle[i][0] && + haystack[i][1] < needle[i][1]); + partial_val = haystack[i][1] - 1; + } + } + return partial ? IntersectionType::kPartial : IntersectionType::kInside; +} + +void SplitTreeSamples(TreeSamples &tree_samples, size_t begin, size_t pos, + size_t end, size_t prop) { + auto cmp = [&](size_t a, size_t b) { + return int32_t(tree_samples.Property(prop, a)) - + int32_t(tree_samples.Property(prop, b)); + }; + Rng rng(0); + while (end > begin + 1) { + { + size_t pivot = rng.UniformU(begin, end); + tree_samples.Swap(begin, pivot); + } + size_t pivot_begin = begin; + size_t pivot_end = pivot_begin + 1; + for (size_t i = begin + 1; i < end; i++) { + JXL_DASSERT(i >= pivot_end); + JXL_DASSERT(pivot_end > pivot_begin); + int32_t cmp_result = cmp(i, pivot_begin); + if (cmp_result < 0) { // i < pivot, move pivot forward and put i before + // the pivot. + tree_samples.ThreeShuffle(pivot_begin, pivot_end, i); + pivot_begin++; + pivot_end++; + } else if (cmp_result == 0) { + tree_samples.Swap(pivot_end, i); + pivot_end++; + } + } + JXL_DASSERT(pivot_begin >= begin); + JXL_DASSERT(pivot_end > pivot_begin); + JXL_DASSERT(pivot_end <= end); + for (size_t i = begin; i < pivot_begin; i++) { + JXL_DASSERT(cmp(i, pivot_begin) < 0); + } + for (size_t i = pivot_end; i < end; i++) { + JXL_DASSERT(cmp(i, pivot_begin) > 0); + } + for (size_t i = pivot_begin; i < pivot_end; i++) { + JXL_DASSERT(cmp(i, pivot_begin) == 0); + } + // We now have that [begin, pivot_begin) is < pivot, [pivot_begin, + // pivot_end) is = pivot, and [pivot_end, end) is > pivot. + // If pos falls in the first or the last interval, we continue in that + // interval; otherwise, we are done. 
+ if (pivot_begin > pos) { + end = pivot_begin; + } else if (pivot_end < pos) { + begin = pivot_end; + } else { + break; + } + } +} + +void FindBestSplit(TreeSamples &tree_samples, float threshold, + const std::vector &mul_info, + StaticPropRange initial_static_prop_range, + float fast_decode_multiplier, Tree *tree) { + struct NodeInfo { + size_t pos; + size_t begin; + size_t end; + uint64_t used_properties; + StaticPropRange static_prop_range; + }; + std::vector nodes; + nodes.push_back(NodeInfo{0, 0, tree_samples.NumDistinctSamples(), 0, + initial_static_prop_range}); + + size_t num_predictors = tree_samples.NumPredictors(); + size_t num_properties = tree_samples.NumProperties(); + + // TODO(veluca): consider parallelizing the search (processing multiple nodes + // at a time). + while (!nodes.empty()) { + size_t pos = nodes.back().pos; + size_t begin = nodes.back().begin; + size_t end = nodes.back().end; + uint64_t used_properties = nodes.back().used_properties; + StaticPropRange static_prop_range = nodes.back().static_prop_range; + nodes.pop_back(); + if (begin == end) continue; + + struct SplitInfo { + size_t prop = 0; + uint32_t val = 0; + size_t pos = 0; + float lcost = std::numeric_limits::max(); + float rcost = std::numeric_limits::max(); + Predictor lpred = Predictor::Zero; + Predictor rpred = Predictor::Zero; + float Cost() { return lcost + rcost; } + }; + + SplitInfo best_split_static_constant; + SplitInfo best_split_static; + SplitInfo best_split_nonstatic; + SplitInfo best_split_nowp; + + JXL_DASSERT(begin <= end); + JXL_DASSERT(end <= tree_samples.NumDistinctSamples()); + + // Compute the maximum token in the range. + size_t max_symbols = 0; + for (size_t pred = 0; pred < num_predictors; pred++) { + for (size_t i = begin; i < end; i++) { + uint32_t tok = tree_samples.Token(pred, i); + max_symbols = max_symbols > tok + 1 ? 
max_symbols : tok + 1; + } + } + max_symbols = Padded(max_symbols); + std::vector counts(max_symbols * num_predictors); + std::vector tot_extra_bits(num_predictors); + for (size_t pred = 0; pred < num_predictors; pred++) { + for (size_t i = begin; i < end; i++) { + counts[pred * max_symbols + tree_samples.Token(pred, i)] += + tree_samples.Count(i); + tot_extra_bits[pred] += + tree_samples.NBits(pred, i) * tree_samples.Count(i); + } + } + + float base_bits; + { + size_t pred = tree_samples.PredictorIndex((*tree)[pos].predictor); + base_bits = + EstimateBits(counts.data() + pred * max_symbols, max_symbols) + + tot_extra_bits[pred]; + } + + SplitInfo *best = &best_split_nonstatic; + + SplitInfo forced_split; + // The multiplier ranges cut halfway through the current ranges of static + // properties. We do this even if the current node is not a leaf, to + // minimize the number of nodes in the resulting tree. + for (size_t i = 0; i < mul_info.size(); i++) { + uint32_t axis, val; + IntersectionType t = + BoxIntersects(static_prop_range, mul_info[i].range, axis, val); + if (t == IntersectionType::kNone) continue; + if (t == IntersectionType::kInside) { + (*tree)[pos].multiplier = mul_info[i].multiplier; + break; + } + if (t == IntersectionType::kPartial) { + forced_split.val = tree_samples.QuantizeProperty(axis, val); + forced_split.prop = axis; + forced_split.lcost = forced_split.rcost = base_bits / 2 - threshold; + forced_split.lpred = forced_split.rpred = (*tree)[pos].predictor; + best = &forced_split; + best->pos = begin; + JXL_ASSERT(best->prop == tree_samples.PropertyFromIndex(best->prop)); + for (size_t x = begin; x < end; x++) { + if (tree_samples.Property(best->prop, x) <= best->val) { + best->pos++; + } + } + break; + } + } + + if (best != &forced_split) { + std::vector prop_value_used_count; + std::vector count_increase; + std::vector extra_bits_increase; + // For each property, compute which of its values are used, and what + // tokens correspond to those 
usages. Then, iterate through the values, + // and compute the entropy of each side of the split (of the form `prop > + // threshold`). Finally, find the split that minimizes the cost. + struct CostInfo { + float cost = std::numeric_limits::max(); + float extra_cost = 0; + float Cost() const { return cost + extra_cost; } + Predictor pred; // will be uninitialized in some cases, but never used. + }; + std::vector costs_l; + std::vector costs_r; + + std::vector counts_above(max_symbols); + std::vector counts_below(max_symbols); + + // The lower the threshold, the higher the expected noisiness of the + // estimate. Thus, discourage changing predictors. + float change_pred_penalty = 800.0f / (100.0f + threshold); + for (size_t prop = 0; prop < num_properties && base_bits > threshold; + prop++) { + costs_l.clear(); + costs_r.clear(); + size_t prop_size = tree_samples.NumPropertyValues(prop); + if (extra_bits_increase.size() < prop_size) { + count_increase.resize(prop_size * max_symbols); + extra_bits_increase.resize(prop_size); + } + // Clear prop_value_used_count (which cannot be cleared "on the go") + prop_value_used_count.clear(); + prop_value_used_count.resize(prop_size); + + size_t first_used = prop_size; + size_t last_used = 0; + + // TODO(veluca): consider finding multiple splits along a single + // property at the same time, possibly with a bottom-up approach. + for (size_t i = begin; i < end; i++) { + size_t p = tree_samples.Property(prop, i); + prop_value_used_count[p]++; + last_used = std::max(last_used, p); + first_used = std::min(first_used, p); + } + costs_l.resize(last_used - first_used); + costs_r.resize(last_used - first_used); + // For all predictors, compute the right and left costs of each split. + for (size_t pred = 0; pred < num_predictors; pred++) { + // Compute cost and histogram increments for each property value. 
+ for (size_t i = begin; i < end; i++) { + size_t p = tree_samples.Property(prop, i); + size_t cnt = tree_samples.Count(i); + size_t sym = tree_samples.Token(pred, i); + count_increase[p * max_symbols + sym] += cnt; + extra_bits_increase[p] += tree_samples.NBits(pred, i) * cnt; + } + memcpy(counts_above.data(), counts.data() + pred * max_symbols, + max_symbols * sizeof counts_above[0]); + memset(counts_below.data(), 0, max_symbols * sizeof counts_below[0]); + size_t extra_bits_below = 0; + // Exclude last used: this ensures neither counts_above nor + // counts_below is empty. + for (size_t i = first_used; i < last_used; i++) { + if (!prop_value_used_count[i]) continue; + extra_bits_below += extra_bits_increase[i]; + // The increase for this property value has been used, and will not + // be used again: clear it. Also below. + extra_bits_increase[i] = 0; + for (size_t sym = 0; sym < max_symbols; sym++) { + counts_above[sym] -= count_increase[i * max_symbols + sym]; + counts_below[sym] += count_increase[i * max_symbols + sym]; + count_increase[i * max_symbols + sym] = 0; + } + float rcost = EstimateBits(counts_above.data(), max_symbols) + + tot_extra_bits[pred] - extra_bits_below; + float lcost = EstimateBits(counts_below.data(), max_symbols) + + extra_bits_below; + JXL_DASSERT(extra_bits_below <= tot_extra_bits[pred]); + float penalty = 0; + // Never discourage moving away from the Weighted predictor. 
+ if (tree_samples.PredictorFromIndex(pred) != + (*tree)[pos].predictor && + (*tree)[pos].predictor != Predictor::Weighted) { + penalty = change_pred_penalty; + } + // If everything else is equal, disfavour Weighted (slower) and + // favour Zero (faster if it's the only predictor used in a + // group+channel combination) + if (tree_samples.PredictorFromIndex(pred) == Predictor::Weighted) { + penalty += 1e-8; + } + if (tree_samples.PredictorFromIndex(pred) == Predictor::Zero) { + penalty -= 1e-8; + } + if (rcost + penalty < costs_r[i - first_used].Cost()) { + costs_r[i - first_used].cost = rcost; + costs_r[i - first_used].extra_cost = penalty; + costs_r[i - first_used].pred = + tree_samples.PredictorFromIndex(pred); + } + if (lcost + penalty < costs_l[i - first_used].Cost()) { + costs_l[i - first_used].cost = lcost; + costs_l[i - first_used].extra_cost = penalty; + costs_l[i - first_used].pred = + tree_samples.PredictorFromIndex(pred); + } + } + } + // Iterate through the possible splits and find the one with minimum sum + // of costs of the two sides. + size_t split = begin; + for (size_t i = first_used; i < last_used; i++) { + if (!prop_value_used_count[i]) continue; + split += prop_value_used_count[i]; + float rcost = costs_r[i - first_used].cost; + float lcost = costs_l[i - first_used].cost; + // WP was not used + we would use the WP property or predictor + bool adds_wp = + (tree_samples.PropertyFromIndex(prop) == kWPProp && + (used_properties & (1LU << prop)) == 0) || + ((costs_l[i - first_used].pred == Predictor::Weighted || + costs_r[i - first_used].pred == Predictor::Weighted) && + (*tree)[pos].predictor != Predictor::Weighted); + bool zero_entropy_side = rcost == 0 || lcost == 0; + + SplitInfo &best = + prop < kNumStaticProperties + ? (zero_entropy_side ? best_split_static_constant + : best_split_static) + : (adds_wp ? 
best_split_nonstatic : best_split_nowp); + if (lcost + rcost < best.Cost()) { + best.prop = prop; + best.val = i; + best.pos = split; + best.lcost = lcost; + best.lpred = costs_l[i - first_used].pred; + best.rcost = rcost; + best.rpred = costs_r[i - first_used].pred; + } + } + // Clear extra_bits_increase and cost_increase for last_used. + extra_bits_increase[last_used] = 0; + for (size_t sym = 0; sym < max_symbols; sym++) { + count_increase[last_used * max_symbols + sym] = 0; + } + } + + // Try to avoid introducing WP. + if (best_split_nowp.Cost() + threshold < base_bits && + best_split_nowp.Cost() <= fast_decode_multiplier * best->Cost()) { + best = &best_split_nowp; + } + // Split along static props if possible and not significantly more + // expensive. + if (best_split_static.Cost() + threshold < base_bits && + best_split_static.Cost() <= fast_decode_multiplier * best->Cost()) { + best = &best_split_static; + } + // Split along static props to create constant nodes if possible. + if (best_split_static_constant.Cost() + threshold < base_bits) { + best = &best_split_static_constant; + } + } + + if (best->Cost() + threshold < base_bits) { + uint32_t p = tree_samples.PropertyFromIndex(best->prop); + pixel_type dequant = + tree_samples.UnquantizeProperty(best->prop, best->val); + // Split node and try to split children. 
+ MakeSplitNode(pos, p, dequant, best->lpred, 0, best->rpred, 0, tree); + // "Sort" according to winning property + SplitTreeSamples(tree_samples, begin, best->pos, end, best->prop); + if (p >= kNumStaticProperties) { + used_properties |= 1 << best->prop; + } + auto new_sp_range = static_prop_range; + if (p < kNumStaticProperties) { + JXL_ASSERT(static_cast(dequant + 1) <= new_sp_range[p][1]); + new_sp_range[p][1] = dequant + 1; + JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]); + } + nodes.push_back(NodeInfo{(*tree)[pos].rchild, begin, best->pos, + used_properties, new_sp_range}); + new_sp_range = static_prop_range; + if (p < kNumStaticProperties) { + JXL_ASSERT(new_sp_range[p][0] <= static_cast(dequant + 1)); + new_sp_range[p][0] = dequant + 1; + JXL_ASSERT(new_sp_range[p][0] < new_sp_range[p][1]); + } + nodes.push_back(NodeInfo{(*tree)[pos].lchild, best->pos, end, + used_properties, new_sp_range}); + } + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(FindBestSplit); // Local function. + +void ComputeBestTree(TreeSamples &tree_samples, float threshold, + const std::vector &mul_info, + StaticPropRange static_prop_range, + float fast_decode_multiplier, Tree *tree) { + // TODO(veluca): take into account that different contexts can have different + // uint configs. + // + // Initialize tree. 
+ tree->emplace_back(); + tree->back().property = -1; + tree->back().predictor = tree_samples.PredictorFromIndex(0); + tree->back().predictor_offset = 0; + tree->back().multiplier = 1; + JXL_ASSERT(tree_samples.NumProperties() < 64); + + JXL_ASSERT(tree_samples.NumDistinctSamples() <= + std::numeric_limits::max()); + HWY_DYNAMIC_DISPATCH(FindBestSplit) + (tree_samples, threshold, mul_info, static_prop_range, fast_decode_multiplier, + tree); +} + +constexpr int32_t TreeSamples::kPropertyRange; +constexpr uint32_t TreeSamples::kDedupEntryUnused; + +Status TreeSamples::SetPredictor(Predictor predictor, + ModularOptions::TreeMode wp_tree_mode) { + if (wp_tree_mode == ModularOptions::TreeMode::kWPOnly) { + predictors = {Predictor::Weighted}; + residuals.resize(1); + return true; + } + if (wp_tree_mode == ModularOptions::TreeMode::kNoWP && + predictor == Predictor::Weighted) { + return JXL_FAILURE("Invalid predictor settings"); + } + if (predictor == Predictor::Variable) { + for (size_t i = 0; i < kNumModularPredictors; i++) { + predictors.push_back(static_cast(i)); + } + std::swap(predictors[0], predictors[static_cast(Predictor::Weighted)]); + std::swap(predictors[1], predictors[static_cast(Predictor::Gradient)]); + } else if (predictor == Predictor::Best) { + predictors = {Predictor::Weighted, Predictor::Gradient}; + } else { + predictors = {predictor}; + } + if (wp_tree_mode == ModularOptions::TreeMode::kNoWP) { + auto wp_it = + std::find(predictors.begin(), predictors.end(), Predictor::Weighted); + if (wp_it != predictors.end()) { + predictors.erase(wp_it); + } + } + residuals.resize(predictors.size()); + return true; +} + +Status TreeSamples::SetProperties(const std::vector &properties, + ModularOptions::TreeMode wp_tree_mode) { + props_to_use = properties; + if (wp_tree_mode == ModularOptions::TreeMode::kWPOnly) { + props_to_use = {static_cast(kWPProp)}; + } + if (wp_tree_mode == ModularOptions::TreeMode::kGradientOnly) { + props_to_use = 
{static_cast(kGradientProp)}; + } + if (wp_tree_mode == ModularOptions::TreeMode::kNoWP) { + auto it = std::find(props_to_use.begin(), props_to_use.end(), kWPProp); + if (it != props_to_use.end()) { + props_to_use.erase(it); + } + } + if (props_to_use.empty()) { + return JXL_FAILURE("Invalid property set configuration"); + } + props.resize(props_to_use.size()); + return true; +} + +void TreeSamples::InitTable(size_t size) { + JXL_DASSERT((size & (size - 1)) == 0); + if (dedup_table_.size() == size) return; + dedup_table_.resize(size, kDedupEntryUnused); + for (size_t i = 0; i < NumDistinctSamples(); i++) { + if (sample_counts[i] != std::numeric_limits::max()) { + AddToTable(i); + } + } +} + +bool TreeSamples::AddToTableAndMerge(size_t a) { + size_t pos1 = Hash1(a); + size_t pos2 = Hash2(a); + if (dedup_table_[pos1] != kDedupEntryUnused && + IsSameSample(a, dedup_table_[pos1])) { + JXL_DASSERT(sample_counts[a] == 1); + sample_counts[dedup_table_[pos1]]++; + // Remove from hash table samples that are saturated. + if (sample_counts[dedup_table_[pos1]] == + std::numeric_limits::max()) { + dedup_table_[pos1] = kDedupEntryUnused; + } + return true; + } + if (dedup_table_[pos2] != kDedupEntryUnused && + IsSameSample(a, dedup_table_[pos2])) { + JXL_DASSERT(sample_counts[a] == 1); + sample_counts[dedup_table_[pos2]]++; + // Remove from hash table samples that are saturated. 
+ if (sample_counts[dedup_table_[pos2]] == + std::numeric_limits::max()) { + dedup_table_[pos2] = kDedupEntryUnused; + } + return true; + } + AddToTable(a); + return false; +} + +void TreeSamples::AddToTable(size_t a) { + size_t pos1 = Hash1(a); + size_t pos2 = Hash2(a); + if (dedup_table_[pos1] == kDedupEntryUnused) { + dedup_table_[pos1] = a; + } else if (dedup_table_[pos2] == kDedupEntryUnused) { + dedup_table_[pos2] = a; + } +} + +void TreeSamples::PrepareForSamples(size_t num_samples) { + for (auto &res : residuals) { + res.reserve(res.size() + num_samples); + } + for (auto &p : props) { + p.reserve(p.size() + num_samples); + } + size_t total_num_samples = num_samples + sample_counts.size(); + size_t next_pow2 = 1LLU << CeilLog2Nonzero(total_num_samples * 3 / 2); + InitTable(next_pow2); +} + +size_t TreeSamples::Hash1(size_t a) const { + constexpr uint64_t constant = 0x1e35a7bd; + uint64_t h = constant; + for (const auto &r : residuals) { + h = h * constant + r[a].tok; + h = h * constant + r[a].nbits; + } + for (const auto &p : props) { + h = h * constant + p[a]; + } + return (h >> 16) & (dedup_table_.size() - 1); +} +size_t TreeSamples::Hash2(size_t a) const { + constexpr uint64_t constant = 0x1e35a7bd1e35a7bd; + uint64_t h = constant; + for (const auto &p : props) { + h = h * constant ^ p[a]; + } + for (const auto &r : residuals) { + h = h * constant ^ r[a].tok; + h = h * constant ^ r[a].nbits; + } + return (h >> 16) & (dedup_table_.size() - 1); +} + +bool TreeSamples::IsSameSample(size_t a, size_t b) const { + bool ret = true; + for (const auto &r : residuals) { + if (r[a].tok != r[b].tok) { + ret = false; + } + if (r[a].nbits != r[b].nbits) { + ret = false; + } + } + for (const auto &p : props) { + if (p[a] != p[b]) { + ret = false; + } + } + return ret; +} + +void TreeSamples::AddSample(pixel_type_w pixel, const Properties &properties, + const pixel_type_w *predictions) { + for (size_t i = 0; i < predictors.size(); i++) { + pixel_type v = pixel - 
predictions[static_cast(predictors[i])]; + uint32_t tok, nbits, bits; + HybridUintConfig(4, 1, 2).Encode(PackSigned(v), &tok, &nbits, &bits); + JXL_DASSERT(tok < 256); + JXL_DASSERT(nbits < 256); + residuals[i].emplace_back( + ResidualToken{static_cast(tok), static_cast(nbits)}); + } + for (size_t i = 0; i < props_to_use.size(); i++) { + props[i].push_back(QuantizeProperty(i, properties[props_to_use[i]])); + } + sample_counts.push_back(1); + num_samples++; + if (AddToTableAndMerge(sample_counts.size() - 1)) { + for (auto &r : residuals) r.pop_back(); + for (auto &p : props) p.pop_back(); + sample_counts.pop_back(); + } +} + +void TreeSamples::Swap(size_t a, size_t b) { + if (a == b) return; + for (auto &r : residuals) { + std::swap(r[a], r[b]); + } + for (auto &p : props) { + std::swap(p[a], p[b]); + } + std::swap(sample_counts[a], sample_counts[b]); +} + +void TreeSamples::ThreeShuffle(size_t a, size_t b, size_t c) { + if (b == c) return Swap(a, b); + for (auto &r : residuals) { + auto tmp = r[a]; + r[a] = r[c]; + r[c] = r[b]; + r[b] = tmp; + } + for (auto &p : props) { + auto tmp = p[a]; + p[a] = p[c]; + p[c] = p[b]; + p[b] = tmp; + } + auto tmp = sample_counts[a]; + sample_counts[a] = sample_counts[c]; + sample_counts[c] = sample_counts[b]; + sample_counts[b] = tmp; +} + +namespace { +std::vector QuantizeHistogram(const std::vector &histogram, + size_t num_chunks) { + if (histogram.empty()) return {}; + // TODO(veluca): selecting distinct quantiles is likely not the best + // way to go about this. 
+ std::vector thresholds; + uint64_t sum = std::accumulate(histogram.begin(), histogram.end(), 0LU); + uint64_t cumsum = 0; + uint64_t threshold = 1; + for (size_t i = 0; i + 1 < histogram.size(); i++) { + cumsum += histogram[i]; + if (cumsum >= threshold * sum / num_chunks) { + thresholds.push_back(i); + while (cumsum > threshold * sum / num_chunks) threshold++; + } + } + return thresholds; +} + +std::vector QuantizeSamples(const std::vector &samples, + size_t num_chunks) { + if (samples.empty()) return {}; + int min = *std::min_element(samples.begin(), samples.end()); + constexpr int kRange = 512; + min = std::min(std::max(min, -kRange), kRange); + std::vector counts(2 * kRange + 1); + for (int s : samples) { + uint32_t sample_offset = std::min(std::max(s, -kRange), kRange) - min; + counts[sample_offset]++; + } + std::vector thresholds = QuantizeHistogram(counts, num_chunks); + for (auto &v : thresholds) v += min; + return thresholds; +} +} // namespace + +void TreeSamples::PreQuantizeProperties( + const StaticPropRange &range, + const std::vector &multiplier_info, + const std::vector &group_pixel_count, + const std::vector &channel_pixel_count, + std::vector &pixel_samples, + std::vector &diff_samples, size_t max_property_values) { + // If we have forced splits because of multipliers, choose channel and group + // thresholds accordingly. 
+ std::vector group_multiplier_thresholds; + std::vector channel_multiplier_thresholds; + for (const auto &v : multiplier_info) { + if (v.range[0][0] != range[0][0]) { + channel_multiplier_thresholds.push_back(v.range[0][0] - 1); + } + if (v.range[0][1] != range[0][1]) { + channel_multiplier_thresholds.push_back(v.range[0][1] - 1); + } + if (v.range[1][0] != range[1][0]) { + group_multiplier_thresholds.push_back(v.range[1][0] - 1); + } + if (v.range[1][1] != range[1][1]) { + group_multiplier_thresholds.push_back(v.range[1][1] - 1); + } + } + std::sort(channel_multiplier_thresholds.begin(), + channel_multiplier_thresholds.end()); + channel_multiplier_thresholds.resize( + std::unique(channel_multiplier_thresholds.begin(), + channel_multiplier_thresholds.end()) - + channel_multiplier_thresholds.begin()); + std::sort(group_multiplier_thresholds.begin(), + group_multiplier_thresholds.end()); + group_multiplier_thresholds.resize( + std::unique(group_multiplier_thresholds.begin(), + group_multiplier_thresholds.end()) - + group_multiplier_thresholds.begin()); + + compact_properties.resize(props_to_use.size()); + auto quantize_channel = [&]() { + if (!channel_multiplier_thresholds.empty()) { + return channel_multiplier_thresholds; + } + return QuantizeHistogram(channel_pixel_count, max_property_values); + }; + auto quantize_group_id = [&]() { + if (!group_multiplier_thresholds.empty()) { + return group_multiplier_thresholds; + } + return QuantizeHistogram(group_pixel_count, max_property_values); + }; + auto quantize_coordinate = [&]() { + std::vector quantized; + quantized.reserve(max_property_values - 1); + for (size_t i = 0; i + 1 < max_property_values; i++) { + quantized.push_back((i + 1) * 256 / max_property_values - 1); + } + return quantized; + }; + std::vector abs_pixel_thr; + std::vector pixel_thr; + auto quantize_pixel_property = [&]() { + if (pixel_thr.empty()) { + pixel_thr = QuantizeSamples(pixel_samples, max_property_values); + } + return pixel_thr; + }; + auto 
quantize_abs_pixel_property = [&]() { + if (abs_pixel_thr.empty()) { + quantize_pixel_property(); // Compute the non-abs thresholds. + for (auto &v : pixel_samples) v = std::abs(v); + abs_pixel_thr = QuantizeSamples(pixel_samples, max_property_values); + } + return abs_pixel_thr; + }; + std::vector abs_diff_thr; + std::vector diff_thr; + auto quantize_diff_property = [&]() { + if (diff_thr.empty()) { + diff_thr = QuantizeSamples(diff_samples, max_property_values); + } + return diff_thr; + }; + auto quantize_abs_diff_property = [&]() { + if (abs_diff_thr.empty()) { + quantize_diff_property(); // Compute the non-abs thresholds. + for (auto &v : diff_samples) v = std::abs(v); + abs_diff_thr = QuantizeSamples(diff_samples, max_property_values); + } + return abs_diff_thr; + }; + auto quantize_wp = [&]() { + if (max_property_values < 32) { + return std::vector{-127, -63, -31, -15, -7, -3, -1, 0, + 1, 3, 7, 15, 31, 63, 127}; + } + if (max_property_values < 64) { + return std::vector{-255, -191, -127, -95, -63, -47, -31, -23, + -15, -11, -7, -5, -3, -1, 0, 1, + 3, 5, 7, 11, 15, 23, 31, 47, + 63, 95, 127, 191, 255}; + } + return std::vector{ + -255, -223, -191, -159, -127, -111, -95, -79, -63, -55, -47, + -39, -31, -27, -23, -19, -15, -13, -11, -9, -7, -6, + -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, + 6, 7, 9, 11, 13, 15, 19, 23, 27, 31, 39, + 47, 55, 63, 79, 95, 111, 127, 159, 191, 223, 255}; + }; + + property_mapping.resize(props_to_use.size()); + for (size_t i = 0; i < props_to_use.size(); i++) { + if (props_to_use[i] == 0) { + compact_properties[i] = quantize_channel(); + } else if (props_to_use[i] == 1) { + compact_properties[i] = quantize_group_id(); + } else if (props_to_use[i] == 2 || props_to_use[i] == 3) { + compact_properties[i] = quantize_coordinate(); + } else if (props_to_use[i] == 6 || props_to_use[i] == 7 || + props_to_use[i] == 8 || + (props_to_use[i] >= kNumNonrefProperties && + (props_to_use[i] - kNumNonrefProperties) % 4 == 1)) { + compact_properties[i] = 
quantize_pixel_property(); + } else if (props_to_use[i] == 4 || props_to_use[i] == 5 || + (props_to_use[i] >= kNumNonrefProperties && + (props_to_use[i] - kNumNonrefProperties) % 4 == 0)) { + compact_properties[i] = quantize_abs_pixel_property(); + } else if (props_to_use[i] >= kNumNonrefProperties && + (props_to_use[i] - kNumNonrefProperties) % 4 == 2) { + compact_properties[i] = quantize_abs_diff_property(); + } else if (props_to_use[i] == kWPProp) { + compact_properties[i] = quantize_wp(); + } else { + compact_properties[i] = quantize_diff_property(); + } + property_mapping[i].resize(kPropertyRange * 2 + 1); + size_t mapped = 0; + for (size_t j = 0; j < property_mapping[i].size(); j++) { + while (mapped < compact_properties[i].size() && + static_cast(j) - kPropertyRange > + compact_properties[i][mapped]) { + mapped++; + } + // property_mapping[i] of a value V is `mapped` if + // compact_properties[i][mapped] <= j and + // compact_properties[i][mapped-1] > j + // This is because the decision node in the tree splits on (property) > j, + // hence everything that is not > of a threshold should be clustered + // together. + property_mapping[i][j] = mapped; + } + } +} + +void CollectPixelSamples(const Image &image, const ModularOptions &options, + size_t group_id, + std::vector &group_pixel_count, + std::vector &channel_pixel_count, + std::vector &pixel_samples, + std::vector &diff_samples) { + if (options.nb_repeats == 0) return; + if (group_pixel_count.size() <= group_id) { + group_pixel_count.resize(group_id + 1); + } + if (channel_pixel_count.size() < image.channel.size()) { + channel_pixel_count.resize(image.channel.size()); + } + Rng rng(group_id); + // Sample 10% of the final number of samples for property quantization. 
+ float fraction = std::min(options.nb_repeats * 0.1, 0.99); + Rng::GeometricDistribution dist(fraction); + size_t total_pixels = 0; + std::vector channel_ids; + for (size_t i = 0; i < image.channel.size(); i++) { + if (image.channel[i].w <= 1 || image.channel[i].h == 0) { + continue; // skip empty or width-1 channels. + } + if (i >= image.nb_meta_channels && + (image.channel[i].w > options.max_chan_size || + image.channel[i].h > options.max_chan_size)) { + break; + } + channel_ids.push_back(i); + group_pixel_count[group_id] += image.channel[i].w * image.channel[i].h; + channel_pixel_count[i] += image.channel[i].w * image.channel[i].h; + total_pixels += image.channel[i].w * image.channel[i].h; + } + if (channel_ids.empty()) return; + pixel_samples.reserve(pixel_samples.size() + fraction * total_pixels); + diff_samples.reserve(diff_samples.size() + fraction * total_pixels); + size_t i = 0; + size_t y = 0; + size_t x = 0; + auto advance = [&](size_t amount) { + x += amount; + // Detect row overflow (rare). + while (x >= image.channel[channel_ids[i]].w) { + x -= image.channel[channel_ids[i]].w; + y++; + // Detect end-of-channel (even rarer). + if (y == image.channel[channel_ids[i]].h) { + i++; + y = 0; + if (i >= channel_ids.size()) { + return; + } + } + } + }; + advance(rng.Geometric(dist)); + for (; i < channel_ids.size(); advance(rng.Geometric(dist) + 1)) { + const pixel_type *row = image.channel[channel_ids[i]].Row(y); + pixel_samples.push_back(row[x]); + size_t xp = x == 0 ? 1 : x - 1; + diff_samples.push_back((int64_t)row[x] - row[xp]); + } +} + +// TODO(veluca): very simple encoding scheme. This should be improved. 
+void TokenizeTree(const Tree &tree, std::vector *tokens, + Tree *decoder_tree) { + JXL_ASSERT(tree.size() <= kMaxTreeSize); + std::queue q; + q.push(0); + size_t leaf_id = 0; + decoder_tree->clear(); + while (!q.empty()) { + int cur = q.front(); + q.pop(); + JXL_ASSERT(tree[cur].property >= -1); + tokens->emplace_back(kPropertyContext, tree[cur].property + 1); + if (tree[cur].property == -1) { + tokens->emplace_back(kPredictorContext, + static_cast(tree[cur].predictor)); + tokens->emplace_back(kOffsetContext, + PackSigned(tree[cur].predictor_offset)); + uint32_t mul_log = Num0BitsBelowLS1Bit_Nonzero(tree[cur].multiplier); + uint32_t mul_bits = (tree[cur].multiplier >> mul_log) - 1; + tokens->emplace_back(kMultiplierLogContext, mul_log); + tokens->emplace_back(kMultiplierBitsContext, mul_bits); + JXL_ASSERT(tree[cur].predictor < Predictor::Best); + decoder_tree->emplace_back(-1, 0, leaf_id++, 0, tree[cur].predictor, + tree[cur].predictor_offset, + tree[cur].multiplier); + continue; + } + decoder_tree->emplace_back(tree[cur].property, tree[cur].splitval, + decoder_tree->size() + q.size() + 1, + decoder_tree->size() + q.size() + 2, + Predictor::Zero, 0, 1); + q.push(tree[cur].lchild); + q.push(tree[cur].rchild); + tokens->emplace_back(kSplitValContext, PackSigned(tree[cur].splitval)); + } +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.h new file mode 100644 index 0000000000..ede37c8023 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/enc_ma.h @@ -0,0 +1,157 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_MODULAR_ENCODING_ENC_MA_H_ +#define LIB_JXL_MODULAR_ENCODING_ENC_MA_H_ + +#include + +#include "lib/jxl/enc_ans.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/modular/encoding/dec_ma.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/options.h" + +namespace jxl { + +// Struct to collect all the data needed to build a tree. +struct TreeSamples { + bool HasSamples() const { + return !residuals.empty() && !residuals[0].empty(); + } + size_t NumDistinctSamples() const { return sample_counts.size(); } + size_t NumSamples() const { return num_samples; } + // Set the predictor to use. Must be called before adding any samples. + Status SetPredictor(Predictor predictor, + ModularOptions::TreeMode wp_tree_mode); + // Set the properties to use. Must be called before adding any samples. + Status SetProperties(const std::vector &properties, + ModularOptions::TreeMode wp_tree_mode); + + size_t Token(size_t pred, size_t i) const { return residuals[pred][i].tok; } + size_t NBits(size_t pred, size_t i) const { return residuals[pred][i].nbits; } + size_t Count(size_t i) const { return sample_counts[i]; } + size_t PredictorIndex(Predictor predictor) const { + const auto predictor_elem = + std::find(predictors.begin(), predictors.end(), predictor); + JXL_DASSERT(predictor_elem != predictors.end()); + return predictor_elem - predictors.begin(); + } + size_t PropertyIndex(size_t property) const { + const auto property_elem = + std::find(props_to_use.begin(), props_to_use.end(), property); + JXL_DASSERT(property_elem != props_to_use.end()); + return property_elem - props_to_use.begin(); + } + size_t NumPropertyValues(size_t property_index) const { + return compact_properties[property_index].size() + 1; + } + // Returns the *quantized* property value. 
+ size_t Property(size_t property_index, size_t i) const { + return props[property_index][i]; + } + int UnquantizeProperty(size_t property_index, uint32_t quant) const { + JXL_ASSERT(quant < compact_properties[property_index].size()); + return compact_properties[property_index][quant]; + } + + Predictor PredictorFromIndex(size_t index) const { + JXL_DASSERT(index < predictors.size()); + return predictors[index]; + } + size_t PropertyFromIndex(size_t index) const { + JXL_DASSERT(index < props_to_use.size()); + return props_to_use[index]; + } + size_t NumPredictors() const { return predictors.size(); } + size_t NumProperties() const { return props_to_use.size(); } + + // Preallocate data for a given number of samples. MUST be called before + // adding any sample. + void PrepareForSamples(size_t num_samples); + // Add a sample. + void AddSample(pixel_type_w pixel, const Properties &properties, + const pixel_type_w *predictions); + // Pre-cluster property values. + void PreQuantizeProperties( + const StaticPropRange &range, + const std::vector &multiplier_info, + const std::vector &group_pixel_count, + const std::vector &channel_pixel_count, + std::vector &pixel_samples, + std::vector &diff_samples, size_t max_property_values); + + void AllSamplesDone() { dedup_table_ = std::vector(); } + + uint32_t QuantizeProperty(uint32_t prop, pixel_type v) const { + v = std::min(std::max(v, -kPropertyRange), kPropertyRange) + kPropertyRange; + return property_mapping[prop][v]; + } + + // Swaps samples in position a and b. Does nothing if a == b. + void Swap(size_t a, size_t b); + + // Cycles samples: a -> b -> c -> a. We assume a <= b <= c, so that we can + // just call Swap(a, b) if b==c. + void ThreeShuffle(size_t a, size_t b, size_t c); + + private: + // TODO(veluca): as the total number of properties and predictors are known + // before adding any samples, it might be better to interleave predictors, + // properties and counts in a single vector to improve locality. 
+ // A first attempt at doing this actually results in much slower encoding, + // possibly because of the more complex addressing. + struct ResidualToken { + uint8_t tok; + uint8_t nbits; + }; + // Residual information: token and number of extra bits, per predictor. + std::vector> residuals; + // Number of occurrences of each sample. + std::vector sample_counts; + // Property values, quantized to at most 256 distinct values. + std::vector> props; + // Decompactification info for `props`. + std::vector> compact_properties; + // List of properties to use. + std::vector props_to_use; + // List of predictors to use. + std::vector predictors; + // Mapping property value -> quantized property value. + static constexpr int32_t kPropertyRange = 511; + std::vector> property_mapping; + // Number of samples seen. + size_t num_samples = 0; + // Table for deduplication. + static constexpr uint32_t kDedupEntryUnused{static_cast(-1)}; + std::vector dedup_table_; + + // Functions for sample deduplication. + bool IsSameSample(size_t a, size_t b) const; + size_t Hash1(size_t a) const; + size_t Hash2(size_t a) const; + void InitTable(size_t size); + // Returns true if `a` was already present in the table. 
+ bool AddToTableAndMerge(size_t a); + void AddToTable(size_t a); +}; + +void TokenizeTree(const Tree &tree, std::vector *tokens, + Tree *decoder_tree); + +void CollectPixelSamples(const Image &image, const ModularOptions &options, + size_t group_id, + std::vector &group_pixel_count, + std::vector &channel_pixel_count, + std::vector &pixel_samples, + std::vector &diff_samples); + +void ComputeBestTree(TreeSamples &tree_samples, float threshold, + const std::vector &mul_info, + StaticPropRange static_prop_range, + float fast_decode_multiplier, Tree *tree); + +} // namespace jxl +#endif // LIB_JXL_MODULAR_ENCODING_ENC_MA_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.cc b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.cc new file mode 100644 index 0000000000..09f21c0cce --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.cc @@ -0,0 +1,684 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/encoding/encoding.h" + +#include +#include + +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/scope_guard.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/options.h" + +namespace jxl { + +// Removes all nodes that use a static property (i.e. channel or group ID) from +// the tree and collapses each node on even levels with its two children to +// produce a flatter tree. Also computes whether the resulting tree requires +// using the weighted predictor. 
+FlatTree FilterTree(const Tree &global_tree, + std::array &static_props, + size_t *num_props, bool *use_wp, bool *wp_only, + bool *gradient_only) { + *num_props = 0; + bool has_wp = false; + bool has_non_wp = false; + *gradient_only = true; + const auto mark_property = [&](int32_t p) { + if (p == kWPProp) { + has_wp = true; + } else if (p >= kNumStaticProperties) { + has_non_wp = true; + } + if (p >= kNumStaticProperties && p != kGradientProp) { + *gradient_only = false; + } + }; + FlatTree output; + std::queue nodes; + nodes.push(0); + // Produces a trimmed and flattened tree by doing a BFS visit of the original + // tree, ignoring branches that are known to be false and proceeding two + // levels at a time to collapse nodes in a flatter tree; if an inner parent + // node has a leaf as a child, the leaf is duplicated and an implicit fake + // node is added. This allows to reduce the number of branches when traversing + // the resulting flat tree. + while (!nodes.empty()) { + size_t cur = nodes.front(); + nodes.pop(); + // Skip nodes that we can decide now, by jumping directly to their children. 
+ while (global_tree[cur].property < kNumStaticProperties && + global_tree[cur].property != -1) { + if (static_props[global_tree[cur].property] > global_tree[cur].splitval) { + cur = global_tree[cur].lchild; + } else { + cur = global_tree[cur].rchild; + } + } + FlatDecisionNode flat; + if (global_tree[cur].property == -1) { + flat.property0 = -1; + flat.childID = global_tree[cur].lchild; + flat.predictor = global_tree[cur].predictor; + flat.predictor_offset = global_tree[cur].predictor_offset; + flat.multiplier = global_tree[cur].multiplier; + *gradient_only &= flat.predictor == Predictor::Gradient; + has_wp |= flat.predictor == Predictor::Weighted; + has_non_wp |= flat.predictor != Predictor::Weighted; + output.push_back(flat); + continue; + } + flat.childID = output.size() + nodes.size() + 1; + + flat.property0 = global_tree[cur].property; + *num_props = std::max(flat.property0 + 1, *num_props); + flat.splitval0 = global_tree[cur].splitval; + + for (size_t i = 0; i < 2; i++) { + size_t cur_child = + i == 0 ? global_tree[cur].lchild : global_tree[cur].rchild; + // Skip nodes that we can decide now. + while (global_tree[cur_child].property < kNumStaticProperties && + global_tree[cur_child].property != -1) { + if (static_props[global_tree[cur_child].property] > + global_tree[cur_child].splitval) { + cur_child = global_tree[cur_child].lchild; + } else { + cur_child = global_tree[cur_child].rchild; + } + } + // We ended up in a leaf, add a dummy decision and two copies of the leaf. 
+ if (global_tree[cur_child].property == -1) { + flat.properties[i] = 0; + flat.splitvals[i] = 0; + nodes.push(cur_child); + nodes.push(cur_child); + } else { + flat.properties[i] = global_tree[cur_child].property; + flat.splitvals[i] = global_tree[cur_child].splitval; + nodes.push(global_tree[cur_child].lchild); + nodes.push(global_tree[cur_child].rchild); + *num_props = std::max(flat.properties[i] + 1, *num_props); + } + } + + for (size_t j = 0; j < 2; j++) mark_property(flat.properties[j]); + mark_property(flat.property0); + output.push_back(flat); + } + if (*num_props > kNumNonrefProperties) { + *num_props = + DivCeil(*num_props - kNumNonrefProperties, kExtraPropsPerChannel) * + kExtraPropsPerChannel + + kNumNonrefProperties; + } else { + *num_props = kNumNonrefProperties; + } + *use_wp = has_wp; + *wp_only = has_wp && !has_non_wp; + + return output; +} + +namespace detail { +template +Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader, + const std::vector &context_map, + const Tree &global_tree, + const weighted::Header &wp_header, + pixel_type chan, size_t group_id, + TreeLut &tree_lut, + Image *image) { + Channel &channel = image->channel[chan]; + + std::array static_props = { + {chan, (int)group_id}}; + // TODO(veluca): filter the tree according to static_props. + + // zero pixel channel? could happen + if (channel.w == 0 || channel.h == 0) return true; + + bool tree_has_wp_prop_or_pred = false; + bool is_wp_only = false; + bool is_gradient_only = false; + size_t num_props; + FlatTree tree = + FilterTree(global_tree, static_props, &num_props, + &tree_has_wp_prop_or_pred, &is_wp_only, &is_gradient_only); + + // From here on, tree lookup returns a *clustered* context ID. + // This avoids an extra memory lookup after tree traversal. 
+ for (size_t i = 0; i < tree.size(); i++) { + if (tree[i].property0 == -1) { + tree[i].childID = context_map[tree[i].childID]; + } + } + + JXL_DEBUG_V(3, "Decoded MA tree with %" PRIuS " nodes", tree.size()); + + // MAANS decode + const auto make_pixel = [](uint64_t v, pixel_type multiplier, + pixel_type_w offset) -> pixel_type { + JXL_DASSERT((v & 0xFFFFFFFF) == v); + pixel_type_w val = UnpackSigned(v); + // if it overflows, it overflows, and we have a problem anyway + return val * multiplier + offset; + }; + + if (tree.size() == 1) { + // special optimized case: no meta-adaptation, so no need + // to compute properties. + Predictor predictor = tree[0].predictor; + int64_t offset = tree[0].predictor_offset; + int32_t multiplier = tree[0].multiplier; + size_t ctx_id = tree[0].childID; + if (predictor == Predictor::Zero) { + uint32_t value; + if (reader->IsSingleValueAndAdvance(ctx_id, &value, + channel.w * channel.h)) { + // Special-case: histogram has a single symbol, with no extra bits, and + // we use ANS mode. 
+ JXL_DEBUG_V(8, "Fastest track."); + pixel_type v = make_pixel(value, multiplier, offset); + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT r = channel.Row(y); + std::fill(r, r + channel.w, v); + } + } else { + JXL_DEBUG_V(8, "Fast track."); + if (multiplier == 1 && offset == 0) { + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + uint32_t v = + reader->ReadHybridUintClusteredInlined(ctx_id, br); + r[x] = UnpackSigned(v); + } + } + } else { + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + uint32_t v = + reader->ReadHybridUintClusteredMaybeInlined(ctx_id, + br); + r[x] = make_pixel(v, multiplier, offset); + } + } + } + } + return true; + } else if (uses_lz77 && predictor == Predictor::Gradient && offset == 0 && + multiplier == 1 && reader->HuffRleOnly()) { + JXL_DEBUG_V(8, "Gradient RLE (fjxl) very fast track."); + uint32_t run = 0; + uint32_t v = 0; + pixel_type_w sv = 0; + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT r = channel.Row(y); + const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); + const pixel_type *JXL_RESTRICT rtopleft = + (y ? channel.Row(y - 1) - 1 : r - 1); + pixel_type_w guess = (y ? 
rtop[0] : 0); + if (run == 0) { + reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &v, &run); + sv = UnpackSigned(v); + } else { + run--; + } + r[0] = sv + guess; + for (size_t x = 1; x < channel.w; x++) { + pixel_type left = r[x - 1]; + pixel_type top = rtop[x]; + pixel_type topleft = rtopleft[x]; + pixel_type_w guess = ClampedGradient(top, left, topleft); + if (!run) { + reader->ReadHybridUintClusteredHuffRleOnly(ctx_id, br, &v, &run); + sv = UnpackSigned(v); + } else { + run--; + } + r[x] = sv + guess; + } + } + return true; + } else if (predictor == Predictor::Gradient && offset == 0 && + multiplier == 1) { + JXL_DEBUG_V(8, "Gradient very fast track."); + const intptr_t onerow = channel.plane.PixelsPerRow(); + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + pixel_type left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); + pixel_type top = (y ? *(r + x - onerow) : left); + pixel_type topleft = (x && y ? *(r + x - 1 - onerow) : left); + pixel_type guess = ClampedGradient(top, left, topleft); + uint64_t v = reader->ReadHybridUintClusteredMaybeInlined( + ctx_id, br); + r[x] = make_pixel(v, 1, guess); + } + } + return true; + } + } + + // Check if this tree is a WP-only tree with a small enough property value + // range. + if (is_wp_only) { + is_wp_only = TreeToLookupTable(tree, tree_lut); + } + if (is_gradient_only) { + is_gradient_only = TreeToLookupTable(tree, tree_lut); + } + + if (is_gradient_only) { + JXL_DEBUG_V(8, "Gradient fast track."); + const intptr_t onerow = channel.plane.PixelsPerRow(); + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT r = channel.Row(y); + for (size_t x = 0; x < channel.w; x++) { + pixel_type_w left = (x ? r[x - 1] : y ? *(r + x - onerow) : 0); + pixel_type_w top = (y ? *(r + x - onerow) : left); + pixel_type_w topleft = (x && y ? 
*(r + x - 1 - onerow) : left); + int32_t guess = ClampedGradient(top, left, topleft); + uint32_t pos = + kPropRangeFast + + std::min( + std::max(-kPropRangeFast, top + left - topleft), + kPropRangeFast - 1); + uint32_t ctx_id = tree_lut.context_lookup[pos]; + uint64_t v = + reader->ReadHybridUintClusteredMaybeInlined(ctx_id, br); + r[x] = make_pixel( + v, tree_lut.multipliers[pos], + static_cast(tree_lut.offsets[pos]) + guess); + } + } + } else if (!uses_lz77 && is_wp_only && channel.w > 8) { + JXL_DEBUG_V(8, "WP fast track."); + weighted::State wp_state(wp_header, channel.w, channel.h); + Properties properties(1); + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT r = channel.Row(y); + const pixel_type *JXL_RESTRICT rtop = (y ? channel.Row(y - 1) : r - 1); + const pixel_type *JXL_RESTRICT rtoptop = + (y > 1 ? channel.Row(y - 2) : rtop); + const pixel_type *JXL_RESTRICT rtopleft = + (y ? channel.Row(y - 1) - 1 : r - 1); + const pixel_type *JXL_RESTRICT rtopright = + (y ? channel.Row(y - 1) + 1 : r - 1); + size_t x = 0; + { + size_t offset = 0; + pixel_type_w left = y ? rtop[x] : 0; + pixel_type_w toptop = y ? rtoptop[x] : 0; + pixel_type_w topright = (x + 1 < channel.w && y ? 
rtop[x + 1] : left); + int32_t guess = wp_state.Predict( + x, y, channel.w, left, left, topright, left, toptop, &properties, + offset); + uint32_t pos = + kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]), + kPropRangeFast - 1); + uint32_t ctx_id = tree_lut.context_lookup[pos]; + uint64_t v = + reader->ReadHybridUintClusteredInlined(ctx_id, br); + r[x] = make_pixel( + v, tree_lut.multipliers[pos], + static_cast(tree_lut.offsets[pos]) + guess); + wp_state.UpdateErrors(r[x], x, y, channel.w); + } + for (x = 1; x + 1 < channel.w; x++) { + size_t offset = 0; + int32_t guess = wp_state.Predict( + x, y, channel.w, rtop[x], r[x - 1], rtopright[x], rtopleft[x], + rtoptop[x], &properties, offset); + uint32_t pos = + kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]), + kPropRangeFast - 1); + uint32_t ctx_id = tree_lut.context_lookup[pos]; + uint64_t v = + reader->ReadHybridUintClusteredInlined(ctx_id, br); + r[x] = make_pixel( + v, tree_lut.multipliers[pos], + static_cast(tree_lut.offsets[pos]) + guess); + wp_state.UpdateErrors(r[x], x, y, channel.w); + } + { + size_t offset = 0; + int32_t guess = wp_state.Predict( + x, y, channel.w, rtop[x], r[x - 1], rtop[x], rtopleft[x], + rtoptop[x], &properties, offset); + uint32_t pos = + kPropRangeFast + std::min(std::max(-kPropRangeFast, properties[0]), + kPropRangeFast - 1); + uint32_t ctx_id = tree_lut.context_lookup[pos]; + uint64_t v = + reader->ReadHybridUintClusteredInlined(ctx_id, br); + r[x] = make_pixel( + v, tree_lut.multipliers[pos], + static_cast(tree_lut.offsets[pos]) + guess); + wp_state.UpdateErrors(r[x], x, y, channel.w); + } + } + } else if (!tree_has_wp_prop_or_pred) { + // special optimized case: the weighted predictor and its properties are not + // used, so no need to compute weights and properties. 
+ JXL_DEBUG_V(8, "Slow track."); + MATreeLookup tree_lookup(tree); + Properties properties = Properties(num_props); + const intptr_t onerow = channel.plane.PixelsPerRow(); + Channel references(properties.size() - kNumNonrefProperties, channel.w); + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT p = channel.Row(y); + PrecomputeReferences(channel, y, *image, chan, &references); + InitPropsRow(&properties, static_props, y); + if (y > 1 && channel.w > 8 && references.w == 0) { + for (size_t x = 0; x < 2; x++) { + PredictionResult res = + PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references); + uint64_t v = + reader->ReadHybridUintClustered(res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + } + for (size_t x = 2; x < channel.w - 2; x++) { + PredictionResult res = + PredictTreeNoWPNEC(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references); + uint64_t v = reader->ReadHybridUintClusteredInlined( + res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + } + for (size_t x = channel.w - 2; x < channel.w; x++) { + PredictionResult res = + PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references); + uint64_t v = + reader->ReadHybridUintClustered(res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + } + } else { + for (size_t x = 0; x < channel.w; x++) { + PredictionResult res = + PredictTreeNoWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references); + uint64_t v = reader->ReadHybridUintClusteredMaybeInlined( + res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + } + } + } + } else { + JXL_DEBUG_V(8, "Slowest track."); + MATreeLookup tree_lookup(tree); + Properties properties = Properties(num_props); + const intptr_t onerow = channel.plane.PixelsPerRow(); + Channel references(properties.size() - kNumNonrefProperties, channel.w); + weighted::State wp_state(wp_header, channel.w, channel.h); + 
for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT p = channel.Row(y); + InitPropsRow(&properties, static_props, y); + PrecomputeReferences(channel, y, *image, chan, &references); + if (!uses_lz77 && y > 1 && channel.w > 8 && references.w == 0) { + for (size_t x = 0; x < 2; x++) { + PredictionResult res = + PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references, &wp_state); + uint64_t v = + reader->ReadHybridUintClustered(res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + for (size_t x = 2; x < channel.w - 2; x++) { + PredictionResult res = + PredictTreeWPNEC(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references, &wp_state); + uint64_t v = reader->ReadHybridUintClusteredInlined( + res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + for (size_t x = channel.w - 2; x < channel.w; x++) { + PredictionResult res = + PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references, &wp_state); + uint64_t v = + reader->ReadHybridUintClustered(res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + } else { + for (size_t x = 0; x < channel.w; x++) { + PredictionResult res = + PredictTreeWP(&properties, channel.w, p + x, onerow, x, y, + tree_lookup, references, &wp_state); + uint64_t v = + reader->ReadHybridUintClustered(res.context, br); + p[x] = make_pixel(v, res.multiplier, res.guess); + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + } + } + } + return true; +} +} // namespace detail + +Status DecodeModularChannelMAANS(BitReader *br, ANSSymbolReader *reader, + const std::vector &context_map, + const Tree &global_tree, + const weighted::Header &wp_header, + pixel_type chan, size_t group_id, + TreeLut &tree_lut, + Image *image) { + if (reader->UsesLZ77()) { + return 
detail::DecodeModularChannelMAANS( + br, reader, context_map, global_tree, wp_header, chan, group_id, + tree_lut, image); + } else { + return detail::DecodeModularChannelMAANS( + br, reader, context_map, global_tree, wp_header, chan, group_id, + tree_lut, image); + } +} + +GroupHeader::GroupHeader() { Bundle::Init(this); } + +Status ValidateChannelDimensions(const Image &image, + const ModularOptions &options) { + size_t nb_channels = image.channel.size(); + for (bool is_dc : {true, false}) { + size_t group_dim = options.group_dim * (is_dc ? kBlockDim : 1); + size_t c = image.nb_meta_channels; + for (; c < nb_channels; c++) { + const Channel &ch = image.channel[c]; + if (ch.w > options.group_dim || ch.h > options.group_dim) break; + } + for (; c < nb_channels; c++) { + const Channel &ch = image.channel[c]; + if (ch.w == 0 || ch.h == 0) continue; // skip empty + bool is_dc_channel = std::min(ch.hshift, ch.vshift) >= 3; + if (is_dc_channel != is_dc) continue; + size_t tile_dim = group_dim >> std::max(ch.hshift, ch.vshift); + if (tile_dim == 0) { + return JXL_FAILURE("Inconsistent transforms"); + } + } + } + return true; +} + +Status ModularDecode(BitReader *br, Image &image, GroupHeader &header, + size_t group_id, ModularOptions *options, + const Tree *global_tree, const ANSCode *global_code, + const std::vector *global_ctx_map, + const bool allow_truncated_group) { + if (image.channel.empty()) return true; + + // decode transforms + Status status = Bundle::Read(br, &header); + if (!allow_truncated_group) JXL_RETURN_IF_ERROR(status); + if (status.IsFatalError()) return status; + if (!br->AllReadsWithinBounds()) { + // Don't do/undo transforms if header is incomplete. 
+ header.transforms.clear(); + image.transform = header.transforms; + for (size_t c = 0; c < image.channel.size(); c++) { + ZeroFillImage(&image.channel[c].plane); + } + return Status(StatusCode::kNotEnoughBytes); + } + + JXL_DEBUG_V(3, "Image data underwent %" PRIuS " transformations: ", + header.transforms.size()); + image.transform = header.transforms; + for (Transform &transform : image.transform) { + JXL_RETURN_IF_ERROR(transform.MetaApply(image)); + } + if (image.error) { + return JXL_FAILURE("Corrupt file. Aborting."); + } + JXL_RETURN_IF_ERROR(ValidateChannelDimensions(image, *options)); + + size_t nb_channels = image.channel.size(); + + size_t num_chans = 0; + size_t distance_multiplier = 0; + for (size_t i = 0; i < nb_channels; i++) { + Channel &channel = image.channel[i]; + if (!channel.w || !channel.h) { + continue; // skip empty channels + } + if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size || + channel.h > options->max_chan_size)) { + break; + } + if (channel.w > distance_multiplier) { + distance_multiplier = channel.w; + } + num_chans++; + } + if (num_chans == 0) return true; + + size_t next_channel = 0; + auto scope_guard = MakeScopeGuard([&]() { + for (size_t c = next_channel; c < image.channel.size(); c++) { + ZeroFillImage(&image.channel[c].plane); + } + }); + // Do not do anything if truncated groups are not allowed. + if (allow_truncated_group) scope_guard.Disarm(); + + // Read tree. 
+ Tree tree_storage; + std::vector context_map_storage; + ANSCode code_storage; + const Tree *tree = &tree_storage; + const ANSCode *code = &code_storage; + const std::vector *context_map = &context_map_storage; + if (!header.use_global_tree) { + uint64_t max_tree_size = 1024; + for (size_t i = 0; i < nb_channels; i++) { + Channel &channel = image.channel[i]; + if (i >= image.nb_meta_channels && (channel.w > options->max_chan_size || + channel.h > options->max_chan_size)) { + break; + } + uint64_t pixels = channel.w * channel.h; + max_tree_size += pixels; + } + max_tree_size = std::min(static_cast(1 << 20), max_tree_size); + JXL_RETURN_IF_ERROR(DecodeTree(br, &tree_storage, max_tree_size)); + JXL_RETURN_IF_ERROR(DecodeHistograms(br, (tree_storage.size() + 1) / 2, + &code_storage, &context_map_storage)); + } else { + if (!global_tree || !global_code || !global_ctx_map || + global_tree->empty()) { + return JXL_FAILURE("No global tree available but one was requested"); + } + tree = global_tree; + code = global_code; + context_map = global_ctx_map; + } + + // Read channels + ANSSymbolReader reader(code, br, distance_multiplier); + auto tree_lut = jxl::make_unique>(); + for (; next_channel < nb_channels; next_channel++) { + Channel &channel = image.channel[next_channel]; + if (!channel.w || !channel.h) { + continue; // skip empty channels + } + if (next_channel >= image.nb_meta_channels && + (channel.w > options->max_chan_size || + channel.h > options->max_chan_size)) { + break; + } + JXL_RETURN_IF_ERROR(DecodeModularChannelMAANS( + br, &reader, *context_map, *tree, header.wp_header, next_channel, + group_id, *tree_lut, &image)); + + // Truncated group. + if (!br->AllReadsWithinBounds()) { + if (!allow_truncated_group) return JXL_FAILURE("Truncated input"); + return Status(StatusCode::kNotEnoughBytes); + } + } + + // Make sure no zero-filling happens even if next_channel < nb_channels. 
+ scope_guard.Disarm(); + + if (!reader.CheckANSFinalState()) { + return JXL_FAILURE("ANS decode final state failed"); + } + return true; +} + +Status ModularGenericDecompress(BitReader *br, Image &image, + GroupHeader *header, size_t group_id, + ModularOptions *options, bool undo_transforms, + const Tree *tree, const ANSCode *code, + const std::vector *ctx_map, + bool allow_truncated_group) { +#ifdef JXL_ENABLE_ASSERT + std::vector> req_sizes(image.channel.size()); + for (size_t c = 0; c < req_sizes.size(); c++) { + req_sizes[c] = {image.channel[c].w, image.channel[c].h}; + } +#endif + GroupHeader local_header; + if (header == nullptr) header = &local_header; + size_t bit_pos = br->TotalBitsConsumed(); + auto dec_status = ModularDecode(br, image, *header, group_id, options, tree, + code, ctx_map, allow_truncated_group); + if (!allow_truncated_group) JXL_RETURN_IF_ERROR(dec_status); + if (dec_status.IsFatalError()) return dec_status; + if (undo_transforms) image.undo_transforms(header->wp_header); + if (image.error) return JXL_FAILURE("Corrupt file. Aborting."); + JXL_DEBUG_V(4, + "Modular-decoded a %" PRIuS "x%" PRIuS " nbchans=%" PRIuS + " image from %" PRIuS " bytes", + image.w, image.h, image.channel.size(), + (br->TotalBitsConsumed() - bit_pos) / 8); + JXL_DEBUG_V(5, "Modular image: %s", image.DebugString().c_str()); + (void)bit_pos; +#ifdef JXL_ENABLE_ASSERT + // Check that after applying all transforms we are back to the requested image + // sizes, otherwise there's a programming error with the transformations. 
+ if (undo_transforms) { + JXL_ASSERT(image.channel.size() == req_sizes.size()); + for (size_t c = 0; c < req_sizes.size(); c++) { + JXL_ASSERT(req_sizes[c].first == image.channel[c].w); + JXL_ASSERT(req_sizes[c].second == image.channel[c].h); + } + } +#endif + return dec_status; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.h new file mode 100644 index 0000000000..4004e27be4 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/encoding.h @@ -0,0 +1,142 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_ENCODING_ENCODING_H_ +#define LIB_JXL_MODULAR_ENCODING_ENCODING_H_ + +#include +#include + +#include +#include + +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/image.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/encoding/dec_ma.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/options.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +// Valid range of properties for using lookup tables instead of trees. 
+constexpr int32_t kPropRangeFast = 512; + +struct GroupHeader : public Fields { + GroupHeader(); + + JXL_FIELDS_NAME(GroupHeader) + + Status VisitFields(Visitor *JXL_RESTRICT visitor) override { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &use_global_tree)); + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&wp_header)); + uint32_t num_transforms = static_cast(transforms.size()); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(0), Val(1), BitsOffset(4, 2), + BitsOffset(8, 18), 0, + &num_transforms)); + if (visitor->IsReading()) transforms.resize(num_transforms); + for (size_t i = 0; i < num_transforms; i++) { + JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&transforms[i])); + } + return true; + } + + bool use_global_tree; + weighted::Header wp_header; + + std::vector transforms; +}; + +FlatTree FilterTree(const Tree &global_tree, + std::array &static_props, + size_t *num_props, bool *use_wp, bool *wp_only, + bool *gradient_only); + +template +struct TreeLut { + std::array context_lookup; + std::array offsets; + std::array multipliers; +}; + +template +bool TreeToLookupTable(const FlatTree &tree, TreeLut &lut) { + struct TreeRange { + // Begin *excluded*, end *included*. This works best with > vs <= decision + // nodes. + int begin, end; + size_t pos; + }; + std::vector ranges; + ranges.push_back(TreeRange{-kPropRangeFast - 1, kPropRangeFast - 1, 0}); + while (!ranges.empty()) { + TreeRange cur = ranges.back(); + ranges.pop_back(); + if (cur.begin < -kPropRangeFast - 1 || cur.begin >= kPropRangeFast - 1 || + cur.end > kPropRangeFast - 1) { + // Tree is outside the allowed range, exit. + return false; + } + auto &node = tree[cur.pos]; + // Leaf. 
+ if (node.property0 == -1) { + if (node.predictor_offset < std::numeric_limits::min() || + node.predictor_offset > std::numeric_limits::max()) { + return false; + } + if (node.multiplier < std::numeric_limits::min() || + node.multiplier > std::numeric_limits::max()) { + return false; + } + if (!HAS_MULTIPLIERS && node.multiplier != 1) { + return false; + } + for (int i = cur.begin + 1; i < cur.end + 1; i++) { + lut.context_lookup[i + kPropRangeFast] = node.childID; + if (HAS_MULTIPLIERS) { + lut.multipliers[i + kPropRangeFast] = node.multiplier; + } + lut.offsets[i + kPropRangeFast] = node.predictor_offset; + } + continue; + } + // > side of top node. + if (node.properties[0] >= kNumStaticProperties) { + ranges.push_back(TreeRange({node.splitvals[0], cur.end, node.childID})); + ranges.push_back( + TreeRange({node.splitval0, node.splitvals[0], node.childID + 1})); + } else { + ranges.push_back(TreeRange({node.splitval0, cur.end, node.childID})); + } + // <= side + if (node.properties[1] >= kNumStaticProperties) { + ranges.push_back( + TreeRange({node.splitvals[1], node.splitval0, node.childID + 2})); + ranges.push_back( + TreeRange({cur.begin, node.splitvals[1], node.childID + 3})); + } else { + ranges.push_back( + TreeRange({cur.begin, node.splitval0, node.childID + 2})); + } + } + return true; +} +// TODO(veluca): make cleaner interfaces. 
+ +Status ValidateChannelDimensions(const Image &image, + const ModularOptions &options); + +Status ModularGenericDecompress(BitReader *br, Image &image, + GroupHeader *header, size_t group_id, + ModularOptions *options, + bool undo_transforms = true, + const Tree *tree = nullptr, + const ANSCode *code = nullptr, + const std::vector *ctx_map = nullptr, + bool allow_truncated_group = false); +} // namespace jxl + +#endif // LIB_JXL_MODULAR_ENCODING_ENCODING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/encoding/ma_common.h b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/ma_common.h new file mode 100644 index 0000000000..71b7847321 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/encoding/ma_common.h @@ -0,0 +1,28 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_ +#define LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_ + +#include + +namespace jxl { + +enum MATreeContext : size_t { + kSplitValContext = 0, + kPropertyContext = 1, + kPredictorContext = 2, + kOffsetContext = 3, + kMultiplierLogContext = 4, + kMultiplierBitsContext = 5, + + kNumTreeContexts = 6, +}; + +static constexpr size_t kMaxTreeSize = 1 << 22; + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_ENCODING_MA_COMMON_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.cc b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.cc new file mode 100644 index 0000000000..55b4af1cb3 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.cc @@ -0,0 +1,79 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/modular_image.h" + +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +void Image::undo_transforms(const weighted::Header &wp_header, + jxl::ThreadPool *pool) { + while (!transform.empty()) { + Transform t = transform.back(); + JXL_DEBUG_V(4, "Undoing transform"); + Status result = t.Inverse(*this, wp_header, pool); + if (result == false) { + JXL_NOTIFY_ERROR("Error while undoing transform."); + error = true; + return; + } + JXL_DEBUG_V(8, "Undoing transform: done"); + transform.pop_back(); + } +} + +Image::Image(size_t iw, size_t ih, int bitdepth, int nb_chans) + : w(iw), h(ih), bitdepth(bitdepth), nb_meta_channels(0), error(false) { + for (int i = 0; i < nb_chans; i++) channel.emplace_back(Channel(iw, ih)); +} + +Image::Image() : w(0), h(0), bitdepth(8), nb_meta_channels(0), error(true) {} + +Image &Image::operator=(Image &&other) noexcept { + w = other.w; + h = other.h; + bitdepth = other.bitdepth; + nb_meta_channels = other.nb_meta_channels; + error = other.error; + channel = std::move(other.channel); + transform = std::move(other.transform); + return *this; +} + +Image Image::clone() { + Image c(w, h, bitdepth, 0); + c.nb_meta_channels = nb_meta_channels; + c.error = error; + c.transform = transform; + for (Channel &ch : channel) { + Channel a(ch.w, ch.h, ch.hshift, ch.vshift); + CopyImageTo(ch.plane, &a.plane); + c.channel.push_back(std::move(a)); + } + return c; +} + +#if JXL_DEBUG_V_LEVEL >= 1 +std::string Image::DebugString() const { + std::ostringstream os; + os << w << "x" << h << ", depth: " << bitdepth; + if (!channel.empty()) { + os << ", channels:"; + for (size_t i = 0; i < channel.size(); ++i) { + os << " " << channel[i].w << "x" << channel[i].h + << "(shift: " << channel[i].hshift << "," << channel[i].vshift << ")"; + if (i < nb_meta_channels) os << "*"; + } + } + return os.str(); +} +#endif + +} // namespace jxl diff --git 
a/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.h b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.h new file mode 100644 index 0000000000..3e9b5a8a08 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/modular_image.h @@ -0,0 +1,118 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_MODULAR_IMAGE_H_ +#define LIB_JXL_MODULAR_MODULAR_IMAGE_H_ + +#include +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { + +typedef int32_t pixel_type; // can use int16_t if it's only for 8-bit images. + // Need some wiggle room for YCoCg / Squeeze etc + +typedef int64_t pixel_type_w; + +namespace weighted { +struct Header; +} + +class Channel { + public: + jxl::Plane plane; + size_t w, h; + int hshift, vshift; // w ~= image.w >> hshift; h ~= image.h >> vshift + Channel(size_t iw, size_t ih, int hsh = 0, int vsh = 0) + : plane(iw, ih), w(iw), h(ih), hshift(hsh), vshift(vsh) {} + + Channel(const Channel& other) = delete; + Channel& operator=(const Channel& other) = delete; + + // Move assignment + Channel& operator=(Channel&& other) noexcept { + w = other.w; + h = other.h; + hshift = other.hshift; + vshift = other.vshift; + plane = std::move(other.plane); + return *this; + } + + // Move constructor + Channel(Channel&& other) noexcept = default; + + void shrink() { + if (plane.xsize() == w && plane.ysize() == h) return; + jxl::Plane resizedplane(w, h); + plane = std::move(resizedplane); + } + void shrink(int nw, int nh) { + w = nw; + h = nh; + shrink(); + } + + JXL_INLINE pixel_type* Row(const size_t y) { return plane.Row(y); } + JXL_INLINE const pixel_type* Row(const size_t y) const { + return 
plane.Row(y); + } +}; + +class Transform; + +class Image { + public: + // image data, transforms can dramatically change the number of channels and + // their semantics + std::vector channel; + // transforms that have been applied (and that have to be undone) + std::vector transform; + + // image dimensions (channels may have different dimensions due to transforms) + size_t w, h; + int bitdepth; + size_t nb_meta_channels; // first few channels might contain palette(s) + bool error; // true if a fatal error occurred, false otherwise + + Image(size_t iw, size_t ih, int bitdepth, int nb_chans); + Image(); + + Image(const Image& other) = delete; + Image& operator=(const Image& other) = delete; + + Image& operator=(Image&& other) noexcept; + Image(Image&& other) noexcept = default; + + bool empty() const { + for (const auto& ch : channel) { + if (ch.w && ch.h) return false; + } + return true; + } + + Image clone(); + + void undo_transforms(const weighted::Header& wp_header, + jxl::ThreadPool* pool = nullptr); + + std::string DebugString() const; +}; + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_MODULAR_IMAGE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/options.h b/third-party/libjxl/libjxl/lib/jxl/modular/options.h new file mode 100644 index 0000000000..ce6596b912 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/options.h @@ -0,0 +1,117 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_MODULAR_OPTIONS_H_ +#define LIB_JXL_MODULAR_OPTIONS_H_ + +#include + +#include +#include + +namespace jxl { + +using PropertyVal = int32_t; +using Properties = std::vector; + +enum class Predictor : uint32_t { + Zero = 0, + Left = 1, + Top = 2, + Average0 = 3, + Select = 4, + Gradient = 5, + Weighted = 6, + TopRight = 7, + TopLeft = 8, + LeftLeft = 9, + Average1 = 10, + Average2 = 11, + Average3 = 12, + Average4 = 13, + // The following predictors are encoder-only. + Best = 14, // Best of Gradient and Weighted + Variable = + 15, // Find the best decision tree for predictors/predictor per row +}; + +constexpr size_t kNumModularPredictors = + static_cast(Predictor::Average4) + 1; +constexpr size_t kNumModularEncoderPredictors = + static_cast(Predictor::Variable) + 1; + +static constexpr ssize_t kNumStaticProperties = 2; // channel, group_id. + +using StaticPropRange = + std::array, kNumStaticProperties>; + +struct ModularMultiplierInfo { + StaticPropRange range; + uint32_t multiplier; +}; + +struct ModularOptions { + /// Used in both encode and decode: + + // Stop encoding/decoding when reaching a (non-meta) channel that has a + // dimension bigger than max_chan_size. + size_t max_chan_size = 0xFFFFFF; + + // Used during decoding for validation of transforms (sqeeezing) scheme. + size_t group_dim = 0x1FFFFFFF; + + /// Encode options: + // Fraction of pixels to look at to learn a MA tree + // Number of iterations to do to learn a MA tree + // (if zero there is no MA context model) + float nb_repeats = .5f; + + // Maximum number of (previous channel) properties to use in the MA trees + int max_properties = 0; // no previous channels + + // Alternative heuristic tweaks. 
+ // Properties default to channel, group, weighted, gradient residual, W-NW, + // NW-N, N-NE, N-NN + std::vector splitting_heuristics_properties = {0, 1, 15, 9, + 10, 11, 12, 13}; + float splitting_heuristics_node_threshold = 96; + size_t max_property_values = 32; + + // Predictor to use for each channel. + Predictor predictor = static_cast(-1); + + int wp_mode = 0; + + float fast_decode_multiplier = 1.01f; + + // Forces the encoder to produce a tree that is compatible with the WP-only + // decode path (or with the no-wp path, or the gradient-only path). + enum class TreeMode { kGradientOnly, kWPOnly, kNoWP, kDefault }; + TreeMode wp_tree_mode = TreeMode::kDefault; + + // Skip fast paths in the encoder. + bool skip_encoder_fast_path = false; + + // Kind of tree to use. + // TODO(veluca): add tree kinds for JPEG recompression with CfL enabled, + // general AC metadata, different DC qualities, and others. + enum class TreeKind { + kTrivialTreeNoPredictor, + kLearn, + kJpegTranscodeACMeta, + kFalconACMeta, + kACMeta, + kWPFixedDC, + kGradientFixedDC, + }; + TreeKind tree_kind = TreeKind::kLearn; + + // Ignore the image and just pretend all tokens are zeroes + bool zero_tokens = false; +}; + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_OPTIONS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.cc new file mode 100644 index 0000000000..e400e15e98 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.cc @@ -0,0 +1,595 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/transform/enc_palette.h" + +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/enc_transform.h" +#include "lib/jxl/modular/transform/palette.h" + +namespace jxl { + +namespace palette_internal { + +static constexpr bool kEncodeToHighQualityImplicitPalette = true; + +// Inclusive. +static constexpr int kMinImplicitPaletteIndex = -(2 * 72 - 1); + +float ColorDistance(const std::vector &JXL_RESTRICT a, + const std::vector &JXL_RESTRICT b) { + JXL_ASSERT(a.size() == b.size()); + float distance = 0; + float ave3 = 0; + if (a.size() >= 3) { + ave3 = (a[0] + b[0] + a[1] + b[1] + a[2] + b[2]) * (1.21f / 3.0f); + } + float sum_a = 0, sum_b = 0; + for (size_t c = 0; c < a.size(); ++c) { + const float difference = + static_cast(a[c]) - static_cast(b[c]); + float weight = c == 0 ? 3 : c == 1 ? 5 : 2; + if (c < 3 && (a[c] + b[c] >= ave3)) { + const float add_w[3] = { + 1.15, + 1.15, + 1.12, + }; + weight += add_w[c]; + if (c == 2 && ((a[2] + b[2]) < 1.22 * ave3)) { + weight -= 0.5; + } + } + distance += difference * difference * weight * weight; + const int sum_weight = c == 0 ? 3 : c == 1 ? 
5 : 1; + sum_a += a[c] * sum_weight; + sum_b += b[c] * sum_weight; + } + distance *= 4; + float sum_difference = sum_a - sum_b; + distance += sum_difference * sum_difference; + return distance; +} + +static int QuantizeColorToImplicitPaletteIndex( + const std::vector &color, const int palette_size, + const int bit_depth, bool high_quality) { + int index = 0; + if (high_quality) { + int multiplier = 1; + for (size_t c = 0; c < color.size(); c++) { + int quantized = ((kLargeCube - 1) * color[c] + (1 << (bit_depth - 1))) / + ((1 << bit_depth) - 1); + JXL_ASSERT((quantized % kLargeCube) == quantized); + index += quantized * multiplier; + multiplier *= kLargeCube; + } + return index + palette_size + kLargeCubeOffset; + } else { + int multiplier = 1; + for (size_t c = 0; c < color.size(); c++) { + int value = color[c]; + value -= 1 << (std::max(0, bit_depth - 3)); + value = std::max(0, value); + int quantized = ((kLargeCube - 1) * value + (1 << (bit_depth - 1))) / + ((1 << bit_depth) - 1); + JXL_ASSERT((quantized % kLargeCube) == quantized); + if (quantized > kSmallCube - 1) { + quantized = kSmallCube - 1; + } + index += quantized * multiplier; + multiplier *= kSmallCube; + } + return index + palette_size; + } +} + +} // namespace palette_internal + +int RoundInt(int value, int div) { // symmetric rounding around 0 + if (value < 0) return -RoundInt(-value, div); + return (value + div / 2) / div; +} + +struct PaletteIterationData { + static constexpr int kMaxDeltas = 128; + bool final_run = false; + std::vector deltas[3]; + std::vector delta_distances; + std::vector frequent_deltas[3]; + + // Populates `frequent_deltas` with items from `deltas` based on frequencies + // and color distances. + void FindFrequentColorDeltas(int num_pixels, int bitdepth) { + using pixel_type_3d = std::array; + std::map delta_frequency_map; + pixel_type bucket_size = 3 << std::max(0, bitdepth - 8); + // Store frequency weighted by delta distance from quantized value. 
+ for (size_t i = 0; i < deltas[0].size(); ++i) { + pixel_type_3d delta = { + {RoundInt(deltas[0][i], bucket_size), + RoundInt(deltas[1][i], bucket_size), + RoundInt(deltas[2][i], bucket_size)}}; // a basic form of clustering + if (delta[0] == 0 && delta[1] == 0 && delta[2] == 0) continue; + delta_frequency_map[delta] += sqrt(sqrt(delta_distances[i])); + } + + const float delta_distance_multiplier = 1.0f / num_pixels; + + // Weigh frequencies by magnitude and normalize. + for (auto &delta_frequency : delta_frequency_map) { + std::vector current_delta = {delta_frequency.first[0], + delta_frequency.first[1], + delta_frequency.first[2]}; + float delta_distance = + sqrt(palette_internal::ColorDistance({0, 0, 0}, current_delta)) + 1; + delta_frequency.second *= delta_distance * delta_distance_multiplier; + } + + // Sort by weighted frequency. + using pixel_type_3d_frequency = std::pair; + std::vector sorted_delta_frequency_map( + delta_frequency_map.begin(), delta_frequency_map.end()); + std::sort( + sorted_delta_frequency_map.begin(), sorted_delta_frequency_map.end(), + [](const pixel_type_3d_frequency &a, const pixel_type_3d_frequency &b) { + return a.second > b.second; + }); + + // Store the top deltas. 
+ for (auto &delta_frequency : sorted_delta_frequency_map) { + if (frequent_deltas[0].size() >= kMaxDeltas) break; + // Number obtained by optimizing on jyrki31 corpus: + if (delta_frequency.second < 17) break; + for (int c = 0; c < 3; ++c) { + frequent_deltas[c].push_back(delta_frequency.first[c] * bucket_size); + } + } + } +}; + +Status FwdPaletteIteration(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t &nb_colors, uint32_t &nb_deltas, + bool ordered, bool lossy, Predictor &predictor, + const weighted::Header &wp_header, + PaletteIterationData &palette_iteration_data) { + JXL_QUIET_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c)); + JXL_ASSERT(begin_c >= input.nb_meta_channels); + uint32_t nb = end_c - begin_c + 1; + + size_t w = input.channel[begin_c].w; + size_t h = input.channel[begin_c].h; + + if (!lossy && nb == 1) { + // Channel palette special case + if (nb_colors == 0) return false; + std::vector lookup; + pixel_type minval, maxval; + compute_minmax(input.channel[begin_c], &minval, &maxval); + size_t lookup_table_size = + static_cast(maxval) - static_cast(minval) + 1; + if (lookup_table_size > palette_internal::kMaxPaletteLookupTableSize) { + // a lookup table would use too much memory, instead use a slower approach + // with std::set + std::set chpalette; + pixel_type idx = 0; + for (size_t y = 0; y < h; y++) { + const pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + const bool new_color = chpalette.insert(p[x]).second; + if (new_color) { + idx++; + if (idx > (int)nb_colors) return false; + } + } + } + JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx); + Channel pch(idx, 1); + pch.hshift = -1; + pch.vshift = -1; + nb_colors = idx; + idx = 0; + pixel_type *JXL_RESTRICT p_palette = pch.Row(0); + for (pixel_type p : chpalette) { + p_palette[idx++] = p; + } + for (size_t y = 0; y < h; y++) { + pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + for (idx = 0; 
p[x] != p_palette[idx] && idx < (int)nb_colors; idx++) { + } + JXL_DASSERT(idx < (int)nb_colors); + p[x] = idx; + } + } + predictor = Predictor::Zero; + input.nb_meta_channels++; + input.channel.insert(input.channel.begin(), std::move(pch)); + + return true; + } + lookup.resize(lookup_table_size, 0); + pixel_type idx = 0; + for (size_t y = 0; y < h; y++) { + const pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + if (lookup[p[x] - minval] == 0) { + lookup[p[x] - minval] = 1; + idx++; + if (idx > (int)nb_colors) return false; + } + } + } + JXL_DEBUG_V(6, "Channel %i uses only %i colors.", begin_c, idx); + Channel pch(idx, 1); + pch.hshift = -1; + pch.vshift = -1; + nb_colors = idx; + idx = 0; + pixel_type *JXL_RESTRICT p_palette = pch.Row(0); + for (size_t i = 0; i < lookup_table_size; i++) { + if (lookup[i]) { + p_palette[idx] = i + minval; + lookup[i] = idx; + idx++; + } + } + for (size_t y = 0; y < h; y++) { + pixel_type *p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) p[x] = lookup[p[x] - minval]; + } + predictor = Predictor::Zero; + input.nb_meta_channels++; + input.channel.insert(input.channel.begin(), std::move(pch)); + return true; + } + + Image quantized_input; + if (lossy) { + quantized_input = Image(w, h, input.bitdepth, nb); + for (size_t c = 0; c < nb; c++) { + CopyImageTo(input.channel[begin_c + c].plane, + &quantized_input.channel[c].plane); + } + } + + JXL_DEBUG_V( + 7, "Trying to represent channels %i-%i using at most a %i-color palette.", + begin_c, end_c, nb_colors); + nb_deltas = 0; + bool delta_used = false; + std::set> candidate_palette; + std::vector> candidate_palette_imageorder; + std::vector color(nb); + std::vector color_with_error(nb); + std::vector p_in(nb); + std::map, size_t> inv_palette; + + if (lossy) { + palette_iteration_data.FindFrequentColorDeltas(w * h, input.bitdepth); + nb_deltas = palette_iteration_data.frequent_deltas[0].size(); + + // Count color frequency for colors 
that make a cross. + std::map, size_t> color_freq_map; + for (size_t y = 1; y + 1 < h; y++) { + for (uint32_t c = 0; c < nb; c++) { + p_in[c] = input.channel[begin_c + c].Row(y); + } + for (size_t x = 1; x + 1 < w; x++) { + for (uint32_t c = 0; c < nb; c++) { + color[c] = p_in[c][x]; + } + int offsets[4][2] = {{1, 0}, {-1, 0}, {0, 1}, {0, -1}}; + bool makes_cross = true; + for (int i = 0; i < 4 && makes_cross; ++i) { + int dx = offsets[i][0]; + int dy = offsets[i][1]; + for (uint32_t c = 0; c < nb && makes_cross; c++) { + if (input.channel[begin_c + c].Row(y + dy)[x + dx] != color[c]) { + makes_cross = false; + } + } + } + if (makes_cross) color_freq_map[color] += 1; + } + } + // Add colors satisfying frequency condition to the palette. + constexpr float kImageFraction = 0.01f; + size_t color_frequency_lower_bound = 5 + input.h * input.w * kImageFraction; + for (const auto &color_freq : color_freq_map) { + if (color_freq.second > color_frequency_lower_bound) { + candidate_palette.insert(color_freq.first); + candidate_palette_imageorder.push_back(color_freq.first); + } + } + } + + for (size_t y = 0; y < h; y++) { + for (uint32_t c = 0; c < nb; c++) { + p_in[c] = input.channel[begin_c + c].Row(y); + } + for (size_t x = 0; x < w; x++) { + if (lossy && candidate_palette.size() >= nb_colors) break; + for (uint32_t c = 0; c < nb; c++) { + color[c] = p_in[c][x]; + } + const bool new_color = candidate_palette.insert(color).second; + if (new_color) { + candidate_palette_imageorder.push_back(color); + } + if (candidate_palette.size() > nb_colors) { + return false; // too many colors + } + } + } + + nb_colors = nb_deltas + candidate_palette.size(); + JXL_DEBUG_V(6, "Channels %i-%i can be represented using a %i-color palette.", + begin_c, end_c, nb_colors); + + Channel pch(nb_colors, nb); + pch.hshift = -1; + pch.vshift = -1; + pixel_type *JXL_RESTRICT p_palette = pch.Row(0); + intptr_t onerow = pch.plane.PixelsPerRow(); + intptr_t onerow_image = 
input.channel[begin_c].plane.PixelsPerRow(); + const int bit_depth = std::min(input.bitdepth, 24); + + if (lossy) { + for (uint32_t i = 0; i < nb_deltas; i++) { + for (size_t c = 0; c < 3; c++) { + p_palette[c * onerow + i] = + palette_iteration_data.frequent_deltas[c][i]; + } + } + } + + int x = 0; + if (ordered && nb >= 3) { + JXL_DEBUG_V(7, "Palette of %i colors, using luma order", nb_colors); + // sort on luma (multiplied by alpha if available) + std::sort(candidate_palette_imageorder.begin(), + candidate_palette_imageorder.end(), + [](std::vector ap, std::vector bp) { + float ay, by; + ay = (0.299f * ap[0] + 0.587f * ap[1] + 0.114f * ap[2] + 0.1f); + if (ap.size() > 3) ay *= 1.f + ap[3]; + by = (0.299f * bp[0] + 0.587f * bp[1] + 0.114f * bp[2] + 0.1f); + if (bp.size() > 3) by *= 1.f + bp[3]; + return ay < by; + }); + } else { + JXL_DEBUG_V(7, "Palette of %i colors, using image order", nb_colors); + } + for (auto pcol : candidate_palette_imageorder) { + JXL_DEBUG_V(9, " Color %i : ", x); + for (size_t i = 0; i < nb; i++) { + p_palette[nb_deltas + i * onerow + x] = pcol[i]; + JXL_DEBUG_V(9, "%i ", pcol[i]); + } + inv_palette[pcol] = x; + x++; + } + std::vector wp_states; + for (size_t c = 0; c < nb; c++) { + wp_states.emplace_back(wp_header, w, h); + } + std::vector p_quant(nb); + // Three rows of error for dithering: y to y + 2. + // Each row has two pixels of padding in the ends, which is + // beneficial for both precision and encoding speed. 
+ std::vector> error_row[3]; + if (lossy) { + for (int i = 0; i < 3; ++i) { + error_row[i].resize(nb); + for (size_t c = 0; c < nb; ++c) { + error_row[i][c].resize(w + 4); + } + } + } + for (size_t y = 0; y < h; y++) { + for (size_t c = 0; c < nb; c++) { + p_in[c] = input.channel[begin_c + c].Row(y); + if (lossy) p_quant[c] = quantized_input.channel[c].Row(y); + } + pixel_type *JXL_RESTRICT p = input.channel[begin_c].Row(y); + for (size_t x = 0; x < w; x++) { + int index; + if (!lossy) { + for (size_t c = 0; c < nb; c++) color[c] = p_in[c][x]; + index = inv_palette[color]; + } else { + int best_index = 0; + bool best_is_delta = false; + float best_distance = std::numeric_limits::infinity(); + std::vector best_val(nb, 0); + std::vector ideal_residual(nb, 0); + std::vector quantized_val(nb); + std::vector predictions(nb); + static const double kDiffusionMultiplier[] = {0.55, 0.75}; + for (int diffusion_index = 0; diffusion_index < 2; ++diffusion_index) { + for (size_t c = 0; c < nb; c++) { + color_with_error[c] = + p_in[c][x] + palette_iteration_data.final_run * + kDiffusionMultiplier[diffusion_index] * + error_row[0][c][x + 2]; + color[c] = Clamp1(lroundf(color_with_error[c]), 0l, + (1l << input.bitdepth) - 1); + } + + for (size_t c = 0; c < nb; ++c) { + predictions[c] = PredictNoTreeWP(w, p_quant[c] + x, onerow_image, x, + y, predictor, &wp_states[c]) + .guess; + } + const auto TryIndex = [&](const int index) { + for (size_t c = 0; c < nb; c++) { + quantized_val[c] = palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/nb_colors, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + if (index < static_cast(nb_deltas)) { + quantized_val[c] += predictions[c]; + } + } + const float color_distance = + 32.0 / (1LL << std::max(0, 2 * (bit_depth - 8))) * + palette_internal::ColorDistance(color_with_error, + quantized_val); + float index_penalty = 0; + if (index == -1) { + index_penalty = -124; + } else if (index < 0) { + index_penalty = -2 * 
index; + } else if (index < static_cast(nb_deltas)) { + index_penalty = 250; + } else if (index < static_cast(nb_colors)) { + index_penalty = 150; + } else if (index < static_cast(nb_colors) + + palette_internal::kLargeCubeOffset) { + index_penalty = 70; + } else { + index_penalty = 256; + } + const float distance = color_distance + index_penalty; + if (distance < best_distance) { + best_distance = distance; + best_index = index; + best_is_delta = index < static_cast(nb_deltas); + best_val.swap(quantized_val); + for (size_t c = 0; c < nb; ++c) { + ideal_residual[c] = color_with_error[c] - predictions[c]; + } + } + }; + for (index = palette_internal::kMinImplicitPaletteIndex; + index < static_cast(nb_colors); index++) { + TryIndex(index); + } + TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex( + color, nb_colors, bit_depth, + /*high_quality=*/false)); + if (palette_internal::kEncodeToHighQualityImplicitPalette) { + TryIndex(palette_internal::QuantizeColorToImplicitPaletteIndex( + color, nb_colors, bit_depth, + /*high_quality=*/true)); + } + } + index = best_index; + delta_used |= best_is_delta; + if (!palette_iteration_data.final_run) { + for (size_t c = 0; c < 3; ++c) { + palette_iteration_data.deltas[c].push_back(ideal_residual[c]); + } + palette_iteration_data.delta_distances.push_back(best_distance); + } + + for (size_t c = 0; c < nb; ++c) { + wp_states[c].UpdateErrors(best_val[c], x, y, w); + p_quant[c][x] = best_val[c]; + } + float len_error = 0; + for (size_t c = 0; c < nb; ++c) { + float local_error = color_with_error[c] - best_val[c]; + len_error += local_error * local_error; + } + len_error = sqrt(len_error); + float modulate = 1.0; + int len_limit = 38 << std::max(0, bit_depth - 8); + if (len_error > len_limit) { + modulate *= len_limit / len_error; + } + for (size_t c = 0; c < nb; ++c) { + float total_error = (color_with_error[c] - best_val[c]); + + // If the neighboring pixels have some error in the opposite + // direction of total_error, 
cancel some or all of it out before + // spreading among them. + constexpr int offsets[12][2] = {{1, 2}, {0, 3}, {0, 4}, {1, 1}, + {1, 3}, {2, 2}, {1, 0}, {1, 4}, + {2, 1}, {2, 3}, {2, 0}, {2, 4}}; + float total_available = 0; + for (int i = 0; i < 11; ++i) { + const int row = offsets[i][0]; + const int col = offsets[i][1]; + if (std::signbit(error_row[row][c][x + col]) != + std::signbit(total_error)) { + total_available += error_row[row][c][x + col]; + } + } + float weight = + std::abs(total_error) / (std::abs(total_available) + 1e-3); + weight = std::min(weight, 1.0f); + for (int i = 0; i < 11; ++i) { + const int row = offsets[i][0]; + const int col = offsets[i][1]; + if (std::signbit(error_row[row][c][x + col]) != + std::signbit(total_error)) { + total_error += weight * error_row[row][c][x + col]; + error_row[row][c][x + col] *= (1 - weight); + } + } + total_error *= modulate; + const float remaining_error = (1.0f / 14.) * total_error; + error_row[0][c][x + 3] += 2 * remaining_error; + error_row[0][c][x + 4] += remaining_error; + error_row[1][c][x + 0] += remaining_error; + for (int i = 0; i < 5; ++i) { + error_row[1][c][x + i] += remaining_error; + error_row[2][c][x + i] += remaining_error; + } + } + } + if (palette_iteration_data.final_run) p[x] = index; + } + if (lossy) { + for (size_t c = 0; c < nb; ++c) { + error_row[0][c].swap(error_row[1][c]); + error_row[1][c].swap(error_row[2][c]); + std::fill(error_row[2][c].begin(), error_row[2][c].end(), 0.f); + } + } + } + if (!delta_used) { + predictor = Predictor::Zero; + } + if (palette_iteration_data.final_run) { + input.nb_meta_channels++; + input.channel.erase(input.channel.begin() + begin_c + 1, + input.channel.begin() + end_c + 1); + input.channel.insert(input.channel.begin(), std::move(pch)); + } + nb_colors -= nb_deltas; + return true; +} + +Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered, + bool lossy, Predictor &predictor, + const 
weighted::Header &wp_header) { + PaletteIterationData palette_iteration_data; + uint32_t nb_colors_orig = nb_colors; + uint32_t nb_deltas_orig = nb_deltas; + // preprocessing pass in case of lossy palette + if (lossy && input.bitdepth >= 8) { + JXL_RETURN_IF_ERROR(FwdPaletteIteration( + input, begin_c, end_c, nb_colors_orig, nb_deltas_orig, ordered, lossy, + predictor, wp_header, palette_iteration_data)); + } + palette_iteration_data.final_run = true; + return FwdPaletteIteration(input, begin_c, end_c, nb_colors, nb_deltas, + ordered, lossy, predictor, wp_header, + palette_iteration_data); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.h new file mode 100644 index 0000000000..0f3d66825b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_palette.h @@ -0,0 +1,22 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_ + +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +Status FwdPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t &nb_colors, uint32_t &nb_deltas, bool ordered, + bool lossy, Predictor &predictor, + const weighted::Header &wp_header); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_PALETTE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.cc new file mode 100644 index 0000000000..050563a3c2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.cc @@ -0,0 +1,73 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/enc_rct.h" + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels + +namespace jxl { + +Status FwdRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { + JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2)); + if (rct_type == 0) { // noop + return false; + } + // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR + int permutation = rct_type / 7; + // 0-5 values have the low bit corresponding to Third and the high bits + // corresponding to Second. 6 corresponds to YCoCg. + // + // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird + // + // Third: 0=nop, 1=SubtractFirst + int custom = rct_type % 7; + size_t m = begin_c; + size_t w = input.channel[m + 0].w; + size_t h = input.channel[m + 0].h; + int second = (custom % 7) >> 1; + int third = (custom % 7) & 1; + const auto do_rct = [&](const int y, const int thread) { + const pixel_type* in0 = input.channel[m + (permutation % 3)].Row(y); + const pixel_type* in1 = + input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y); + const pixel_type* in2 = + input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y); + pixel_type* out0 = input.channel[m].Row(y); + pixel_type* out1 = input.channel[m + 1].Row(y); + pixel_type* out2 = input.channel[m + 2].Row(y); + if (custom == 6) { + for (size_t x = 0; x < w; x++) { + pixel_type R = in0[x]; + pixel_type G = in1[x]; + pixel_type B = in2[x]; + out1[x] = R - B; + pixel_type tmp = B + (out1[x] >> 1); + out2[x] = G - tmp; + out0[x] = tmp + (out2[x] >> 1); + } + } else { + for (size_t x = 0; x < w; x++) { + pixel_type First = in0[x]; + pixel_type Second = in1[x]; + pixel_type Third = in2[x]; + if (second == 1) { + Second = Second - First; + } else if 
(second == 2) { + Second = Second - ((First + Third) >> 1); + } + if (third) Third = Third - First; + out0[x] = First; + out1[x] = Second; + out2[x] = Third; + } + } + }; + return RunOnPool(pool, 0, h, ThreadPool::NoInit, do_rct, "FwdRCT"); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.h new file mode 100644 index 0000000000..cb5a193c8d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_rct.h @@ -0,0 +1,17 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_ + +#include "lib/jxl/modular/modular_image.h" + +namespace jxl { + +Status FwdRCT(Image &input, size_t begin_c, size_t rct_type, ThreadPool *pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_RCT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.cc new file mode 100644 index 0000000000..dfd90cde68 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.cc @@ -0,0 +1,141 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/transform/enc_squeeze.h" + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/squeeze.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +void FwdHSqueeze(Image &input, int c, int rc) { + const Channel &chin = input.channel[c]; + + JXL_DEBUG_V(4, "Doing horizontal squeeze of channel %i to new channel %i", c, + rc); + + Channel chout((chin.w + 1) / 2, chin.h, chin.hshift + 1, chin.vshift); + Channel chout_residual(chin.w - chout.w, chout.h, chin.hshift + 1, + chin.vshift); + + for (size_t y = 0; y < chout.h; y++) { + const pixel_type *JXL_RESTRICT p_in = chin.Row(y); + pixel_type *JXL_RESTRICT p_out = chout.Row(y); + pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y); + for (size_t x = 0; x < chout_residual.w; x++) { + pixel_type A = p_in[x * 2]; + pixel_type B = p_in[x * 2 + 1]; + pixel_type avg = (A + B + (A > B)) >> 1; + p_out[x] = avg; + + pixel_type diff = A - B; + + pixel_type next_avg = avg; + if (x + 1 < chout_residual.w) { + next_avg = (p_in[x * 2 + 2] + p_in[x * 2 + 3] + + (p_in[x * 2 + 2] > p_in[x * 2 + 3])) >> + 1; // which will be chout.value(y,x+1) + } else if (chin.w & 1) + next_avg = p_in[x * 2 + 2]; + pixel_type left = (x > 0 ? 
p_in[x * 2 - 1] : avg); + pixel_type tendency = SmoothTendency(left, avg, next_avg); + + p_res[x] = diff - tendency; + } + if (chin.w & 1) { + int x = chout.w - 1; + p_out[x] = p_in[x * 2]; + } + } + input.channel[c] = std::move(chout); + input.channel.insert(input.channel.begin() + rc, std::move(chout_residual)); +} + +void FwdVSqueeze(Image &input, int c, int rc) { + const Channel &chin = input.channel[c]; + + JXL_DEBUG_V(4, "Doing vertical squeeze of channel %i to new channel %i", c, + rc); + + Channel chout(chin.w, (chin.h + 1) / 2, chin.hshift, chin.vshift + 1); + Channel chout_residual(chin.w, chin.h - chout.h, chin.hshift, + chin.vshift + 1); + intptr_t onerow_in = chin.plane.PixelsPerRow(); + for (size_t y = 0; y < chout_residual.h; y++) { + const pixel_type *JXL_RESTRICT p_in = chin.Row(y * 2); + pixel_type *JXL_RESTRICT p_out = chout.Row(y); + pixel_type *JXL_RESTRICT p_res = chout_residual.Row(y); + for (size_t x = 0; x < chout.w; x++) { + pixel_type A = p_in[x]; + pixel_type B = p_in[x + onerow_in]; + pixel_type avg = (A + B + (A > B)) >> 1; + p_out[x] = avg; + + pixel_type diff = A - B; + + pixel_type next_avg = avg; + if (y + 1 < chout_residual.h) { + next_avg = (p_in[x + 2 * onerow_in] + p_in[x + 3 * onerow_in] + + (p_in[x + 2 * onerow_in] > p_in[x + 3 * onerow_in])) >> + 1; // which will be chout.value(y+1,x) + } else if (chin.h & 1) { + next_avg = p_in[x + 2 * onerow_in]; + } + pixel_type top = + (y > 0 ? 
p_in[static_cast(x) - onerow_in] : avg); + pixel_type tendency = SmoothTendency(top, avg, next_avg); + + p_res[x] = diff - tendency; + } + } + if (chin.h & 1) { + size_t y = chout.h - 1; + const pixel_type *p_in = chin.Row(y * 2); + pixel_type *p_out = chout.Row(y); + for (size_t x = 0; x < chout.w; x++) { + p_out[x] = p_in[x]; + } + } + input.channel[c] = std::move(chout); + input.channel.insert(input.channel.begin() + rc, std::move(chout_residual)); +} + +Status FwdSqueeze(Image &input, std::vector parameters, + ThreadPool *pool) { + if (parameters.empty()) { + DefaultSqueezeParameters(¶meters, input); + } + // if nothing to do, don't do squeeze + if (parameters.empty()) return false; + for (size_t i = 0; i < parameters.size(); i++) { + JXL_RETURN_IF_ERROR( + CheckMetaSqueezeParams(parameters[i], input.channel.size())); + bool horizontal = parameters[i].horizontal; + bool in_place = parameters[i].in_place; + uint32_t beginc = parameters[i].begin_c; + uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1; + uint32_t offset; + if (in_place) { + offset = endc + 1; + } else { + offset = input.channel.size(); + } + for (uint32_t c = beginc; c <= endc; c++) { + if (horizontal) { + FwdHSqueeze(input, c, offset + c - beginc); + } else { + FwdVSqueeze(input, c, offset + c - beginc); + } + } + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.h new file mode 100644 index 0000000000..39b001017b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_squeeze.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_ + +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +Status FwdSqueeze(Image &input, std::vector parameters, + ThreadPool *pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_SQUEEZE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.cc new file mode 100644 index 0000000000..bdaaf9f87e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.cc @@ -0,0 +1,46 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/modular/transform/enc_transform.h" + +#include "lib/jxl/modular/transform/enc_palette.h" +#include "lib/jxl/modular/transform/enc_rct.h" +#include "lib/jxl/modular/transform/enc_squeeze.h" + +namespace jxl { + +Status TransformForward(Transform &t, Image &input, + const weighted::Header &wp_header, ThreadPool *pool) { + switch (t.id) { + case TransformId::kRCT: + return FwdRCT(input, t.begin_c, t.rct_type, pool); + case TransformId::kSqueeze: + return FwdSqueeze(input, t.squeezes, pool); + case TransformId::kPalette: + return FwdPalette(input, t.begin_c, t.begin_c + t.num_c - 1, t.nb_colors, + t.nb_deltas, t.ordered_palette, t.lossy_palette, + t.predictor, wp_header); + default: + return JXL_FAILURE("Unknown transformation (ID=%u)", + static_cast(t.id)); + } +} + +void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max) { + pixel_type realmin = std::numeric_limits::max(); + pixel_type realmax = std::numeric_limits::min(); + for (size_t y = 0; y < ch.h; y++) { + const pixel_type *JXL_RESTRICT p = ch.Row(y); + for (size_t x = 0; x < ch.w; x++) { + if (p[x] < 
realmin) realmin = p[x]; + if (p[x] > realmax) realmax = p[x]; + } + } + + if (min) *min = realmin; + if (max) *max = realmax; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.h new file mode 100644 index 0000000000..07659e1b0a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/enc_transform.h @@ -0,0 +1,22 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_ +#define LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_ + +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" + +namespace jxl { + +Status TransformForward(Transform &t, Image &input, + const weighted::Header &wp_header, ThreadPool *pool); + +void compute_minmax(const Channel &ch, pixel_type *min, pixel_type *max); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_ENC_TRANSFORM_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.cc new file mode 100644 index 0000000000..bffbacf160 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.cc @@ -0,0 +1,177 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/transform/palette.h" + +namespace jxl { + +Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors, + uint32_t nb_deltas, Predictor predictor, + const weighted::Header &wp_header, ThreadPool *pool) { + if (input.nb_meta_channels < 1) { + return JXL_FAILURE("Error: Palette transform without palette."); + } + std::atomic num_errors{0}; + int nb = input.channel[0].h; + uint32_t c0 = begin_c + 1; + if (c0 >= input.channel.size()) { + return JXL_FAILURE("Channel is out of range."); + } + size_t w = input.channel[c0].w; + size_t h = input.channel[c0].h; + if (nb < 1) return JXL_FAILURE("Corrupted transforms"); + for (int i = 1; i < nb; i++) { + input.channel.insert( + input.channel.begin() + c0 + 1, + Channel(w, h, input.channel[c0].hshift, input.channel[c0].vshift)); + } + const Channel &palette = input.channel[0]; + const pixel_type *JXL_RESTRICT p_palette = input.channel[0].Row(0); + intptr_t onerow = input.channel[0].plane.PixelsPerRow(); + intptr_t onerow_image = input.channel[c0].plane.PixelsPerRow(); + const int bit_depth = std::min(input.bitdepth, 24); + + if (w == 0) { + // Nothing to do. + // Avoid touching "empty" channels with non-zero height. 
+ } else if (nb_deltas == 0 && predictor == Predictor::Zero) { + if (nb == 1) { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, h, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + pixel_type *p = input.channel[c0].Row(y); + for (size_t x = 0; x < w; x++) { + const int index = Clamp1(p[x], 0, (pixel_type)palette.w - 1); + p[x] = palette_internal::GetPaletteValue( + p_palette, index, /*c=*/0, + /*palette_size=*/palette.w, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + } + }, + "UndoChannelPalette")); + } else { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, h, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + std::vector p_out(nb); + const pixel_type *p_index = input.channel[c0].Row(y); + for (int c = 0; c < nb; c++) + p_out[c] = input.channel[c0 + c].Row(y); + for (size_t x = 0; x < w; x++) { + const int index = p_index[x]; + for (int c = 0; c < nb; c++) { + p_out[c][x] = palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/palette.w, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + } + } + }, + "UndoPalette")); + } + } else { + // Parallelized per channel. 
+ ImageI indices = std::move(input.channel[c0].plane); + input.channel[c0].plane = ImageI(indices.xsize(), indices.ysize()); + if (predictor == Predictor::Weighted) { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, nb, ThreadPool::NoInit, + [&](const uint32_t c, size_t /* thread */) { + Channel &channel = input.channel[c0 + c]; + weighted::State wp_state(wp_header, channel.w, channel.h); + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT p = channel.Row(y); + const pixel_type *JXL_RESTRICT idx = indices.Row(y); + for (size_t x = 0; x < channel.w; x++) { + int index = idx[x]; + pixel_type_w val = 0; + const pixel_type palette_entry = + palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/palette.w, /*onerow=*/onerow, + /*bit_depth=*/bit_depth); + if (index < static_cast(nb_deltas)) { + PredictionResult pred = + PredictNoTreeWP(channel.w, p + x, onerow_image, x, y, + predictor, &wp_state); + val = pred.guess + palette_entry; + } else { + val = palette_entry; + } + p[x] = val; + wp_state.UpdateErrors(p[x], x, y, channel.w); + } + } + }, + "UndoDeltaPaletteWP")); + } else { + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, nb, ThreadPool::NoInit, + [&](const uint32_t c, size_t /* thread */) { + Channel &channel = input.channel[c0 + c]; + for (size_t y = 0; y < channel.h; y++) { + pixel_type *JXL_RESTRICT p = channel.Row(y); + const pixel_type *JXL_RESTRICT idx = indices.Row(y); + for (size_t x = 0; x < channel.w; x++) { + int index = idx[x]; + pixel_type_w val = 0; + const pixel_type palette_entry = + palette_internal::GetPaletteValue( + p_palette, index, /*c=*/c, + /*palette_size=*/palette.w, + /*onerow=*/onerow, /*bit_depth=*/bit_depth); + if (index < static_cast(nb_deltas)) { + PredictionResult pred = PredictNoTreeNoWP( + channel.w, p + x, onerow_image, x, y, predictor); + val = pred.guess + palette_entry; + } else { + val = palette_entry; + } + p[x] = val; + } + } + }, + "UndoDeltaPaletteNoWP")); + } + } + if (c0 >= 
input.nb_meta_channels) { + // Palette was done on normal channels + input.nb_meta_channels--; + } else { + // Palette was done on metachannels + JXL_ASSERT(static_cast(input.nb_meta_channels) >= 2 - nb); + input.nb_meta_channels -= 2 - nb; + JXL_ASSERT(begin_c + nb - 1 < input.nb_meta_channels); + } + input.channel.erase(input.channel.begin(), input.channel.begin() + 1); + return num_errors.load(std::memory_order_relaxed) == 0; +} + +Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t nb_colors, uint32_t nb_deltas, bool lossy) { + JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, end_c)); + + size_t nb = end_c - begin_c + 1; + if (begin_c >= input.nb_meta_channels) { + // Palette was done on normal channels + input.nb_meta_channels++; + } else { + // Palette was done on metachannels + JXL_ASSERT(end_c < input.nb_meta_channels); + // we remove nb-1 metachannels and add one + input.nb_meta_channels += 2 - nb; + } + input.channel.erase(input.channel.begin() + begin_c + 1, + input.channel.begin() + end_c + 1); + Channel pch(nb_colors + nb_deltas, nb); + pch.hshift = -1; + pch.vshift = -1; + input.channel.insert(input.channel.begin(), std::move(pch)); + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.h new file mode 100644 index 0000000000..cc0f67960b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/palette.h @@ -0,0 +1,129 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_ + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels + +namespace jxl { + +namespace palette_internal { + +static constexpr int kMaxPaletteLookupTableSize = 1 << 16; + +static constexpr int kRgbChannels = 3; + +// 5x5x5 color cube for the larger cube. +static constexpr int kLargeCube = 5; + +// Smaller interleaved color cube to fill the holes of the larger cube. +static constexpr int kSmallCube = 4; +static constexpr int kSmallCubeBits = 2; +// kSmallCube ** 3 +static constexpr int kLargeCubeOffset = kSmallCube * kSmallCube * kSmallCube; + +static inline pixel_type Scale(uint64_t value, uint64_t bit_depth, + uint64_t denom) { + // return (value * ((static_cast(1) << bit_depth) - 1)) / denom; + // We only call this function with kSmallCube or kLargeCube - 1 as denom, + // allowing us to avoid a division here. + JXL_ASSERT(denom == 4); + return (value * ((static_cast(1) << bit_depth) - 1)) >> 2; +} + +// The purpose of this function is solely to extend the interpretation of +// palette indices to implicit values. If index < nb_deltas, indicating that the +// result is a delta palette entry, it is the responsibility of the caller to +// treat it as such. 
+static JXL_MAYBE_UNUSED pixel_type +GetPaletteValue(const pixel_type *const palette, int index, const size_t c, + const int palette_size, const int onerow, const int bit_depth) { + if (index < 0) { + static constexpr std::array, 72> kDeltaPalette = { + { + {{0, 0, 0}}, {{4, 4, 4}}, {{11, 0, 0}}, + {{0, 0, -13}}, {{0, -12, 0}}, {{-10, -10, -10}}, + {{-18, -18, -18}}, {{-27, -27, -27}}, {{-18, -18, 0}}, + {{0, 0, -32}}, {{-32, 0, 0}}, {{-37, -37, -37}}, + {{0, -32, -32}}, {{24, 24, 45}}, {{50, 50, 50}}, + {{-45, -24, -24}}, {{-24, -45, -45}}, {{0, -24, -24}}, + {{-34, -34, 0}}, {{-24, 0, -24}}, {{-45, -45, -24}}, + {{64, 64, 64}}, {{-32, 0, -32}}, {{0, -32, 0}}, + {{-32, 0, 32}}, {{-24, -45, -24}}, {{45, 24, 45}}, + {{24, -24, -45}}, {{-45, -24, 24}}, {{80, 80, 80}}, + {{64, 0, 0}}, {{0, 0, -64}}, {{0, -64, -64}}, + {{-24, -24, 45}}, {{96, 96, 96}}, {{64, 64, 0}}, + {{45, -24, -24}}, {{34, -34, 0}}, {{112, 112, 112}}, + {{24, -45, -45}}, {{45, 45, -24}}, {{0, -32, 32}}, + {{24, -24, 45}}, {{0, 96, 96}}, {{45, -24, 24}}, + {{24, -45, -24}}, {{-24, -45, 24}}, {{0, -64, 0}}, + {{96, 0, 0}}, {{128, 128, 128}}, {{64, 0, 64}}, + {{144, 144, 144}}, {{96, 96, 0}}, {{-36, -36, 36}}, + {{45, -24, -45}}, {{45, -45, -24}}, {{0, 0, -96}}, + {{0, 128, 128}}, {{0, 96, 0}}, {{45, 24, -45}}, + {{-128, 0, 0}}, {{24, -45, 24}}, {{-45, 24, -45}}, + {{64, 0, -64}}, {{64, -64, -64}}, {{96, 0, 96}}, + {{45, -45, 24}}, {{24, 45, -45}}, {{64, 64, -64}}, + {{128, 128, 0}}, {{0, 0, -128}}, {{-24, 45, -45}}, + }}; + if (c >= kRgbChannels) { + return 0; + } + // Do not open the brackets, otherwise INT32_MIN negation could overflow. 
+ index = -(index + 1); + index %= 1 + 2 * (kDeltaPalette.size() - 1); + static constexpr int kMultiplier[] = {-1, 1}; + pixel_type result = + kDeltaPalette[((index + 1) >> 1)][c] * kMultiplier[index & 1]; + if (bit_depth > 8) { + result *= static_cast(1) << (bit_depth - 8); + } + return result; + } else if (palette_size <= index && index < palette_size + kLargeCubeOffset) { + if (c >= kRgbChannels) return 0; + index -= palette_size; + index >>= c * kSmallCubeBits; + return Scale(index % kSmallCube, bit_depth, kSmallCube) + + (1 << (std::max(0, bit_depth - 3))); + } else if (palette_size + kLargeCubeOffset <= index) { + if (c >= kRgbChannels) return 0; + index -= palette_size + kLargeCubeOffset; + // TODO(eustas): should we take care of ambiguity created by + // index >= kLargeCube ** 3 ? + switch (c) { + case 0: + break; + case 1: + index /= kLargeCube; + break; + case 2: + index /= kLargeCube * kLargeCube; + break; + } + return Scale(index % kLargeCube, bit_depth, kLargeCube - 1); + } + return palette[c * onerow + static_cast(index)]; +} + +} // namespace palette_internal + +Status InvPalette(Image &input, uint32_t begin_c, uint32_t nb_colors, + uint32_t nb_deltas, Predictor predictor, + const weighted::Header &wp_header, ThreadPool *pool); + +Status MetaPalette(Image &input, uint32_t begin_c, uint32_t end_c, + uint32_t nb_colors, uint32_t nb_deltas, bool lossy); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_PALETTE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.cc new file mode 100644 index 0000000000..f3002a5ac3 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.cc @@ -0,0 +1,153 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/transform/rct.h" +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/rct.cc" +#include +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sub; + +template +void InvRCTRow(const pixel_type* in0, const pixel_type* in1, + const pixel_type* in2, pixel_type* out0, pixel_type* out1, + pixel_type* out2, size_t w) { + static_assert(transform_type >= 0 && transform_type < 7, + "Invalid transform type"); + int second = transform_type >> 1; + int third = transform_type & 1; + + size_t x = 0; + const HWY_FULL(pixel_type) d; + const size_t N = Lanes(d); + for (; x + N - 1 < w; x += N) { + if (transform_type == 6) { + auto Y = Load(d, in0 + x); + auto Co = Load(d, in1 + x); + auto Cg = Load(d, in2 + x); + Y = Sub(Y, ShiftRight<1>(Cg)); + auto G = Add(Cg, Y); + Y = Sub(Y, ShiftRight<1>(Co)); + auto R = Add(Y, Co); + Store(R, d, out0 + x); + Store(G, d, out1 + x); + Store(Y, d, out2 + x); + } else { + auto First = Load(d, in0 + x); + auto Second = Load(d, in1 + x); + auto Third = Load(d, in2 + x); + if (third) Third = Add(Third, First); + if (second == 1) { + Second = Add(Second, First); + } else if (second == 2) { + Second = Add(Second, ShiftRight<1>(Add(First, Third))); + } + Store(First, d, out0 + x); + Store(Second, d, out1 + x); + Store(Third, d, out2 + x); + } + } + for (; x < w; x++) { + if (transform_type == 6) { + pixel_type Y = in0[x]; + pixel_type Co = in1[x]; + pixel_type Cg = in2[x]; + pixel_type tmp = PixelAdd(Y, -(Cg >> 1)); + pixel_type G = PixelAdd(Cg, tmp); + pixel_type B = PixelAdd(tmp, -(Co >> 1)); + pixel_type R = PixelAdd(B, Co); + out0[x] = R; + out1[x] = G; + out2[x] = B; + } else { + pixel_type First = in0[x]; + pixel_type Second = in1[x]; + pixel_type Third = in2[x]; + if (third) Third = PixelAdd(Third, First); + if (second == 1) 
{ + Second = PixelAdd(Second, First); + } else if (second == 2) { + Second = PixelAdd(Second, (PixelAdd(First, Third) >> 1)); + } + out0[x] = First; + out1[x] = Second; + out2[x] = Third; + } + } +} + +Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { + JXL_RETURN_IF_ERROR(CheckEqualChannels(input, begin_c, begin_c + 2)); + size_t m = begin_c; + Channel& c0 = input.channel[m + 0]; + size_t w = c0.w; + size_t h = c0.h; + if (rct_type == 0) { // noop + return true; + } + // Permutation: 0=RGB, 1=GBR, 2=BRG, 3=RBG, 4=GRB, 5=BGR + int permutation = rct_type / 7; + JXL_CHECK(permutation < 6); + // 0-5 values have the low bit corresponding to Third and the high bits + // corresponding to Second. 6 corresponds to YCoCg. + // + // Second: 0=nop, 1=SubtractFirst, 2=SubtractAvgFirstThird + // + // Third: 0=nop, 1=SubtractFirst + int custom = rct_type % 7; + // Special case: permute-only. Swap channels around. + if (custom == 0) { + Channel ch0 = std::move(input.channel[m]); + Channel ch1 = std::move(input.channel[m + 1]); + Channel ch2 = std::move(input.channel[m + 2]); + input.channel[m + (permutation % 3)] = std::move(ch0); + input.channel[m + ((permutation + 1 + permutation / 3) % 3)] = + std::move(ch1); + input.channel[m + ((permutation + 2 - permutation / 3) % 3)] = + std::move(ch2); + return true; + } + constexpr decltype(&InvRCTRow<0>) inv_rct_row[] = { + InvRCTRow<0>, InvRCTRow<1>, InvRCTRow<2>, InvRCTRow<3>, + InvRCTRow<4>, InvRCTRow<5>, InvRCTRow<6>}; + JXL_RETURN_IF_ERROR(RunOnPool( + pool, 0, h, ThreadPool::NoInit, + [&](const uint32_t task, size_t /* thread */) { + const size_t y = task; + const pixel_type* in0 = input.channel[m].Row(y); + const pixel_type* in1 = input.channel[m + 1].Row(y); + const pixel_type* in2 = input.channel[m + 2].Row(y); + pixel_type* out0 = input.channel[m + (permutation % 3)].Row(y); + pixel_type* out1 = + input.channel[m + ((permutation + 1 + permutation / 3) % 3)].Row(y); + pixel_type* out2 = + 
input.channel[m + ((permutation + 2 - permutation / 3) % 3)].Row(y); + inv_rct_row[custom](in0, in1, in2, out0, out1, out2, w); + }, + "InvRCT")); + return true; +} + +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(InvRCT); +Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool) { + return HWY_DYNAMIC_DISPATCH(InvRCT)(input, begin_c, rct_type, pool); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.h new file mode 100644 index 0000000000..aef65621d5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/rct.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_RCT_H_ +#define LIB_JXL_MODULAR_TRANSFORM_RCT_H_ + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" // CheckEqualChannels + +namespace jxl { + +Status InvRCT(Image& input, size_t begin_c, size_t rct_type, ThreadPool* pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_RCT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.cc new file mode 100644 index 0000000000..8440d9e804 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.cc @@ -0,0 +1,478 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/transform/squeeze.h" + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/modular/transform/squeeze.cc" +#include +#include + +#include "lib/jxl/simd_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Abs; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::IfThenZeroElse; +using hwy::HWY_NAMESPACE::Lt; +using hwy::HWY_NAMESPACE::MulEven; +using hwy::HWY_NAMESPACE::Ne; +using hwy::HWY_NAMESPACE::Neg; +using hwy::HWY_NAMESPACE::OddEven; +using hwy::HWY_NAMESPACE::RebindToUnsigned; +using hwy::HWY_NAMESPACE::ShiftLeft; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sub; +using hwy::HWY_NAMESPACE::Xor; + +#if HWY_TARGET != HWY_SCALAR + +JXL_INLINE void FastUnsqueeze(const pixel_type *JXL_RESTRICT p_residual, + const pixel_type *JXL_RESTRICT p_avg, + const pixel_type *JXL_RESTRICT p_navg, + const pixel_type *p_pout, + pixel_type *JXL_RESTRICT p_out, + pixel_type *p_nout) { + const HWY_CAPPED(pixel_type, 8) d; + const RebindToUnsigned du; + const size_t N = Lanes(d); + auto onethird = Set(d, 0x55555556); + for (size_t x = 0; x < 8; x += N) { + auto avg = Load(d, p_avg + x); + auto next_avg = Load(d, p_navg + x); + auto top = Load(d, p_pout + x); + // Equivalent to SmoothTendency(top,avg,next_avg), but without branches + auto Ba = Sub(top, avg); + auto an = Sub(avg, next_avg); + auto nonmono = Xor(Ba, an); + auto absBa = Abs(Ba); + auto absan = Abs(an); + auto absBn = Abs(Sub(top, next_avg)); + // Compute a3 = absBa / 3 + auto a3e = BitCast(d, ShiftRight<32>(MulEven(absBa, onethird))); + 
auto a3oi = MulEven(Reverse(d, absBa), onethird); + auto a3o = BitCast( + d, Reverse(hwy::HWY_NAMESPACE::Repartition(), + a3oi)); + auto a3 = OddEven(a3o, a3e); + a3 = Add(a3, Add(absBn, Set(d, 2))); + auto absdiff = ShiftRight<2>(a3); + auto skipdiff = Ne(Ba, Zero(d)); + skipdiff = And(skipdiff, Ne(an, Zero(d))); + skipdiff = And(skipdiff, Lt(nonmono, Zero(d))); + auto absBa2 = Add(ShiftLeft<1>(absBa), And(absdiff, Set(d, 1))); + absdiff = IfThenElse(Gt(absdiff, absBa2), + Add(ShiftLeft<1>(absBa), Set(d, 1)), absdiff); + auto absan2 = ShiftLeft<1>(absan); + absdiff = IfThenElse(Gt(Add(absdiff, And(absdiff, Set(d, 1))), absan2), + absan2, absdiff); + auto diff1 = IfThenElse(Lt(top, next_avg), Neg(absdiff), absdiff); + auto tendency = IfThenZeroElse(skipdiff, diff1); + + auto diff_minus_tendency = Load(d, p_residual + x); + auto diff = Add(diff_minus_tendency, tendency); + auto out = + Add(avg, ShiftRight<1>( + Add(diff, BitCast(d, ShiftRight<31>(BitCast(du, diff)))))); + Store(out, d, p_out + x); + Store(Sub(out, diff), d, p_nout + x); + } +} + +#endif + +Status InvHSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) { + JXL_ASSERT(c < input.channel.size()); + JXL_ASSERT(rc < input.channel.size()); + Channel &chin = input.channel[c]; + const Channel &chin_residual = input.channel[rc]; + // These must be valid since we ran MetaApply already. + JXL_ASSERT(chin.w == DivCeil(chin.w + chin_residual.w, 2)); + JXL_ASSERT(chin.h == chin_residual.h); + + if (chin_residual.w == 0) { + // Short-circuit: output channel has same dimensions as input. + input.channel[c].hshift--; + return true; + } + + // Note: chin.w >= chin_residual.w and at most 1 different. 
+ Channel chout(chin.w + chin_residual.w, chin.h, chin.hshift - 1, chin.vshift); + JXL_DEBUG_V(4, + "Undoing horizontal squeeze of channel %i using residuals in " + "channel %i (going from width %" PRIuS " to %" PRIuS ")", + c, rc, chin.w, chout.w); + + if (chin_residual.h == 0) { + // Short-circuit: channel with no pixels. + input.channel[c] = std::move(chout); + return true; + } + auto unsqueeze_row = [&](size_t y, size_t x0) { + const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y); + const pixel_type *JXL_RESTRICT p_avg = chin.Row(y); + pixel_type *JXL_RESTRICT p_out = chout.Row(y); + for (size_t x = x0; x < chin_residual.w; x++) { + pixel_type_w diff_minus_tendency = p_residual[x]; + pixel_type_w avg = p_avg[x]; + pixel_type_w next_avg = (x + 1 < chin.w ? p_avg[x + 1] : avg); + pixel_type_w left = (x ? p_out[(x << 1) - 1] : avg); + pixel_type_w tendency = SmoothTendency(left, avg, next_avg); + pixel_type_w diff = diff_minus_tendency + tendency; + pixel_type_w A = avg + (diff / 2); + p_out[(x << 1)] = A; + pixel_type_w B = A - diff; + p_out[(x << 1) + 1] = B; + } + if (chout.w & 1) p_out[chout.w - 1] = p_avg[chin.w - 1]; + }; + + // somewhat complicated trickery just to be able to SIMD this. + // Horizontal unsqueeze has horizontal data dependencies, so we do + // 8 rows at a time and treat it as a vertical unsqueeze of a + // transposed 8x8 block (or 9x8 for one input). 
+ static constexpr const size_t kRowsPerThread = 8; + const auto unsqueeze_span = [&](const uint32_t task, size_t /* thread */) { + const size_t y0 = task * kRowsPerThread; + const size_t rows = std::min(kRowsPerThread, chin.h - y0); + size_t x = 0; + +#if HWY_TARGET != HWY_SCALAR + intptr_t onerow_in = chin.plane.PixelsPerRow(); + intptr_t onerow_inr = chin_residual.plane.PixelsPerRow(); + intptr_t onerow_out = chout.plane.PixelsPerRow(); + const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y0); + const pixel_type *JXL_RESTRICT p_avg = chin.Row(y0); + pixel_type *JXL_RESTRICT p_out = chout.Row(y0); + HWY_ALIGN pixel_type b_p_avg[9 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_residual[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_even[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_odd[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_evenT[8 * kRowsPerThread]; + HWY_ALIGN pixel_type b_p_out_oddT[8 * kRowsPerThread]; + const HWY_CAPPED(pixel_type, 8) d; + const size_t N = Lanes(d); + if (chin_residual.w > 16 && rows == kRowsPerThread) { + for (; x < chin_residual.w - 9; x += 8) { + Transpose8x8Block(p_residual + x, b_p_residual, onerow_inr); + Transpose8x8Block(p_avg + x, b_p_avg, onerow_in); + for (size_t y = 0; y < kRowsPerThread; y++) { + b_p_avg[8 * 8 + y] = p_avg[x + 8 + onerow_in * y]; + } + for (size_t i = 0; i < 8; i++) { + FastUnsqueeze( + b_p_residual + 8 * i, b_p_avg + 8 * i, b_p_avg + 8 * (i + 1), + (x + i ? 
b_p_out_odd + 8 * ((x + i - 1) & 7) : b_p_avg + 8 * i), + b_p_out_even + 8 * i, b_p_out_odd + 8 * i); + } + + Transpose8x8Block(b_p_out_even, b_p_out_evenT, 8); + Transpose8x8Block(b_p_out_odd, b_p_out_oddT, 8); + for (size_t y = 0; y < kRowsPerThread; y++) { + for (size_t i = 0; i < kRowsPerThread; i += N) { + auto even = Load(d, b_p_out_evenT + 8 * y + i); + auto odd = Load(d, b_p_out_oddT + 8 * y + i); + StoreInterleaved(d, even, odd, + p_out + ((x + i) << 1) + onerow_out * y); + } + } + } + } +#endif + for (size_t y = 0; y < rows; y++) { + unsqueeze_row(y0 + y, x); + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.h, kRowsPerThread), + ThreadPool::NoInit, unsqueeze_span, + "InvHorizontalSqueeze")); + input.channel[c] = std::move(chout); + return true; +} + +Status InvVSqueeze(Image &input, uint32_t c, uint32_t rc, ThreadPool *pool) { + JXL_ASSERT(c < input.channel.size()); + JXL_ASSERT(rc < input.channel.size()); + const Channel &chin = input.channel[c]; + const Channel &chin_residual = input.channel[rc]; + // These must be valid since we ran MetaApply already. + JXL_ASSERT(chin.h == DivCeil(chin.h + chin_residual.h, 2)); + JXL_ASSERT(chin.w == chin_residual.w); + + if (chin_residual.h == 0) { + // Short-circuit: output channel has same dimensions as input. + input.channel[c].vshift--; + return true; + } + + // Note: chin.h >= chin_residual.h and at most 1 different. + Channel chout(chin.w, chin.h + chin_residual.h, chin.hshift, chin.vshift - 1); + JXL_DEBUG_V( + 4, + "Undoing vertical squeeze of channel %i using residuals in channel " + "%i (going from height %" PRIuS " to %" PRIuS ")", + c, rc, chin.h, chout.h); + + if (chin_residual.w == 0) { + // Short-circuit: channel with no pixels. 
+ input.channel[c] = std::move(chout); + return true; + } + + static constexpr const int kColsPerThread = 64; + const auto unsqueeze_slice = [&](const uint32_t task, size_t /* thread */) { + const size_t x0 = task * kColsPerThread; + const size_t x1 = std::min((size_t)(task + 1) * kColsPerThread, chin.w); + const size_t w = x1 - x0; + // We only iterate up to std::min(chin_residual.h, chin.h) which is + // always chin_residual.h. + for (size_t y = 0; y < chin_residual.h; y++) { + const pixel_type *JXL_RESTRICT p_residual = chin_residual.Row(y) + x0; + const pixel_type *JXL_RESTRICT p_avg = chin.Row(y) + x0; + const pixel_type *JXL_RESTRICT p_navg = + chin.Row(y + 1 < chin.h ? y + 1 : y) + x0; + pixel_type *JXL_RESTRICT p_out = chout.Row(y << 1) + x0; + pixel_type *JXL_RESTRICT p_nout = chout.Row((y << 1) + 1) + x0; + const pixel_type *p_pout = y > 0 ? chout.Row((y << 1) - 1) + x0 : p_avg; + size_t x = 0; +#if HWY_TARGET != HWY_SCALAR + for (; x + 7 < w; x += 8) { + FastUnsqueeze(p_residual + x, p_avg + x, p_navg + x, p_pout + x, + p_out + x, p_nout + x); + } +#endif + for (; x < w; x++) { + pixel_type_w avg = p_avg[x]; + pixel_type_w next_avg = p_navg[x]; + pixel_type_w top = p_pout[x]; + pixel_type_w tendency = SmoothTendency(top, avg, next_avg); + pixel_type_w diff_minus_tendency = p_residual[x]; + pixel_type_w diff = diff_minus_tendency + tendency; + pixel_type_w out = avg + (diff / 2); + p_out[x] = out; + // If the chin_residual.h == chin.h, the output has an even number + // of rows so the next line is fine. Otherwise, this loop won't + // write to the last output row which is handled separately. 
+ p_nout[x] = out - diff; + } + } + }; + JXL_RETURN_IF_ERROR(RunOnPool(pool, 0, DivCeil(chin.w, kColsPerThread), + ThreadPool::NoInit, unsqueeze_slice, + "InvVertSqueeze")); + + if (chout.h & 1) { + size_t y = chin.h - 1; + const pixel_type *p_avg = chin.Row(y); + pixel_type *p_out = chout.Row(y << 1); + for (size_t x = 0; x < chin.w; x++) { + p_out[x] = p_avg[x]; + } + } + input.channel[c] = std::move(chout); + return true; +} + +Status InvSqueeze(Image &input, std::vector parameters, + ThreadPool *pool) { + for (int i = parameters.size() - 1; i >= 0; i--) { + JXL_RETURN_IF_ERROR( + CheckMetaSqueezeParams(parameters[i], input.channel.size())); + bool horizontal = parameters[i].horizontal; + bool in_place = parameters[i].in_place; + uint32_t beginc = parameters[i].begin_c; + uint32_t endc = parameters[i].begin_c + parameters[i].num_c - 1; + uint32_t offset; + if (in_place) { + offset = endc + 1; + } else { + offset = input.channel.size() + beginc - endc - 1; + } + if (beginc < input.nb_meta_channels) { + // This is checked in MetaSqueeze. + JXL_ASSERT(input.nb_meta_channels > parameters[i].num_c); + input.nb_meta_channels -= parameters[i].num_c; + } + + for (uint32_t c = beginc; c <= endc; c++) { + uint32_t rc = offset + c - beginc; + // MetaApply should imply that `rc` is within range, otherwise there's a + // programming bug. 
+ JXL_ASSERT(rc < input.channel.size()); + if ((input.channel[c].w < input.channel[rc].w) || + (input.channel[c].h < input.channel[rc].h)) { + return JXL_FAILURE("Corrupted squeeze transform"); + } + if (horizontal) { + JXL_RETURN_IF_ERROR(InvHSqueeze(input, c, rc, pool)); + } else { + JXL_RETURN_IF_ERROR(InvVSqueeze(input, c, rc, pool)); + } + } + input.channel.erase(input.channel.begin() + offset, + input.channel.begin() + offset + (endc - beginc + 1)); + } + return true; +} + +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace jxl { + +HWY_EXPORT(InvSqueeze); +Status InvSqueeze(Image &input, std::vector parameters, + ThreadPool *pool) { + return HWY_DYNAMIC_DISPATCH(InvSqueeze)(input, parameters, pool); +} + +void DefaultSqueezeParameters(std::vector *parameters, + const Image &image) { + int nb_channels = image.channel.size() - image.nb_meta_channels; + + parameters->clear(); + size_t w = image.channel[image.nb_meta_channels].w; + size_t h = image.channel[image.nb_meta_channels].h; + JXL_DEBUG_V( + 7, "Default squeeze parameters for %" PRIuS "x%" PRIuS " image: ", w, h); + + // do horizontal first on wide images; vertical first on tall images + bool wide = (w > h); + + if (nb_channels > 2 && image.channel[image.nb_meta_channels + 1].w == w && + image.channel[image.nb_meta_channels + 1].h == h) { + // assume channels 1 and 2 are chroma, and can be squeezed first for 4:2:0 + // previews + JXL_DEBUG_V(7, "(4:2:0 chroma), %" PRIuS "x%" PRIuS " image", w, h); + SqueezeParams params; + // horizontal chroma squeeze + params.horizontal = true; + params.in_place = false; + params.begin_c = image.nb_meta_channels + 1; + params.num_c = 2; + parameters->push_back(params); + params.horizontal = false; + // vertical chroma squeeze + parameters->push_back(params); + } + SqueezeParams params; + params.begin_c = image.nb_meta_channels; + params.num_c = nb_channels; + params.in_place = true; + + if (!wide) { + if (h > 
JXL_MAX_FIRST_PREVIEW_SIZE) { + params.horizontal = false; + parameters->push_back(params); + h = (h + 1) / 2; + JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h); + } + } + while (w > JXL_MAX_FIRST_PREVIEW_SIZE || h > JXL_MAX_FIRST_PREVIEW_SIZE) { + if (w > JXL_MAX_FIRST_PREVIEW_SIZE) { + params.horizontal = true; + parameters->push_back(params); + w = (w + 1) / 2; + JXL_DEBUG_V(7, "Horizontal (%" PRIuS "x%" PRIuS "), ", w, h); + } + if (h > JXL_MAX_FIRST_PREVIEW_SIZE) { + params.horizontal = false; + parameters->push_back(params); + h = (h + 1) / 2; + JXL_DEBUG_V(7, "Vertical (%" PRIuS "x%" PRIuS "), ", w, h); + } + } + JXL_DEBUG_V(7, "that's it"); +} + +Status CheckMetaSqueezeParams(const SqueezeParams ¶meter, + int num_channels) { + int c1 = parameter.begin_c; + int c2 = parameter.begin_c + parameter.num_c - 1; + if (c1 < 0 || c1 >= num_channels || c2 < 0 || c2 >= num_channels || c2 < c1) { + return JXL_FAILURE("Invalid channel range"); + } + return true; +} + +Status MetaSqueeze(Image &image, std::vector *parameters) { + if (parameters->empty()) { + DefaultSqueezeParameters(parameters, image); + } + + for (size_t i = 0; i < parameters->size(); i++) { + JXL_RETURN_IF_ERROR( + CheckMetaSqueezeParams((*parameters)[i], image.channel.size())); + bool horizontal = (*parameters)[i].horizontal; + bool in_place = (*parameters)[i].in_place; + uint32_t beginc = (*parameters)[i].begin_c; + uint32_t endc = (*parameters)[i].begin_c + (*parameters)[i].num_c - 1; + + uint32_t offset; + if (beginc < image.nb_meta_channels) { + if (endc >= image.nb_meta_channels) { + return JXL_FAILURE("Invalid squeeze: mix of meta and nonmeta channels"); + } + if (!in_place) { + return JXL_FAILURE( + "Invalid squeeze: meta channels require in-place residuals"); + } + image.nb_meta_channels += (*parameters)[i].num_c; + } + if (in_place) { + offset = endc + 1; + } else { + offset = image.channel.size(); + } + for (uint32_t c = beginc; c <= endc; c++) { + if (image.channel[c].hshift > 30 
|| image.channel[c].vshift > 30) { + return JXL_FAILURE("Too many squeezes: shift > 30"); + } + size_t w = image.channel[c].w; + size_t h = image.channel[c].h; + if (w == 0 || h == 0) return JXL_FAILURE("Squeezing empty channel"); + if (horizontal) { + image.channel[c].w = (w + 1) / 2; + if (image.channel[c].hshift >= 0) image.channel[c].hshift++; + w = w - (w + 1) / 2; + } else { + image.channel[c].h = (h + 1) / 2; + if (image.channel[c].vshift >= 0) image.channel[c].vshift++; + h = h - (h + 1) / 2; + } + image.channel[c].shrink(); + Channel dummy(w, h); + dummy.hshift = image.channel[c].hshift; + dummy.vshift = image.channel[c].vshift; + + image.channel.insert(image.channel.begin() + offset + (c - beginc), + std::move(dummy)); + JXL_DEBUG_V(8, "MetaSqueeze applied, current image: %s", + image.DebugString().c_str()); + } + } + return true; +} + +} // namespace jxl + +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.h new file mode 100644 index 0000000000..fb18710a6f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/squeeze.h @@ -0,0 +1,90 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_ +#define LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_ + +// Haar-like transform: halves the resolution in one direction +// A B -> (A+B)>>1 in one channel (average) -> same range as +// original channel +// A-B - tendency in a new channel ('residual' needed to make +// the transform reversible) +// -> theoretically range could be 2.5 +// times larger (2 times without the +// 'tendency'), but there should be lots +// of zeroes +// Repeated application (alternating horizontal and vertical squeezes) results +// in downscaling +// +// The default coefficient ordering is low-frequency to high-frequency, as in +// M. Antonini, M. Barlaud, P. Mathieu and I. Daubechies, "Image coding using +// wavelet transform", IEEE Transactions on Image Processing, vol. 1, no. 2, pp. +// 205-220, April 1992, doi: 10.1109/83.136597. + +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/common.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/transform.h" + +#define JXL_MAX_FIRST_PREVIEW_SIZE 8 + +namespace jxl { + +/* + int avg=(A+B)>>1; + int diff=(A-B); + int rA=(diff+(avg<<1)+(diff&1))>>1; + int rB=rA-diff; + +*/ +// |A B|C D|E F| +// p a n p=avg(A,B), a=avg(C,D), n=avg(E,F) +// +// Goal: estimate C-D (avoiding ringing artifacts) +// (ensuring that in smooth areas, a zero residual corresponds to a smooth +// gradient) + +// best estimate for C: (B + 2*a)/3 +// best estimate for D: (n + 3*a)/4 +// best estimate for C-D: 4*B - 3*n - a /12 + +// avoid ringing by 1) only doing this if B <= a <= n or B >= a >= n +// (otherwise, this is not a smooth area and we cannot really estimate C-D) +// 2) making sure that B <= C <= D <= n or B >= C >= D >= n + +inline pixel_type_w SmoothTendency(pixel_type_w B, pixel_type_w a, + pixel_type_w n) { + pixel_type_w diff = 0; + if (B >= a && a >= n) { + diff = (4 * B - 3 * n - a + 6) / 12; + // 2C = a<<1 + diff - diff&1 <= 2B so diff - diff&1 <= 2B - 2a + // 2D = 
a<<1 - diff - diff&1 >= 2n so diff + diff&1 <= 2a - 2n + if (diff - (diff & 1) > 2 * (B - a)) diff = 2 * (B - a) + 1; + if (diff + (diff & 1) > 2 * (a - n)) diff = 2 * (a - n); + } else if (B <= a && a <= n) { + diff = (4 * B - 3 * n - a - 6) / 12; + // 2C = a<<1 + diff + diff&1 >= 2B so diff + diff&1 >= 2B - 2a + // 2D = a<<1 - diff + diff&1 <= 2n so diff - diff&1 >= 2a - 2n + if (diff + (diff & 1) < 2 * (B - a)) diff = 2 * (B - a) - 1; + if (diff - (diff & 1) < 2 * (a - n)) diff = 2 * (a - n); + } + return diff; +} + +void DefaultSqueezeParameters(std::vector *parameters, + const Image &image); + +Status CheckMetaSqueezeParams(const SqueezeParams ¶meter, int num_channels); + +Status MetaSqueeze(Image &image, std::vector *parameters); + +Status InvSqueeze(Image &input, std::vector parameters, + ThreadPool *pool); + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_SQUEEZE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.cc b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.cc new file mode 100644 index 0000000000..d9f2b435bf --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.cc @@ -0,0 +1,98 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/modular/transform/transform.h" + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/modular_image.h" +#include "lib/jxl/modular/transform/palette.h" +#include "lib/jxl/modular/transform/rct.h" +#include "lib/jxl/modular/transform/squeeze.h" + +namespace jxl { + +SqueezeParams::SqueezeParams() { Bundle::Init(this); } +Transform::Transform(TransformId id) { + Bundle::Init(this); + this->id = id; +} + +Status Transform::Inverse(Image &input, const weighted::Header &wp_header, + ThreadPool *pool) { + JXL_DEBUG_V(6, "Input channels (%" PRIuS ", %" PRIuS " meta): ", + input.channel.size(), input.nb_meta_channels); + switch (id) { + case TransformId::kRCT: + return InvRCT(input, begin_c, rct_type, pool); + case TransformId::kSqueeze: + return InvSqueeze(input, squeezes, pool); + case TransformId::kPalette: + return InvPalette(input, begin_c, nb_colors, nb_deltas, predictor, + wp_header, pool); + default: + return JXL_FAILURE("Unknown transformation (ID=%u)", + static_cast(id)); + } +} + +Status Transform::MetaApply(Image &input) { + JXL_DEBUG_V(6, "MetaApply input: %s", input.DebugString().c_str()); + switch (id) { + case TransformId::kRCT: + JXL_DEBUG_V(2, "Transform: kRCT, rct_type=%" PRIu32, rct_type); + return CheckEqualChannels(input, begin_c, begin_c + 2); + case TransformId::kSqueeze: + JXL_DEBUG_V(2, "Transform: kSqueeze:"); +#if JXL_DEBUG_V_LEVEL >= 2 + { + auto squeezes_copy = squeezes; + if (squeezes_copy.empty()) { + DefaultSqueezeParameters(&squeezes_copy, input); + } + for (const auto ¶ms : squeezes_copy) { + JXL_DEBUG_V( + 2, + " squeeze params: horizontal=%d, in_place=%d, begin_c=%" PRIu32 + ", num_c=%" PRIu32, + params.horizontal, params.in_place, params.begin_c, params.num_c); + } + } +#endif + return MetaSqueeze(input, &squeezes); + case TransformId::kPalette: + JXL_DEBUG_V(2, + "Transform: kPalette, begin_c=%" PRIu32 ", num_c=%" PRIu32 + ", nb_colors=%" PRIu32 ", nb_deltas=%" PRIu32, + 
begin_c, num_c, nb_colors, nb_deltas); + return MetaPalette(input, begin_c, begin_c + num_c - 1, nb_colors, + nb_deltas, lossy_palette); + default: + return JXL_FAILURE("Unknown transformation (ID=%u)", + static_cast(id)); + } +} + +Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2) { + if (c1 > image.channel.size() || c2 >= image.channel.size() || c2 < c1) { + return JXL_FAILURE("Invalid channel range: %u..%u (there are only %" PRIuS + " channels)", + c1, c2, image.channel.size()); + } + if (c1 < image.nb_meta_channels && c2 >= image.nb_meta_channels) { + return JXL_FAILURE("Invalid: transforming mix of meta and nonmeta"); + } + const auto &ch1 = image.channel[c1]; + for (size_t c = c1 + 1; c <= c2; c++) { + const auto &ch2 = image.channel[c]; + if (ch1.w != ch2.w || ch1.h != ch2.h || ch1.hshift != ch2.hshift || + ch1.vshift != ch2.vshift) { + return false; + } + } + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.h b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.h new file mode 100644 index 0000000000..d5d3259f7a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular/transform/transform.h @@ -0,0 +1,148 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_ +#define LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_ + +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/modular/encoding/context_predict.h" +#include "lib/jxl/modular/options.h" + +namespace jxl { + +enum class TransformId : uint32_t { + // G, R-G, B-G and variants (including YCoCg). + kRCT = 0, + + // Color palette. Parameters are: [begin_c] [end_c] [nb_colors] + kPalette = 1, + + // Squeezing (Haar-style) + kSqueeze = 2, + + // Invalid for now. 
+ kInvalid = 3, +}; + +struct SqueezeParams : public Fields { + JXL_FIELDS_NAME(SqueezeParams) + bool horizontal; + bool in_place; + uint32_t begin_c; + uint32_t num_c; + SqueezeParams(); + Status VisitFields(Visitor *JXL_RESTRICT visitor) override { + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &horizontal)); + JXL_QUIET_RETURN_IF_ERROR(visitor->Bool(false, &in_place)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Bits(3), BitsOffset(6, 8), + BitsOffset(10, 72), + BitsOffset(13, 1096), 0, &begin_c)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(2), Val(3), BitsOffset(4, 4), 2, &num_c)); + return true; + } +}; + +class Transform : public Fields { + public: + TransformId id; + // for Palette and RCT. + uint32_t begin_c; + // for RCT. 42 possible values starting from 0. + uint32_t rct_type; + // Only for Palette and NearLossless. + uint32_t num_c; + // Only for Palette. + uint32_t nb_colors; + uint32_t nb_deltas; + // for Squeeze. Default squeeze if empty. + std::vector squeezes; + // for NearLossless, not serialized. + int max_delta_error; + // Serialized for Palette. + Predictor predictor; + // for Palette, not serialized. + bool ordered_palette = true; + bool lossy_palette = false; + + explicit Transform(TransformId id); + // default constructor for bundles. 
+ Transform() : Transform(TransformId::kInvalid) {} + + Status VisitFields(Visitor *JXL_RESTRICT visitor) override { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + Val((uint32_t)TransformId::kRCT), Val((uint32_t)TransformId::kPalette), + Val((uint32_t)TransformId::kSqueeze), + Val((uint32_t)TransformId::kInvalid), (uint32_t)TransformId::kRCT, + reinterpret_cast(&id))); + if (id == TransformId::kInvalid) { + return JXL_FAILURE("Invalid transform ID"); + } + if (visitor->Conditional(id == TransformId::kRCT || + id == TransformId::kPalette)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Bits(3), BitsOffset(6, 8), BitsOffset(10, 72), + BitsOffset(13, 1096), 0, &begin_c)); + } + if (visitor->Conditional(id == TransformId::kRCT)) { + // 0-41, default YCoCg. + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(6), Bits(2), BitsOffset(4, 2), + BitsOffset(6, 10), 6, &rct_type)); + if (rct_type >= 42) { + return JXL_FAILURE("Invalid transform RCT type"); + } + } + if (visitor->Conditional(id == TransformId::kPalette)) { + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(1), Val(3), Val(4), BitsOffset(13, 1), 3, &num_c)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + BitsOffset(8, 0), BitsOffset(10, 256), BitsOffset(12, 1280), + BitsOffset(16, 5376), 256, &nb_colors)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), BitsOffset(8, 1), BitsOffset(10, 257), + BitsOffset(16, 1281), 0, &nb_deltas)); + JXL_QUIET_RETURN_IF_ERROR( + visitor->Bits(4, (uint32_t)Predictor::Zero, + reinterpret_cast(&predictor))); + if (predictor >= Predictor::Best) { + return JXL_FAILURE("Invalid predictor"); + } + } + + if (visitor->Conditional(id == TransformId::kSqueeze)) { + uint32_t num_squeezes = static_cast(squeezes.size()); + JXL_QUIET_RETURN_IF_ERROR( + visitor->U32(Val(0), BitsOffset(4, 1), BitsOffset(6, 9), + BitsOffset(8, 41), 0, &num_squeezes)); + if (visitor->IsReading()) squeezes.resize(num_squeezes); + for (size_t i = 0; i < num_squeezes; i++) { + 
JXL_QUIET_RETURN_IF_ERROR(visitor->VisitNested(&squeezes[i])); + } + } + return true; + } + + JXL_FIELDS_NAME(Transform) + + Status Inverse(Image &input, const weighted::Header &wp_header, + ThreadPool *pool = nullptr); + Status MetaApply(Image &input); +}; + +Status CheckEqualChannels(const Image &image, uint32_t c1, uint32_t c2); + +static inline pixel_type PixelAdd(pixel_type a, pixel_type b) { + return static_cast(static_cast(a) + + static_cast(b)); +} + +} // namespace jxl + +#endif // LIB_JXL_MODULAR_TRANSFORM_TRANSFORM_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/modular_test.cc b/third-party/libjxl/libjxl/lib/jxl/modular_test.cc new file mode 100644 index 0000000000..76f4a28425 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/modular_test.cc @@ -0,0 +1,538 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include + +#include +#include +#include +#include + +#include "lib/extras/codec.h" +#include "lib/extras/dec/jxl.h" +#include "lib/extras/metrics.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/override.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/enc_file.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/enc_toc.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/modular/encoding/enc_encoding.h" +#include "lib/jxl/modular/encoding/encoding.h" +#include "lib/jxl/modular/encoding/ma_common.h" +#include 
"lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { +using test::Roundtrip; + +void TestLosslessGroups(size_t group_size_shift) { + const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png"); + CompressParams cparams; + cparams.SetLossless(); + cparams.modular_group_size_shift = group_size_shift; + + CodecInOut io_out; + + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + io.ShrinkTo(io.xsize() / 4, io.ysize() / 4); + + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size)); + EXPECT_LE(compressed_size, 280000u); + JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io_out.Main().color(), _)); +} + +TEST(ModularTest, RoundtripLosslessGroups128) { TestLosslessGroups(0); } + +TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups512)) { + TestLosslessGroups(2); +} + +TEST(ModularTest, JXL_TSAN_SLOW_TEST(RoundtripLosslessGroups1024)) { + TestLosslessGroups(3); +} + +TEST(ModularTest, RoundtripLosslessCustomWP_PermuteRCT) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + CompressParams cparams; + cparams.SetLossless(); + // 9 = permute to GBR, to test the special case of permutation-only + cparams.colorspace = 9; + // slowest speed so different WP modes are tried + cparams.speed_tier = SpeedTier::kTortoise; + cparams.options.predictor = {Predictor::Weighted}; + + CodecInOut io_out; + + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + io.ShrinkTo(100, 100); + + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size)); + EXPECT_LE(compressed_size, 10169u); + JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io_out.Main().color(), _)); +} + +TEST(ModularTest, RoundtripLossyDeltaPalette) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + CompressParams cparams; + cparams.modular_mode = true; + 
cparams.color_transform = jxl::ColorTransform::kNone; + cparams.lossy_palette = true; + cparams.palette_colors = 0; + + CodecInOut io_out; + + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + io.ShrinkTo(300, 100); + + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size)); + EXPECT_LE(compressed_size, 6800u); + EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(1.5)); +} +TEST(ModularTest, RoundtripLossyDeltaPaletteWP) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + CompressParams cparams; + cparams.SetLossless(); + cparams.lossy_palette = true; + cparams.palette_colors = 0; + cparams.options.predictor = jxl::Predictor::Weighted; + + CodecInOut io_out; + + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + io.ShrinkTo(300, 100); + + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size)); + EXPECT_LE(compressed_size, 7000u); + EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(10.1)); +} + +TEST(ModularTest, RoundtripLossy) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + CompressParams cparams; + cparams.modular_mode = true; + cparams.butteraugli_distance = 2.f; + cparams.SetCms(GetJxlCms()); + + CodecInOut io_out; + + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size)); + EXPECT_LE(compressed_size, 30000u); + EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(2.3)); +} + +TEST(ModularTest, RoundtripLossy16) { + const PaddedBytes orig = jxl::test::ReadTestData( + 
"external/raw.pixls/DJI-FC6310-16bit_709_v4_krita.png"); + CompressParams cparams; + cparams.modular_mode = true; + cparams.butteraugli_distance = 2.f; + + CodecInOut io_out; + + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + JXL_CHECK(!io.metadata.m.have_preview); + JXL_CHECK(io.frames.size() == 1); + JXL_CHECK(io.frames[0].TransformTo(ColorEncoding::SRGB(), GetJxlCms())); + io.metadata.m.color_encoding = ColorEncoding::SRGB(); + + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io_out, _, &compressed_size)); + EXPECT_LE(compressed_size, 300u); + EXPECT_THAT(ButteraugliDistance(io.frames, io_out.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr), + IsSlightlyBelow(1.6)); +} + +TEST(ModularTest, RoundtripExtraProperties) { + constexpr size_t kSize = 250; + Image image(kSize, kSize, /*bitdepth=*/8, 3); + ModularOptions options; + options.max_properties = 4; + options.predictor = Predictor::Zero; + Rng rng(0); + for (size_t y = 0; y < kSize; y++) { + for (size_t x = 0; x < kSize; x++) { + image.channel[0].plane.Row(y)[x] = image.channel[2].plane.Row(y)[x] = + rng.UniformU(0, 9); + } + } + ZeroFillImage(&image.channel[1].plane); + BitWriter writer; + ASSERT_TRUE(ModularGenericCompress(image, options, &writer)); + writer.ZeroPadToByte(); + Image decoded(kSize, kSize, /*bitdepth=*/8, image.channel.size()); + for (size_t i = 0; i < image.channel.size(); i++) { + const Channel& ch = image.channel[i]; + decoded.channel[i] = Channel(ch.w, ch.h, ch.hshift, ch.vshift); + } + Status status = true; + { + BitReader reader(writer.GetSpan()); + BitReaderScopedCloser closer(&reader, &status); + ASSERT_TRUE(ModularGenericDecompress(&reader, decoded, /*header=*/nullptr, + /*group_id=*/0, &options)); + } + ASSERT_TRUE(status); + ASSERT_EQ(image.channel.size(), decoded.channel.size()); + for (size_t c = 0; c < image.channel.size(); c++) { + for (size_t y = 0; y < image.channel[c].plane.ysize(); y++) { + for (size_t x = 0; x < 
image.channel[c].plane.xsize(); x++) { + EXPECT_EQ(image.channel[c].plane.Row(y)[x], + decoded.channel[c].plane.Row(y)[x]) + << "c = " << c << ", x = " << x << ", y = " << y; + } + } + } +} + +TEST(ModularTest, RoundtripLosslessCustomSqueeze) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/tmshre_riaphotographs_srgb8.png"); + CodecInOut io; + ASSERT_TRUE(SetFromBytes(Span(orig), &io)); + + CompressParams cparams; + cparams.modular_mode = true; + cparams.color_transform = jxl::ColorTransform::kNone; + cparams.butteraugli_distance = 0.f; + cparams.options.predictor = {Predictor::Zero}; + cparams.speed_tier = SpeedTier::kThunder; + cparams.responsive = 1; + // Custom squeeze params, atm just for testing + SqueezeParams p; + p.horizontal = true; + p.in_place = false; + p.begin_c = 0; + p.num_c = 3; + cparams.squeezes.push_back(p); + p.begin_c = 1; + p.in_place = true; + p.horizontal = false; + cparams.squeezes.push_back(p); + + CodecInOut io2; + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size)); + EXPECT_LE(compressed_size, 265000u); + JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io2.Main().color(), _)); +} + +struct RoundtripLosslessConfig { + int bitdepth; + int responsive; +}; +class ModularTestParam + : public ::testing::TestWithParam {}; + +std::vector GenerateLosslessTests() { + std::vector all; + for (int responsive = 0; responsive <= 1; responsive++) { + for (int bitdepth = 1; bitdepth < 32; bitdepth++) { + if (responsive && bitdepth > 30) continue; + all.push_back({bitdepth, responsive}); + } + } + return all; +} +std::string LosslessTestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param.bitdepth << "bit"; + if (info.param.responsive) name << "Squeeze"; + return name.str(); +} + +JXL_GTEST_INSTANTIATE_TEST_SUITE_P(RoundtripLossless, ModularTestParam, + testing::ValuesIn(GenerateLosslessTests()), + LosslessTestDescription); + 
+TEST_P(ModularTestParam, RoundtripLossless) { + RoundtripLosslessConfig config = GetParam(); + int bitdepth = config.bitdepth; + int responsive = config.responsive; + + ThreadPool* pool = nullptr; + Rng generator(123); + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + CodecInOut io1; + ASSERT_TRUE(SetFromBytes(Span(orig), &io1, pool)); + + // vary the dimensions a bit, in case of bugs related to + // even vs odd width or height. + size_t xsize = 423 + bitdepth; + size_t ysize = 467 + bitdepth; + + CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB(false); + io.metadata.m.SetUintSamples(bitdepth); + + double factor = ((1lu << bitdepth) - 1lu); + double ifactor = 1.0 / factor; + Image3F noise_added(xsize, ysize); + + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < ysize; y++) { + const float* in = io1.Main().color()->PlaneRow(c, y); + float* out = noise_added.PlaneRow(c, y); + for (size_t x = 0; x < xsize; x++) { + // make the least significant bits random + float f = in[x] + generator.UniformF(0.0f, 1.f / 255.f); + if (f > 1.f) f = 1.f; + // quantize to the bitdepth we're testing + unsigned int u = f * factor + 0.5; + out[x] = u * ifactor; + } + } + } + io.SetFromImage(std::move(noise_added), jxl::ColorEncoding::SRGB(false)); + + CompressParams cparams; + cparams.modular_mode = true; + cparams.color_transform = jxl::ColorTransform::kNone; + cparams.butteraugli_distance = 0.f; + cparams.options.predictor = {Predictor::Zero}; + cparams.speed_tier = SpeedTier::kThunder; + cparams.responsive = responsive; + CodecInOut io2; + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size)); + EXPECT_LE(compressed_size, bitdepth * xsize * ysize / 3); + EXPECT_LE(0, ComputeDistance2(io.Main(), io2.Main(), GetJxlCms())); + size_t different = 0; + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < ysize; y++) { + 
const float* in = io.Main().color()->PlaneRow(c, y); + const float* out = io2.Main().color()->PlaneRow(c, y); + for (size_t x = 0; x < xsize; x++) { + uint32_t uin = in[x] * factor + 0.5; + uint32_t uout = out[x] * factor + 0.5; + // check that the integer values are identical + if (uin != uout) different++; + } + } + } + EXPECT_EQ(different, 0); +} + +TEST(ModularTest, RoundtripLosslessCustomFloat) { + CodecInOut io; + size_t xsize = 100, ysize = 300; + io.SetSize(xsize, ysize); + io.metadata.m.bit_depth.bits_per_sample = 18; + io.metadata.m.bit_depth.exponent_bits_per_sample = 6; + io.metadata.m.bit_depth.floating_point_sample = true; + io.metadata.m.modular_16_bit_buffer_sufficient = false; + ColorEncoding color_encoding; + color_encoding.tf.SetTransferFunction(TransferFunction::kLinear); + color_encoding.SetColorSpace(ColorSpace::kRGB); + Image3F testimage(xsize, ysize); + float factor = 1.f / (1 << 14); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < ysize; y++) { + float* const JXL_RESTRICT row = testimage.PlaneRow(c, y); + for (size_t x = 0; x < xsize; x++) { + row[x] = factor * (x ^ y); + } + } + } + io.SetFromImage(std::move(testimage), color_encoding); + io.metadata.m.color_encoding = color_encoding; + io.metadata.m.SetIntensityTarget(255); + + CompressParams cparams; + cparams.modular_mode = true; + cparams.color_transform = jxl::ColorTransform::kNone; + cparams.butteraugli_distance = 0.f; + cparams.options.predictor = {Predictor::Zero}; + cparams.speed_tier = SpeedTier::kThunder; + cparams.decoding_speed_tier = 2; + + CodecInOut io2; + size_t compressed_size; + JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size)); + EXPECT_LE(compressed_size, 23000u); + JXL_EXPECT_OK(SamePixels(*io.Main().color(), *io2.Main().color(), _)); +} + +void WriteHeaders(BitWriter* writer, size_t xsize, size_t ysize) { + BitWriter::Allotment allotment(writer, 16); + writer->Write(8, 0xFF); + writer->Write(8, kCodestreamMarker); + 
allotment.ReclaimAndCharge(writer, 0, nullptr); + CodecMetadata metadata; + EXPECT_TRUE(metadata.size.Set(xsize, ysize)); + EXPECT_TRUE(WriteSizeHeader(metadata.size, writer, 0, nullptr)); + metadata.m.color_encoding = ColorEncoding::LinearSRGB(/*is_gray=*/true); + metadata.m.xyb_encoded = false; + metadata.m.SetUintSamples(31); + EXPECT_TRUE(WriteImageMetadata(metadata.m, writer, 0, nullptr)); + metadata.transform_data.nonserialized_xyb_encoded = metadata.m.xyb_encoded; + EXPECT_TRUE(Bundle::Write(metadata.transform_data, writer, 0, nullptr)); + writer->ZeroPadToByte(); + FrameHeader frame_header(&metadata); + frame_header.encoding = FrameEncoding::kModular; + frame_header.loop_filter.gab = false; + frame_header.loop_filter.epf_iters = 0; + EXPECT_TRUE(WriteFrameHeader(frame_header, writer, nullptr)); +} + +// Tree with single node, zero predictor, offset is 1 and multiplier is 1, +// entropy code is prefix tree with alphabet size 256 and all bits lengths 8. +void WriteHistograms(BitWriter* writer) { + writer->Write(1, 1); // default DC quant + writer->Write(1, 1); // has_tree + // tree histograms + writer->Write(1, 0); // LZ77 disabled + writer->Write(3, 1); // simple context map + writer->Write(1, 1); // prefix code + writer->Write(7, 0x63); // UnintConfig(3, 2, 1) + writer->Write(12, 0xfef); // alphabet_size = 256 + writer->Write(32, 0x10003); // all bit lengths 8 + // tree tokens + writer->Write(8, 0); // tree leaf + writer->Write(8, 0); // zero predictor + writer->Write(8, 64); // offset = UnpackSigned(ReverseBits(64)) = 1 + writer->Write(16, 0); // multiplier = 1 + // histograms + writer->Write(1, 0); // LZ77 disabled + writer->Write(1, 1); // prefix code + writer->Write(7, 0x63); // UnintConfig(3, 2, 1) + writer->Write(12, 0xfef); // alphabet_size = 256 + writer->Write(32, 0x10003); // all bit lengths 8 +} + +TEST(ModularTest, PredictorIntegerOverflow) { + const size_t xsize = 1; + const size_t ysize = 1; + BitWriter writer; + WriteHeaders(&writer, xsize, 
ysize); + std::vector group_codes(1); + { + BitWriter* bw = &group_codes[0]; + BitWriter::Allotment allotment(bw, 1 << 20); + WriteHistograms(bw); + GroupHeader header; + header.use_global_tree = true; + EXPECT_TRUE(Bundle::Write(header, bw, 0, nullptr)); + // After UnpackSigned this becomes (1 << 31) - 1, the largest pixel_type, + // and after adding the offset we get -(1 << 31). + bw->Write(8, 119); + bw->Write(28, 0xfffffff); + bw->ZeroPadToByte(); + allotment.ReclaimAndCharge(bw, 0, nullptr); + } + EXPECT_TRUE(WriteGroupOffsets(group_codes, nullptr, &writer, nullptr)); + writer.AppendByteAligned(group_codes); + + PaddedBytes compressed = std::move(writer).TakeBytes(); + extras::PackedPixelFile ppf; + extras::JXLDecompressParams params; + params.accepted_formats.push_back({1, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0}); + EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), params, + nullptr, &ppf)); + ASSERT_EQ(1, ppf.frames.size()); + const auto& img = ppf.frames[0].color; + const auto pixels = reinterpret_cast(img.pixels()); + EXPECT_EQ(-1.0f, pixels[0]); +} + +TEST(ModularTest, UnsqueezeIntegerOverflow) { + // Image width is 9 so we can test both the SIMD and non-vector code paths. 
+ const size_t xsize = 9; + const size_t ysize = 2; + BitWriter writer; + WriteHeaders(&writer, xsize, ysize); + std::vector group_codes(1); + { + BitWriter* bw = &group_codes[0]; + BitWriter::Allotment allotment(bw, 1 << 20); + WriteHistograms(bw); + GroupHeader header; + header.use_global_tree = true; + header.transforms.emplace_back(); + header.transforms[0].id = TransformId::kSqueeze; + SqueezeParams params; + params.horizontal = false; + params.in_place = true; + params.begin_c = 0; + params.num_c = 1; + header.transforms[0].squeezes.emplace_back(params); + EXPECT_TRUE(Bundle::Write(header, bw, 0, nullptr)); + for (size_t i = 0; i < xsize * ysize; ++i) { + // After UnpackSigned and adding offset, this becomes (1 << 31) - 1, both + // in the image and in the residual channels, and unsqueeze makes them + // ~(3 << 30) and (1 << 30) (in pixel_type_w) and the first wraps around + // to about -(1 << 30). + bw->Write(8, 119); + bw->Write(28, 0xffffffe); + } + bw->ZeroPadToByte(); + allotment.ReclaimAndCharge(bw, 0, nullptr); + } + EXPECT_TRUE(WriteGroupOffsets(group_codes, nullptr, &writer, nullptr)); + writer.AppendByteAligned(group_codes); + + PaddedBytes compressed = std::move(writer).TakeBytes(); + extras::PackedPixelFile ppf; + extras::JXLDecompressParams params; + params.accepted_formats.push_back({1, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0}); + EXPECT_TRUE(DecodeImageJXL(compressed.data(), compressed.size(), params, + nullptr, &ppf)); + ASSERT_EQ(1, ppf.frames.size()); + const auto& img = ppf.frames[0].color; + const auto pixels = reinterpret_cast(img.pixels()); + for (size_t x = 0; x < xsize; ++x) { + EXPECT_NEAR(-0.5f, pixels[x], 1e-10); + EXPECT_NEAR(0.5f, pixels[xsize + x], 1e-10); + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/noise.h b/third-party/libjxl/libjxl/lib/jxl/noise.h new file mode 100644 index 0000000000..d897ea3abe --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/noise.h @@ -0,0 +1,60 @@ +// 
Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_NOISE_H_ +#define LIB_JXL_NOISE_H_ + +// Noise parameters shared by encoder/decoder. + +#include + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" + +namespace jxl { + +const float kNoisePrecision = 1 << 10; + +struct NoiseParams { + // LUT index is an intensity of pixel / mean intensity of patch + static constexpr size_t kNumNoisePoints = 8; + float lut[kNumNoisePoints]; + + void Clear() { + for (float& i : lut) i = 0.f; + } + bool HasAny() const { + for (float i : lut) { + if (std::abs(i) > 1e-3f) return true; + } + return false; + } +}; + +static inline std::pair IndexAndFrac(float x) { + constexpr size_t kScaleNumerator = NoiseParams::kNumNoisePoints - 2; + // TODO: instead of 1, this should be a proper Y range. + constexpr float kScale = kScaleNumerator / 1; + float scaled_x = std::max(0.f, x * kScale); + float floor_x; + float frac_x = std::modf(scaled_x, &floor_x); + if (JXL_UNLIKELY(scaled_x >= kScaleNumerator + 1)) { + floor_x = kScaleNumerator; + frac_x = 1.f; + } + return std::make_pair(static_cast(floor_x), frac_x); +} + +struct NoiseLevel { + float noise_level; + float intensity; +}; + +} // namespace jxl + +#endif // LIB_JXL_NOISE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_image_test.cc b/third-party/libjxl/libjxl/lib/jxl/opsin_image_test.cc new file mode 100644 index 0000000000..07fd824f14 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/opsin_image_test.cc @@ -0,0 +1,123 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_xyb.h" +#include "lib/jxl/image.h" +#include "lib/jxl/matrix_ops.h" +#include "lib/jxl/opsin_params.h" + +namespace jxl { +namespace { + +// Convert a single linear sRGB color to xyb, using the exact image conversion +// procedure that jpeg xl uses. +void LinearSrgbToOpsin(float rgb_r, float rgb_g, float rgb_b, + float* JXL_RESTRICT xyb_x, float* JXL_RESTRICT xyb_y, + float* JXL_RESTRICT xyb_b) { + Image3F linear(1, 1); + linear.PlaneRow(0, 0)[0] = rgb_r; + linear.PlaneRow(1, 0)[0] = rgb_g; + linear.PlaneRow(2, 0)[0] = rgb_b; + + ImageMetadata metadata; + metadata.SetFloat32Samples(); + metadata.color_encoding = ColorEncoding::LinearSRGB(); + ImageBundle ib(&metadata); + ib.SetFromImage(std::move(linear), metadata.color_encoding); + Image3F opsin(1, 1); + (void)ToXYB(ib, /*pool=*/nullptr, &opsin, GetJxlCms()); + + *xyb_x = opsin.PlaneRow(0, 0)[0]; + *xyb_y = opsin.PlaneRow(1, 0)[0]; + *xyb_b = opsin.PlaneRow(2, 0)[0]; +} + +// Convert a single XYB color to linear sRGB, using the exact image conversion +// procedure that jpeg xl uses. 
+void OpsinToLinearSrgb(float xyb_x, float xyb_y, float xyb_b, + float* JXL_RESTRICT rgb_r, float* JXL_RESTRICT rgb_g, + float* JXL_RESTRICT rgb_b) { + Image3F opsin(1, 1); + opsin.PlaneRow(0, 0)[0] = xyb_x; + opsin.PlaneRow(1, 0)[0] = xyb_y; + opsin.PlaneRow(2, 0)[0] = xyb_b; + Image3F linear(1, 1); + OpsinParams opsin_params; + opsin_params.Init(/*intensity_target=*/255.0f); + OpsinToLinear(opsin, Rect(opsin), nullptr, &linear, opsin_params); + *rgb_r = linear.PlaneRow(0, 0)[0]; + *rgb_g = linear.PlaneRow(1, 0)[0]; + *rgb_b = linear.PlaneRow(2, 0)[0]; +} + +void OpsinRoundtripTestRGB(float r, float g, float b) { + float xyb_x, xyb_y, xyb_b; + LinearSrgbToOpsin(r, g, b, &xyb_x, &xyb_y, &xyb_b); + float r2, g2, b2; + OpsinToLinearSrgb(xyb_x, xyb_y, xyb_b, &r2, &g2, &b2); + EXPECT_NEAR(r, r2, 1e-3); + EXPECT_NEAR(g, g2, 1e-3); + EXPECT_NEAR(b, b2, 1e-3); +} + +TEST(OpsinImageTest, VerifyOpsinAbsorbanceInverseMatrix) { + float matrix[9]; // writable copy + for (int i = 0; i < 9; i++) { + matrix[i] = GetOpsinAbsorbanceInverseMatrix()[i]; + } + EXPECT_TRUE(Inv3x3Matrix(matrix)); + for (int i = 0; i < 9; i++) { + EXPECT_NEAR(matrix[i], kOpsinAbsorbanceMatrix[i], 1e-6); + } +} + +TEST(OpsinImageTest, OpsinRoundtrip) { + OpsinRoundtripTestRGB(0, 0, 0); + OpsinRoundtripTestRGB(1. / 255, 1. / 255, 1. / 255); + OpsinRoundtripTestRGB(128. / 255, 128. / 255, 128. / 255); + OpsinRoundtripTestRGB(1, 1, 1); + + OpsinRoundtripTestRGB(0, 0, 1. / 255); + OpsinRoundtripTestRGB(0, 0, 128. / 255); + OpsinRoundtripTestRGB(0, 0, 1); + + OpsinRoundtripTestRGB(0, 1. / 255, 0); + OpsinRoundtripTestRGB(0, 128. / 255, 0); + OpsinRoundtripTestRGB(0, 1, 0); + + OpsinRoundtripTestRGB(1. / 255, 0, 0); + OpsinRoundtripTestRGB(128. / 255, 0, 0); + OpsinRoundtripTestRGB(1, 0, 0); +} + +TEST(OpsinImageTest, VerifyZero) { + // Test that black color (zero energy) is 0,0,0 in xyb. 
+ float x, y, b; + LinearSrgbToOpsin(0, 0, 0, &x, &y, &b); + EXPECT_NEAR(0, x, 1e-9); + EXPECT_NEAR(0, y, 1e-7); + EXPECT_NEAR(0, b, 1e-7); +} + +TEST(OpsinImageTest, VerifyGray) { + // Test that grayscale colors have a fixed y/b ratio and x==0. + for (size_t i = 1; i < 255; i++) { + float x, y, b; + LinearSrgbToOpsin(i / 255., i / 255., i / 255., &x, &y, &b); + EXPECT_NEAR(0, x, 1e-6); + EXPECT_NEAR(kYToBRatio, b / y, 3e-5); + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_inverse_test.cc b/third-party/libjxl/libjxl/lib/jxl/opsin_inverse_test.cc new file mode 100644 index 0000000000..a948693ac6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/opsin_inverse_test.cc @@ -0,0 +1,59 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/color_management.h" +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_xyb.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(OpsinInverseTest, LinearInverseInverts) { + Image3F linear(128, 128); + RandomFillImage(&linear, 0.0f, 1.0f); + + CodecInOut io; + io.metadata.m.SetFloat32Samples(); + io.metadata.m.color_encoding = ColorEncoding::LinearSRGB(); + Image3F linear2(128, 128); + CopyImageTo(linear, &linear2); + io.SetFromImage(std::move(linear2), io.metadata.m.color_encoding); + ThreadPool* null_pool = nullptr; + Image3F opsin(io.xsize(), io.ysize()); + (void)ToXYB(io.Main(), null_pool, &opsin, GetJxlCms()); + + OpsinParams opsin_params; + opsin_params.Init(/*intensity_target=*/255.0f); + OpsinToLinearInplace(&opsin, /*pool=*/nullptr, opsin_params); + 
+ JXL_ASSERT_OK(VerifyRelativeError(linear, opsin, 3E-3, 2E-4, _)); +} + +TEST(OpsinInverseTest, YcbCrInverts) { + Image3F rgb(128, 128); + RandomFillImage(&rgb, 0.0f, 1.0f); + + ThreadPool* null_pool = nullptr; + Image3F ycbcr(rgb.xsize(), rgb.ysize()); + EXPECT_TRUE(RgbToYcbcr(rgb.Plane(0), rgb.Plane(1), rgb.Plane(2), + &ycbcr.Plane(1), &ycbcr.Plane(0), &ycbcr.Plane(2), + null_pool)); + + Image3F rgb2(rgb.xsize(), rgb.ysize()); + YcbcrToRgb(ycbcr, &rgb2, Rect(rgb)); + + JXL_ASSERT_OK(VerifyRelativeError(rgb, rgb2, 4E-5, 4E-7, _)); +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_params.cc b/third-party/libjxl/libjxl/lib/jxl/opsin_params.cc new file mode 100644 index 0000000000..ec3db4ee76 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/opsin_params.cc @@ -0,0 +1,44 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/opsin_params.h" + +#include + +#include "lib/jxl/matrix_ops.h" + +namespace jxl { + +#define INVERSE_OPSIN_FROM_SPEC 1 + +const float* GetOpsinAbsorbanceInverseMatrix() { +#if INVERSE_OPSIN_FROM_SPEC + return DefaultInverseOpsinAbsorbanceMatrix(); +#else // INVERSE_OPSIN_FROM_SPEC + // Compute the inverse opsin matrix from the forward matrix. Less precise + // than taking the values from the specification, but must be used if the + // forward transform is changed and the spec will require updating. 
+ static const float* const kInverse = [] { + static float inverse[9]; + for (int i = 0; i < 9; i++) { + inverse[i] = kOpsinAbsorbanceMatrix[i]; + } + Inv3x3Matrix(inverse); + return inverse; + }(); + return kInverse; +#endif // INVERSE_OPSIN_FROM_SPEC +} + +void InitSIMDInverseMatrix(const float* JXL_RESTRICT inverse, + float* JXL_RESTRICT simd_inverse, + float intensity_target) { + for (size_t i = 0; i < 9; ++i) { + simd_inverse[4 * i] = simd_inverse[4 * i + 1] = simd_inverse[4 * i + 2] = + simd_inverse[4 * i + 3] = inverse[i] * (255.0f / intensity_target); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/opsin_params.h b/third-party/libjxl/libjxl/lib/jxl/opsin_params.h new file mode 100644 index 0000000000..3a7da97d8a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/opsin_params.h @@ -0,0 +1,86 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_OPSIN_PARAMS_H_ +#define LIB_JXL_OPSIN_PARAMS_H_ + +// Constants that define the XYB color space. + +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" + +namespace jxl { + +// Parameters for opsin absorbance. +static const float kM02 = 0.078f; +static const float kM00 = 0.30f; +static const float kM01 = 1.0f - kM02 - kM00; + +static const float kM12 = 0.078f; +static const float kM10 = 0.23f; +static const float kM11 = 1.0f - kM12 - kM10; + +static const float kM20 = 0.24342268924547819f; +static const float kM21 = 0.20476744424496821f; +static const float kM22 = 1.0f - kM20 - kM21; + +static const float kBScale = 1.0f; +static const float kYToBRatio = 1.0f; // works better with 0.50017729543783418 +static const float kBToYRatio = 1.0f / kYToBRatio; + +static const float kB0 = 0.0037930732552754493f; +static const float kB1 = kB0; +static const float kB2 = kB0; + +// Opsin absorbance matrix is now frozen. 
// Returns a pointer to the 9 hard-coded coefficients of the inverse opsin
// absorbance matrix. The values must be the inverse of kOpsinAbsorbanceMatrix
// and match the spec, so the table is declared const: callers only ever get a
// `const float*`, and nothing may modify the shared storage.
static inline const float* DefaultInverseOpsinAbsorbanceMatrix() {
  static const float kDefaultInverseOpsinAbsorbanceMatrix[9] = {
      11.031566901960783f, -9.866943921568629f, -0.16462299647058826f,
      -3.254147380392157f, 4.418770392156863f,  -0.16462299647058826f,
      -3.6588512862745097f, 2.7129230470588235f, 1.9459282392156863f};
  return kDefaultInverseOpsinAbsorbanceMatrix;
}
+ +#include "lib/jxl/base/padded_bytes.h" + +#include // iota +#include + +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +TEST(PaddedBytesTest, TestNonEmptyFirstByteZero) { + PaddedBytes pb(1); + EXPECT_EQ(0, pb[0]); + // Even after resizing.. + pb.resize(20); + EXPECT_EQ(0, pb[0]); + // And reserving. + pb.reserve(200); + EXPECT_EQ(0, pb[0]); +} + +TEST(PaddedBytesTest, TestEmptyFirstByteZero) { + PaddedBytes pb(0); + // After resizing - new zero is written despite there being nothing to copy. + pb.resize(20); + EXPECT_EQ(0, pb[0]); +} + +TEST(PaddedBytesTest, TestFillWithoutReserve) { + PaddedBytes pb; + for (size_t i = 0; i < 170u; ++i) { + pb.push_back(i); + } + EXPECT_EQ(170u, pb.size()); + EXPECT_GE(pb.capacity(), 170u); +} + +TEST(PaddedBytesTest, TestFillWithExactReserve) { + PaddedBytes pb; + pb.reserve(170); + for (size_t i = 0; i < 170u; ++i) { + pb.push_back(i); + } + EXPECT_EQ(170u, pb.size()); + EXPECT_EQ(pb.capacity(), 170u); +} + +TEST(PaddedBytesTest, TestFillWithMoreReserve) { + PaddedBytes pb; + pb.reserve(171); + for (size_t i = 0; i < 170u; ++i) { + pb.push_back(i); + } + EXPECT_EQ(170u, pb.size()); + EXPECT_GT(pb.capacity(), 170u); +} + +// Can assign() a subset of the valid data. +TEST(PaddedBytesTest, TestAssignFromWithin) { + PaddedBytes pb; + pb.reserve(256); + for (size_t i = 0; i < 256; ++i) { + pb.push_back(i); + } + pb.assign(pb.data() + 64, pb.data() + 192); + EXPECT_EQ(128u, pb.size()); + for (size_t i = 0; i < 128; ++i) { + EXPECT_EQ(i + 64, pb[i]); + } +} + +// Can assign() a range with both valid and previously-allocated data. 
+TEST(PaddedBytesTest, TestAssignReclaim) { + PaddedBytes pb; + pb.reserve(256); + for (size_t i = 0; i < 256; ++i) { + pb.push_back(i); + } + + const uint8_t* mem = pb.data(); + pb.resize(200); + // Just shrank without reallocating + EXPECT_EQ(mem, pb.data()); + EXPECT_EQ(256u, pb.capacity()); + + // Reclaim part of initial allocation + pb.assign(pb.data() + 100, pb.data() + 240); + EXPECT_EQ(140u, pb.size()); + + for (size_t i = 0; i < 140; ++i) { + EXPECT_EQ(i + 100, pb[i]); + } +} + +// Can assign() smaller and larger ranges outside the current allocation. +TEST(PaddedBytesTest, TestAssignOutside) { + PaddedBytes pb; + pb.resize(400); + std::iota(pb.begin(), pb.end(), 1); + + std::vector small(64); + std::iota(small.begin(), small.end(), 500); + + pb.assign(small.data(), small.data() + small.size()); + EXPECT_EQ(64u, pb.size()); + for (size_t i = 0; i < 64; ++i) { + EXPECT_EQ((i + 500) & 0xFF, pb[i]); + } + + std::vector large(1000); + std::iota(large.begin(), large.end(), 600); + + pb.assign(large.data(), large.data() + large.size()); + EXPECT_EQ(1000u, pb.size()); + for (size_t i = 0; i < 1000; ++i) { + EXPECT_EQ((i + 600) & 0xFF, pb[i]); + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/passes_state.cc b/third-party/libjxl/libjxl/lib/jxl/passes_state.cc new file mode 100644 index 0000000000..2f287ec9b6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/passes_state.cc @@ -0,0 +1,70 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/passes_state.h" + +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/common.h" + +namespace jxl { + +Status InitializePassesSharedState(const FrameHeader& frame_header, + PassesSharedState* JXL_RESTRICT shared, + bool encoder) { + JXL_ASSERT(frame_header.nonserialized_metadata != nullptr); + shared->frame_header = frame_header; + shared->metadata = frame_header.nonserialized_metadata; + shared->frame_dim = frame_header.ToFrameDimensions(); + shared->image_features.patches.SetPassesSharedState(shared); + + const FrameDimensions& frame_dim = shared->frame_dim; + + shared->ac_strategy = + AcStrategyImage(frame_dim.xsize_blocks, frame_dim.ysize_blocks); + shared->raw_quant_field = + ImageI(frame_dim.xsize_blocks, frame_dim.ysize_blocks); + shared->epf_sharpness = + ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks); + shared->cmap = ColorCorrelationMap(frame_dim.xsize, frame_dim.ysize); + + // In the decoder, we allocate coeff orders afterwards, when we know how many + // we will actually need. 
+ shared->coeff_order_size = kCoeffOrderMaxSize; + if (encoder && + shared->coeff_orders.size() < + frame_header.passes.num_passes * kCoeffOrderMaxSize && + frame_header.encoding == FrameEncoding::kVarDCT) { + shared->coeff_orders.resize(frame_header.passes.num_passes * + kCoeffOrderMaxSize); + } + + shared->quant_dc = ImageB(frame_dim.xsize_blocks, frame_dim.ysize_blocks); + + bool use_dc_frame = !!(frame_header.flags & FrameHeader::kUseDcFrame); + if (!encoder && use_dc_frame) { + if (frame_header.dc_level == 4) { + return JXL_FAILURE("Invalid DC level for kUseDcFrame: %u", + frame_header.dc_level); + } + shared->dc_storage = Image3F(); + shared->dc = &shared->dc_frames[frame_header.dc_level]; + if (shared->dc->xsize() == 0) { + return JXL_FAILURE( + "kUseDcFrame specified for dc_level %u, but no frame was decoded " + "with level %u", + frame_header.dc_level, frame_header.dc_level + 1); + } + ZeroFillImage(&shared->quant_dc); + } else { + shared->dc_storage = + Image3F(frame_dim.xsize_blocks, frame_dim.ysize_blocks); + shared->dc = &shared->dc_storage; + } + + return true; +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/passes_state.h b/third-party/libjxl/libjxl/lib/jxl/passes_state.h new file mode 100644 index 0000000000..8d648a8feb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/passes_state.h @@ -0,0 +1,133 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
// State common to both encoder and decoder.
//
// Filled in by InitializePassesSharedState(). Member declaration order matters:
// `quantizer` is constructed from the address of `matrices`, and `dc` defaults
// to the address of `dc_storage`, so both referenced members are declared
// first.
// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding)
struct PassesSharedState {
  // Note: `metadata` is not initialized here; it is assigned by
  // InitializePassesSharedState().
  PassesSharedState() : frame_header(nullptr) {}

  // Headers and metadata.
  const CodecMetadata* metadata;
  FrameHeader frame_header;

  FrameDimensions frame_dim;

  // Control fields and parameters.
  AcStrategyImage ac_strategy;

  // Dequant matrices + quantizer.
  DequantMatrices matrices;
  Quantizer quantizer{&matrices};
  ImageI raw_quant_field;

  // Per-block side information for EPF detail preservation.
  ImageB epf_sharpness;

  ColorCorrelationMap cmap;

  ImageFeatures image_features;

  // Memory area for storing coefficient orders.
  // `coeff_order_size` is the size used by *one* set of coefficient orders (at
  // most kCoeffOrderMaxSize). A set of coefficient orders is present for each
  // pass.
  size_t coeff_order_size = 0;
  // NOTE(review): the element type of this vector appears to have been lost
  // (no template argument is visible) - confirm against the upstream header.
  std::vector coeff_orders;

  // Decoder-side DC and quantized DC.
  ImageB quant_dc;
  Image3F dc_storage;
  // Points at `dc_storage` by default; InitializePassesSharedState() may
  // redirect it to an entry of `dc_frames`.
  const Image3F* JXL_RESTRICT dc = &dc_storage;

  BlockCtxMap block_ctx_map;

  // Indexed by frame_header.dc_level in InitializePassesSharedState().
  Image3F dc_frames[4];

  struct {
    ImageBundle frame;
    // ImageBundle doesn't yet have a simple way to state it is in XYB.
    bool ib_is_in_xyb = false;
  } reference_frames[4] = {};

  // Number of pre-clustered set of histograms (with the same ctx map), per
  // pass. Encoded as num_histograms_ - 1.
  size_t num_histograms = 0;

  // Requires `metadata` to be set (it is dereferenced unconditionally).
  bool IsGrayscale() const { return metadata->m.color_encoding.IsGray(); }

  // Rectangle of group `group_index`; groups are numbered row by row, with
  // frame_dim.xsize_groups groups per row. The last two arguments are the
  // frame size (presumably used by Rect to clamp edge groups - confirm).
  Rect GroupRect(size_t group_index) const {
    const size_t gx = group_index % frame_dim.xsize_groups;
    const size_t gy = group_index / frame_dim.xsize_groups;
    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
                    frame_dim.group_dim, frame_dim.group_dim, frame_dim.xsize,
                    frame_dim.ysize);
    return rect;
  }

  // Same as GroupRect(), but bounded by the padded frame size.
  Rect PaddedGroupRect(size_t group_index) const {
    const size_t gx = group_index % frame_dim.xsize_groups;
    const size_t gy = group_index / frame_dim.xsize_groups;
    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
                    frame_dim.group_dim, frame_dim.group_dim,
                    frame_dim.xsize_padded, frame_dim.ysize_padded);
    return rect;
  }

  // Group rectangle with every coordinate divided by 8 (group_dim >> 3) and
  // bounded by the frame size in blocks.
  Rect BlockGroupRect(size_t group_index) const {
    const size_t gx = group_index % frame_dim.xsize_groups;
    const size_t gy = group_index / frame_dim.xsize_groups;
    const Rect rect(gx * (frame_dim.group_dim >> 3),
                    gy * (frame_dim.group_dim >> 3), frame_dim.group_dim >> 3,
                    frame_dim.group_dim >> 3, frame_dim.xsize_blocks,
                    frame_dim.ysize_blocks);
    return rect;
  }

  // Rectangle of DC group `group_index` (frame_dim.xsize_dc_groups per row),
  // expressed with group_dim-sized steps and bounded by the size in blocks.
  Rect DCGroupRect(size_t group_index) const {
    const size_t gx = group_index % frame_dim.xsize_dc_groups;
    const size_t gy = group_index / frame_dim.xsize_dc_groups;
    const Rect rect(gx * frame_dim.group_dim, gy * frame_dim.group_dim,
                    frame_dim.group_dim, frame_dim.group_dim,
                    frame_dim.xsize_blocks, frame_dim.ysize_blocks);
    return rect;
  }
};
// Progressive-mode roundtrip of a small image: the test picture is shrunk to
// 1/8 of its size in each dimension, encoded at butteraugli distance 1.0 and
// the decoded result must score slightly below 1.1.
TEST(PassesTest, RoundtripSmallPasses) {
  const PaddedBytes orig = jxl::test::ReadTestData(
      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
  CodecInOut io;
  // NOTE(review): `Span(orig)` looks like it lost an explicit template
  // argument - confirm against upstream.
  ASSERT_TRUE(SetFromBytes(Span(orig), &io));
  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);

  CompressParams cparams;
  cparams.butteraugli_distance = 1.0;
  cparams.progressive_mode = true;
  cparams.SetCms(GetJxlCms());

  CodecInOut io2;
  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
  EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
                                  GetJxlCms(),
                                  /*distmap=*/nullptr),
              IsSlightlyBelow(1.1));
}
// Progressive-mode roundtrip of a 600x1024 crop at two target distances, run
// concurrently on two threads that share the same input `io`.
TEST(PassesTest, RoundtripMultiGroupPasses) {
  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
  CodecInOut io;
  {
    // Pool used only for decoding the input; destroyed at the end of scope.
    ThreadPoolForTests pool(4);
    // NOTE(review): `Span(orig)` looks like it lost an explicit template
    // argument - confirm against upstream.
    ASSERT_TRUE(SetFromBytes(Span(orig), &io, &pool));
  }
  io.ShrinkTo(600, 1024);  // partial X, full Y group

  // Encodes/decodes `io` at `target_distance` and expects the measured
  // distance to be slightly below target_distance + threshold. Each call
  // creates its own thread pool and output object.
  auto test = [&](float target_distance, float threshold) {
    ThreadPoolForTests pool(4);
    CompressParams cparams;
    cparams.butteraugli_distance = target_distance;
    cparams.progressive_mode = true;
    cparams.SetCms(GetJxlCms());
    CodecInOut io2;
    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _,
                            /* compressed_size */ nullptr, &pool));
    EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
                                    GetJxlCms(),
                                    /*distmap=*/nullptr, &pool),
                IsSlightlyBelow(target_distance + threshold));
  };

  // The futures are never queried explicitly: a future obtained from
  // std::async(std::launch::async, ...) blocks in its destructor, so both
  // runs have finished when the test body returns.
  auto run1 = std::async(std::launch::async, test, 1.0f, 0.5f);
  auto run2 = std::async(std::launch::async, test, 2.0f, 0.0f);
}
// Checks for differing size/distance in two consecutive runs of distance 2,
// which involves additional processing including adaptive reconstruction.
// Failing this may be a sign of race conditions or invalid memory accesses.
TEST(PassesTest, RoundtripProgressiveConsistent) {
  ThreadPoolForTests pool(8);
  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
  CodecInOut io;
  // NOTE(review): `Span(orig)` looks like it lost an explicit template
  // argument - confirm against upstream.
  ASSERT_TRUE(SetFromBytes(Span(orig), &io, &pool));

  CompressParams cparams;
  cparams.speed_tier = SpeedTier::kSquirrel;
  cparams.progressive_mode = true;
  cparams.butteraugli_distance = 2.0;
  cparams.SetCms(GetJxlCms());

  // Try each xsize mod kBlockDim to verify right border handling.
  // (Widths 48 down to 41; the image only ever shrinks between iterations.)
  for (size_t xsize = 48; xsize > 40; --xsize) {
    io.ShrinkTo(xsize, 15);

    // First run. `size2` is written by Roundtrip(); it has no initial value.
    CodecInOut io2;
    size_t size2;
    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &size2, &pool));

    // Second run with identical input and parameters.
    CodecInOut io3;
    size_t size3;
    JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io3, _, &size3, &pool));

    // Exact same compressed size.
    EXPECT_EQ(size2, size3);

    // Exact same distance.
    const float dist2 = ButteraugliDistance(io.frames, io2.frames,
                                            ButteraugliParams(), GetJxlCms(),
                                            /*distmap=*/nullptr, &pool);
    const float dist3 = ButteraugliDistance(io.frames, io3.frames,
                                            ButteraugliParams(), GetJxlCms(),
                                            /*distmap=*/nullptr, &pool);
    EXPECT_EQ(dist2, dist3);
  }
}
// Encodes once with qprogressive_mode and then decodes the same bitstream at
// max_downsampling 1, 2, 4 and 8, checking that each decode keeps the full
// image dimensions and stays within a per-factor butteraugli bound.
TEST(PassesTest, AllDownsampleFeasibleQProgressive) {
  ThreadPoolForTests pool(8);
  const PaddedBytes orig = jxl::test::ReadTestData(
      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
  CodecInOut io;
  // NOTE(review): `Span(orig)` / `Span(compressed)` look like they lost an
  // explicit template argument - confirm against upstream.
  ASSERT_TRUE(SetFromBytes(Span(orig), &io, &pool));

  PaddedBytes compressed;
  AuxOut aux;

  CompressParams cparams;
  cparams.speed_tier = SpeedTier::kSquirrel;
  cparams.qprogressive_mode = true;
  cparams.butteraugli_distance = 1.0;
  PassesEncoderState enc_state;
  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
                         &aux, &pool));

  EXPECT_LE(compressed.size(), 220000u);

  // Indexed directly by the downsampling factor, hence 9 entries for factors
  // up to 8; only indices 1, 2, 4 and 8 are used.
  float target_butteraugli[9] = {};
  target_butteraugli[1] = 3.0f;
  target_butteraugli[2] = 6.0f;
  target_butteraugli[4] = 10.0f;
  target_butteraugli[8] = 80.0f;

  // The default progressive encoding scheme should make all these downsampling
  // factors achievable.
  std::vector downsamplings = {1, 2, 4, 8};

  // One task per downsampling factor; `task` indexes `downsamplings`.
  auto check = [&](const uint32_t task, size_t /* thread */) -> void {
    const size_t downsampling = downsamplings[task];
    extras::JXLDecompressParams dparams;
    dparams.max_downsampling = downsampling;
    CodecInOut output;
    ASSERT_TRUE(
        test::DecodeFile(dparams, Span(compressed), &output));
    EXPECT_EQ(output.xsize(), io.xsize()) << "downsampling = " << downsampling;
    EXPECT_EQ(output.ysize(), io.ysize()) << "downsampling = " << downsampling;
    EXPECT_LE(ButteraugliDistance(io.frames, output.frames, ButteraugliParams(),
                                  GetJxlCms(),
                                  /*distmap=*/nullptr),
              target_butteraugli[downsampling])
        << "downsampling: " << downsampling;
  };
  EXPECT_TRUE(RunOnPool(&pool, 0, downsamplings.size(), ThreadPool::NoInit,
                        check, "TestQProgressive"));
}
// Encodes a wide image with progressive DC and qprogressive passes, then
// compares a full decode against a max_downsampling = 2 decode: the two must
// differ, but only moderately (distance between 1.0 and 3.0).
TEST(PassesTest, ProgressiveDownsample2DegradesCorrectly) {
  ThreadPoolForTests pool(8);
  const PaddedBytes orig = jxl::test::ReadTestData("jxl/flower/flower.png");
  CodecInOut io_orig;
  // NOTE(review): `Span(orig)` / `Span(compressed)` look like they lost an
  // explicit template argument - confirm against upstream.
  ASSERT_TRUE(SetFromBytes(Span(orig), &io_orig, &pool));
  // Top 128 rows of the source image.
  Rect rect(0, 0, io_orig.xsize(), 128);
  // need 2 DC groups for the DC frame to actually be progressive.
  // The canvas is 4242 pixels wide and zero-filled; the source strip is
  // copied into its left part.
  Image3F large(4242, rect.ysize());
  ZeroFillImage(&large);
  CopyImageTo(rect, *io_orig.Main().color(), rect, &large);
  CodecInOut io;
  io.SetFromImage(std::move(large), io_orig.Main().c_current());

  PaddedBytes compressed;
  AuxOut aux;

  CompressParams cparams;
  cparams.speed_tier = SpeedTier::kSquirrel;
  cparams.progressive_dc = 1;
  cparams.responsive = true;
  cparams.qprogressive_mode = true;
  cparams.butteraugli_distance = 1.0;
  PassesEncoderState enc_state;
  ASSERT_TRUE(EncodeFile(cparams, &io, &enc_state, &compressed, GetJxlCms(),
                         &aux, &pool));

  EXPECT_LE(compressed.size(), 220000u);

  // Reference decode without downsampling.
  extras::JXLDecompressParams dparams;
  dparams.max_downsampling = 1;
  CodecInOut output;
  ASSERT_TRUE(
      test::DecodeFile(dparams, Span(compressed), &output));

  // Same bitstream, decoder allowed to stop at 2x downsampling.
  dparams.max_downsampling = 2;
  CodecInOut output_d2;
  ASSERT_TRUE(
      test::DecodeFile(dparams, Span(compressed), &output_d2));

  // 0 if reading all the passes, ~15 if skipping the 8x pass.
  float butteraugli_distance_down2_full = ButteraugliDistance(
      output.frames, output_d2.frames, ButteraugliParams(), GetJxlCms(),
      /*distmap=*/nullptr);

  EXPECT_LE(butteraugli_distance_down2_full, 3.0f);
  EXPECT_GE(butteraugli_distance_down2_full, 1.0f);
}
// Same setup as the small progressive roundtrip (image shrunk to 1/8 per
// dimension, distance 1.0), but with gaborish explicitly switched off; the
// accepted distance bound is 1.2.
TEST(PassesTest, RoundtripSmallNoGaborishPasses) {
  const PaddedBytes orig = jxl::test::ReadTestData(
      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
  CodecInOut io;
  // NOTE(review): `Span(orig)` looks like it lost an explicit template
  // argument - confirm against upstream.
  ASSERT_TRUE(SetFromBytes(Span(orig), &io));
  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);

  CompressParams cparams;
  cparams.gaborish = Override::kOff;
  cparams.butteraugli_distance = 1.0;
  cparams.progressive_mode = true;
  cparams.SetCms(GetJxlCms());

  CodecInOut io2;
  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
  EXPECT_THAT(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
                                  GetJxlCms(),
                                  /*distmap=*/nullptr),
              IsSlightlyBelow(1.2));
}
// Context numbers as specified in Section C.4.5, Listing C.2:
// The values are written out explicitly so they stay in step with the spec
// listing; do not renumber or reorder them.
enum Contexts {
  kNumRefPatchContext = 0,
  kReferenceFrameContext = 1,
  kPatchSizeContext = 2,
  kPatchReferencePositionContext = 3,
  kPatchPositionContext = 4,
  kPatchBlendModeContext = 5,
  kPatchOffsetContext = 6,
  kPatchCountContext = 7,
  kPatchAlphaChannelContext = 8,
  kPatchClampContext = 9,
  // Not a context: as the enumerator following 9 it evaluates to 10, i.e. the
  // number of contexts listed above.
  kNumPatchDictionaryContexts
};
// Lossy roundtrip of a grayscale image with patches forced on; checks both
// the compressed size and the butteraugli distance against the bounds below.
TEST(PatchDictionaryTest, GrayscaleVarDCT) {
  const PaddedBytes orig = jxl::test::ReadTestData("jxl/grayscale_patches.png");
  CodecInOut io;
  // NOTE(review): `Span(orig)` looks like it lost an explicit template
  // argument - confirm against upstream.
  ASSERT_TRUE(SetFromBytes(Span(orig), &io));

  CompressParams cparams;
  cparams.patches = jxl::Override::kOn;

  CodecInOut io2;
  // Without patches: ~47k
  size_t compressed_size;
  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _, &compressed_size));
  EXPECT_LE(compressed_size, 14000u);
  // Without patches: ~1.2
  EXPECT_LE(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(),
                                GetJxlCms(),
                                /*distmap=*/nullptr),
            1.1);
}
// Roundtrips an image that carries an explicit 15x27 preview frame and checks
// that the preview dimensions survive in both the metadata and the decoded
// preview frame, and that preview and main image stay within distance 2.5.
TEST(PreviewTest, RoundtripGivenPreview) {
  const PaddedBytes orig = jxl::test::ReadTestData(
      "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png");
  CodecInOut io;
  // NOTE(review): `Span(orig)` looks like it lost an explicit template
  // argument - confirm against upstream.
  ASSERT_TRUE(SetFromBytes(Span(orig), &io));
  io.ShrinkTo(io.xsize() / 8, io.ysize() / 8);
  // Same as main image
  io.preview_frame = io.Main().Copy();
  const size_t preview_xsize = 15;
  const size_t preview_ysize = 27;
  io.preview_frame.ShrinkTo(preview_xsize, preview_ysize);
  // Advertise the preview in the metadata, with the size of the shrunk frame.
  io.metadata.m.have_preview = true;
  ASSERT_TRUE(io.metadata.m.preview_size.Set(io.preview_frame.xsize(),
                                             io.preview_frame.ysize()));

  CompressParams cparams;
  cparams.butteraugli_distance = 2.0;
  cparams.speed_tier = SpeedTier::kSquirrel;
  cparams.SetCms(GetJxlCms());

  CodecInOut io2;
  JXL_EXPECT_OK(Roundtrip(&io, cparams, {}, &io2, _));
  EXPECT_EQ(preview_xsize, io2.metadata.m.preview_size.xsize());
  EXPECT_EQ(preview_ysize, io2.metadata.m.preview_size.ysize());
  EXPECT_EQ(preview_xsize, io2.preview_frame.xsize());
  EXPECT_EQ(preview_ysize, io2.preview_frame.ysize());

  // Preview frame quality.
  EXPECT_LE(ButteraugliDistance(io.preview_frame, io2.preview_frame,
                                ButteraugliParams(), GetJxlCms(),
                                /*distmap=*/nullptr),
            2.5);
  // Main frame quality.
  EXPECT_LE(ButteraugliDistance(io.Main(), io2.Main(), ButteraugliParams(),
                                GetJxlCms(),
                                /*distmap=*/nullptr),
            2.5);
}
+ +static constexpr const float kAlmostZero = 1e-8f; + +void GetQuantWeightsDCT2(const QuantEncoding::DCT2Weights& dct2weights, + float* weights) { + for (size_t c = 0; c < 3; c++) { + size_t start = c * 64; + weights[start] = 0xBAD; + weights[start + 1] = weights[start + 8] = dct2weights[c][0]; + weights[start + 9] = dct2weights[c][1]; + for (size_t y = 0; y < 2; y++) { + for (size_t x = 0; x < 2; x++) { + weights[start + y * 8 + x + 2] = dct2weights[c][2]; + weights[start + (y + 2) * 8 + x] = dct2weights[c][2]; + } + } + for (size_t y = 0; y < 2; y++) { + for (size_t x = 0; x < 2; x++) { + weights[start + (y + 2) * 8 + x + 2] = dct2weights[c][3]; + } + } + for (size_t y = 0; y < 4; y++) { + for (size_t x = 0; x < 4; x++) { + weights[start + y * 8 + x + 4] = dct2weights[c][4]; + weights[start + (y + 4) * 8 + x] = dct2weights[c][4]; + } + } + for (size_t y = 0; y < 4; y++) { + for (size_t x = 0; x < 4; x++) { + weights[start + (y + 4) * 8 + x + 4] = dct2weights[c][5]; + } + } + } +} + +void GetQuantWeightsIdentity(const QuantEncoding::IdWeights& idweights, + float* weights) { + for (size_t c = 0; c < 3; c++) { + for (int i = 0; i < 64; i++) { + weights[64 * c + i] = idweights[c][0]; + } + weights[64 * c + 1] = idweights[c][1]; + weights[64 * c + 8] = idweights[c][1]; + weights[64 * c + 9] = idweights[c][2]; + } +} + +float Interpolate(float pos, float max, const float* array, size_t len) { + float scaled_pos = pos * (len - 1) / max; + size_t idx = scaled_pos; + JXL_DASSERT(idx + 1 < len); + float a = array[idx]; + float b = array[idx + 1]; + return a * FastPowf(b / a, scaled_pos - idx); +} + +float Mult(float v) { + if (v > 0.0f) return 1.0f + v; + return 1.0f / (1.0f - v); +} + +using DF4 = HWY_CAPPED(float, 4); + +hwy::HWY_NAMESPACE::Vec InterpolateVec( + hwy::HWY_NAMESPACE::Vec scaled_pos, const float* array) { + HWY_CAPPED(int32_t, 4) di; + + auto idx = ConvertTo(di, scaled_pos); + + auto frac = Sub(scaled_pos, ConvertTo(DF4(), idx)); + + // TODO(veluca): in 
theory, this could be done with 8 TableLookupBytes, but + // it's probably slower. + auto a = GatherIndex(DF4(), array, idx); + auto b = GatherIndex(DF4(), array + 1, idx); + + return Mul(a, FastPowf(DF4(), Div(b, a), frac)); +} + +// Computes quant weights for a COLS*ROWS-sized transform, using num_bands +// eccentricity bands and num_ebands eccentricity bands. If print_mode is 1, +// prints the resulting matrix; if print_mode is 2, prints the matrix in a +// format suitable for a 3d plot with gnuplot. +Status GetQuantWeights( + size_t ROWS, size_t COLS, + const DctQuantWeightParams::DistanceBandsArray& distance_bands, + size_t num_bands, float* out) { + for (size_t c = 0; c < 3; c++) { + float bands[DctQuantWeightParams::kMaxDistanceBands] = { + distance_bands[c][0]}; + if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); + for (size_t i = 1; i < num_bands; i++) { + bands[i] = bands[i - 1] * Mult(distance_bands[c][i]); + if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid distance bands"); + } + float scale = (num_bands - 1) / (kSqrt2 + 1e-6f); + float rcpcol = scale / (COLS - 1); + float rcprow = scale / (ROWS - 1); + JXL_ASSERT(COLS >= Lanes(DF4())); + HWY_ALIGN float l0123[4] = {0, 1, 2, 3}; + for (uint32_t y = 0; y < ROWS; y++) { + float dy = y * rcprow; + float dy2 = dy * dy; + for (uint32_t x = 0; x < COLS; x += Lanes(DF4())) { + auto dx = + Mul(Add(Set(DF4(), x), Load(DF4(), l0123)), Set(DF4(), rcpcol)); + auto scaled_distance = Sqrt(MulAdd(dx, dx, Set(DF4(), dy2))); + auto weight = num_bands == 1 ? Set(DF4(), bands[0]) + : InterpolateVec(scaled_distance, bands); + StoreU(weight, DF4(), out + c * COLS * ROWS + y * COLS + x); + } + } + } + return true; +} + +// TODO(veluca): SIMD-fy. With 256x256, this is actually slow. 
+Status ComputeQuantTable(const QuantEncoding& encoding, + float* JXL_RESTRICT table, + float* JXL_RESTRICT inv_table, size_t table_num, + DequantMatrices::QuantTable kind, size_t* pos) { + constexpr size_t N = kBlockDim; + size_t wrows = 8 * DequantMatrices::required_size_x[kind], + wcols = 8 * DequantMatrices::required_size_y[kind]; + size_t num = wrows * wcols; + + std::vector weights(3 * num); + + switch (encoding.mode) { + case QuantEncoding::kQuantModeLibrary: { + // Library and copy quant encoding should get replaced by the actual + // parameters by the caller. + JXL_ASSERT(false); + break; + } + case QuantEncoding::kQuantModeID: { + JXL_ASSERT(num == kDCTBlockSize); + GetQuantWeightsIdentity(encoding.idweights, weights.data()); + break; + } + case QuantEncoding::kQuantModeDCT2: { + JXL_ASSERT(num == kDCTBlockSize); + GetQuantWeightsDCT2(encoding.dct2weights, weights.data()); + break; + } + case QuantEncoding::kQuantModeDCT4: { + JXL_ASSERT(num == kDCTBlockSize); + float weights4x4[3 * 4 * 4]; + // Always use 4x4 GetQuantWeights for DCT4 quantization tables. + JXL_RETURN_IF_ERROR( + GetQuantWeights(4, 4, encoding.dct_params.distance_bands, + encoding.dct_params.num_distance_bands, weights4x4)); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < kBlockDim; y++) { + for (size_t x = 0; x < kBlockDim; x++) { + weights[c * num + y * kBlockDim + x] = + weights4x4[c * 16 + (y / 2) * 4 + (x / 2)]; + } + } + weights[c * num + 1] /= encoding.dct4multipliers[c][0]; + weights[c * num + N] /= encoding.dct4multipliers[c][0]; + weights[c * num + N + 1] /= encoding.dct4multipliers[c][1]; + } + break; + } + case QuantEncoding::kQuantModeDCT4X8: { + JXL_ASSERT(num == kDCTBlockSize); + float weights4x8[3 * 4 * 8]; + // Always use 4x8 GetQuantWeights for DCT4X8 quantization tables. 
+ JXL_RETURN_IF_ERROR( + GetQuantWeights(4, 8, encoding.dct_params.distance_bands, + encoding.dct_params.num_distance_bands, weights4x8)); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < kBlockDim; y++) { + for (size_t x = 0; x < kBlockDim; x++) { + weights[c * num + y * kBlockDim + x] = + weights4x8[c * 32 + (y / 2) * 8 + x]; + } + } + weights[c * num + N] /= encoding.dct4x8multipliers[c]; + } + break; + } + case QuantEncoding::kQuantModeDCT: { + JXL_RETURN_IF_ERROR(GetQuantWeights( + wrows, wcols, encoding.dct_params.distance_bands, + encoding.dct_params.num_distance_bands, weights.data())); + break; + } + case QuantEncoding::kQuantModeRAW: { + if (!encoding.qraw.qtable || encoding.qraw.qtable->size() != 3 * num) { + return JXL_FAILURE("Invalid table encoding"); + } + for (size_t i = 0; i < 3 * num; i++) { + weights[i] = + 1.f / (encoding.qraw.qtable_den * (*encoding.qraw.qtable)[i]); + } + break; + } + case QuantEncoding::kQuantModeAFV: { + constexpr float kFreqs[] = { + 0xBAD, + 0xBAD, + 0.8517778890324296, + 5.37778436506804, + 0xBAD, + 0xBAD, + 4.734747904497923, + 5.449245381693219, + 1.6598270267479331, + 4, + 7.275749096817861, + 10.423227632456525, + 2.662932286148962, + 7.630657783650829, + 8.962388608184032, + 12.97166202570235, + }; + + float weights4x8[3 * 4 * 8]; + JXL_RETURN_IF_ERROR(( + GetQuantWeights(4, 8, encoding.dct_params.distance_bands, + encoding.dct_params.num_distance_bands, weights4x8))); + float weights4x4[3 * 4 * 4]; + JXL_RETURN_IF_ERROR((GetQuantWeights( + 4, 4, encoding.dct_params_afv_4x4.distance_bands, + encoding.dct_params_afv_4x4.num_distance_bands, weights4x4))); + + constexpr float lo = 0.8517778890324296; + constexpr float hi = 12.97166202570235f - lo + 1e-6f; + for (size_t c = 0; c < 3; c++) { + float bands[4]; + bands[0] = encoding.afv_weights[c][5]; + if (bands[0] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); + for (size_t i = 1; i < 4; i++) { + bands[i] = bands[i - 1] * Mult(encoding.afv_weights[c][i 
+ 5]); + if (bands[i] < kAlmostZero) return JXL_FAILURE("Invalid AFV bands"); + } + size_t start = c * 64; + auto set_weight = [&start, &weights](size_t x, size_t y, float val) { + weights[start + y * 8 + x] = val; + }; + weights[start] = 1; // Not used, but causes MSAN error otherwise. + // Weights for (0, 1) and (1, 0). + set_weight(0, 1, encoding.afv_weights[c][0]); + set_weight(1, 0, encoding.afv_weights[c][1]); + // AFV special weights for 3-pixel corner. + set_weight(0, 2, encoding.afv_weights[c][2]); + set_weight(2, 0, encoding.afv_weights[c][3]); + set_weight(2, 2, encoding.afv_weights[c][4]); + + // All other AFV weights. + for (size_t y = 0; y < 4; y++) { + for (size_t x = 0; x < 4; x++) { + if (x < 2 && y < 2) continue; + float val = Interpolate(kFreqs[y * 4 + x] - lo, hi, bands, 4); + set_weight(2 * x, 2 * y, val); + } + } + + // Put 4x8 weights in odd rows, except (1, 0). + for (size_t y = 0; y < kBlockDim / 2; y++) { + for (size_t x = 0; x < kBlockDim; x++) { + if (x == 0 && y == 0) continue; + weights[c * num + (2 * y + 1) * kBlockDim + x] = + weights4x8[c * 32 + y * 8 + x]; + } + } + // Put 4x4 weights in even rows / odd columns, except (0, 1). + for (size_t y = 0; y < kBlockDim / 2; y++) { + for (size_t x = 0; x < kBlockDim / 2; x++) { + if (x == 0 && y == 0) continue; + weights[c * num + (2 * y) * kBlockDim + 2 * x + 1] = + weights4x4[c * 16 + y * 4 + x]; + } + } + } + break; + } + } + size_t prev_pos = *pos; + HWY_CAPPED(float, 64) d; + for (size_t i = 0; i < num * 3; i += Lanes(d)) { + auto inv_val = LoadU(d, weights.data() + i); + if (JXL_UNLIKELY(!AllFalse(d, Ge(inv_val, Set(d, 1.0f / kAlmostZero))) || + !AllFalse(d, Lt(inv_val, Set(d, kAlmostZero))))) { + return JXL_FAILURE("Invalid quantization table"); + } + auto val = Div(Set(d, 1.0f), inv_val); + StoreU(val, d, table + *pos + i); + StoreU(inv_val, d, inv_table + *pos + i); + } + (*pos) += 3 * num; + + // Ensure that the lowest frequencies have a 0 inverse table. 
+ // This does not affect en/decoding, but allows AC strategy selection to be + // slightly simpler. + size_t xs = DequantMatrices::required_size_x[kind]; + size_t ys = DequantMatrices::required_size_y[kind]; + CoefficientLayout(&ys, &xs); + for (size_t c = 0; c < 3; c++) { + for (size_t y = 0; y < ys; y++) { + for (size_t x = 0; x < xs; x++) { + inv_table[prev_pos + c * ys * xs * kDCTBlockSize + y * kBlockDim * xs + + x] = 0; + } + } + } + return true; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace jxl { +namespace { + +HWY_EXPORT(ComputeQuantTable); + +static constexpr const float kAlmostZero = 1e-8f; + +Status DecodeDctParams(BitReader* br, DctQuantWeightParams* params) { + params->num_distance_bands = + br->ReadFixedBits() + 1; + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < params->num_distance_bands; i++) { + JXL_RETURN_IF_ERROR(F16Coder::Read(br, ¶ms->distance_bands[c][i])); + } + if (params->distance_bands[c][0] < kAlmostZero) { + return JXL_FAILURE("Distance band seed is too small"); + } + params->distance_bands[c][0] *= 64.0f; + } + return true; +} + +Status Decode(BitReader* br, QuantEncoding* encoding, size_t required_size_x, + size_t required_size_y, size_t idx, + ModularFrameDecoder* modular_frame_decoder) { + size_t required_size = required_size_x * required_size_y; + required_size_x *= kBlockDim; + required_size_y *= kBlockDim; + int mode = br->ReadFixedBits(); + switch (mode) { + case QuantEncoding::kQuantModeLibrary: { + encoding->predefined = br->ReadFixedBits(); + if (encoding->predefined >= kNumPredefinedTables) { + return JXL_FAILURE("Invalid predefined table"); + } + break; + } + case QuantEncoding::kQuantModeID: { + if (required_size != 1) return JXL_FAILURE("Invalid mode"); + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 3; i++) { + JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->idweights[c][i])); 
+ if (std::abs(encoding->idweights[c][i]) < kAlmostZero) { + return JXL_FAILURE("ID Quantizer is too small"); + } + encoding->idweights[c][i] *= 64; + } + } + break; + } + case QuantEncoding::kQuantModeDCT2: { + if (required_size != 1) return JXL_FAILURE("Invalid mode"); + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 6; i++) { + JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->dct2weights[c][i])); + if (std::abs(encoding->dct2weights[c][i]) < kAlmostZero) { + return JXL_FAILURE("Quantizer is too small"); + } + encoding->dct2weights[c][i] *= 64; + } + } + break; + } + case QuantEncoding::kQuantModeDCT4X8: { + if (required_size != 1) return JXL_FAILURE("Invalid mode"); + for (size_t c = 0; c < 3; c++) { + JXL_RETURN_IF_ERROR( + F16Coder::Read(br, &encoding->dct4x8multipliers[c])); + if (std::abs(encoding->dct4x8multipliers[c]) < kAlmostZero) { + return JXL_FAILURE("DCT4X8 multiplier is too small"); + } + } + JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); + break; + } + case QuantEncoding::kQuantModeDCT4: { + if (required_size != 1) return JXL_FAILURE("Invalid mode"); + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 2; i++) { + JXL_RETURN_IF_ERROR( + F16Coder::Read(br, &encoding->dct4multipliers[c][i])); + if (std::abs(encoding->dct4multipliers[c][i]) < kAlmostZero) { + return JXL_FAILURE("DCT4 multiplier is too small"); + } + } + } + JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); + break; + } + case QuantEncoding::kQuantModeAFV: { + if (required_size != 1) return JXL_FAILURE("Invalid mode"); + for (size_t c = 0; c < 3; c++) { + for (size_t i = 0; i < 9; i++) { + JXL_RETURN_IF_ERROR(F16Coder::Read(br, &encoding->afv_weights[c][i])); + } + for (size_t i = 0; i < 6; i++) { + encoding->afv_weights[c][i] *= 64; + } + } + JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); + JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params_afv_4x4)); + break; + } + case QuantEncoding::kQuantModeDCT: { + 
JXL_RETURN_IF_ERROR(DecodeDctParams(br, &encoding->dct_params)); + break; + } + case QuantEncoding::kQuantModeRAW: { + // Set mode early, to avoid mem-leak. + encoding->mode = QuantEncoding::kQuantModeRAW; + JXL_RETURN_IF_ERROR(ModularFrameDecoder::DecodeQuantTable( + required_size_x, required_size_y, br, encoding, idx, + modular_frame_decoder)); + break; + } + default: + return JXL_FAILURE("Invalid quantization table encoding"); + } + encoding->mode = QuantEncoding::Mode(mode); + return true; +} + +} // namespace + +// These definitions are needed before C++17. +constexpr size_t DequantMatrices::required_size_[]; +constexpr size_t DequantMatrices::required_size_x[]; +constexpr size_t DequantMatrices::required_size_y[]; +constexpr DequantMatrices::QuantTable DequantMatrices::kQuantTable[]; + +Status DequantMatrices::Decode(BitReader* br, + ModularFrameDecoder* modular_frame_decoder) { + size_t all_default = br->ReadBits(1); + size_t num_tables = all_default ? 0 : static_cast(kNum); + encodings_.clear(); + encodings_.resize(kNum, QuantEncoding::Library(0)); + for (size_t i = 0; i < num_tables; i++) { + JXL_RETURN_IF_ERROR( + jxl::Decode(br, &encodings_[i], required_size_x[i % kNum], + required_size_y[i % kNum], i, modular_frame_decoder)); + } + computed_mask_ = 0; + return true; +} + +Status DequantMatrices::DecodeDC(BitReader* br) { + bool all_default = br->ReadBits(1); + if (!br->AllReadsWithinBounds()) return JXL_FAILURE("EOS during DecodeDC"); + if (!all_default) { + for (size_t c = 0; c < 3; c++) { + JXL_RETURN_IF_ERROR(F16Coder::Read(br, &dc_quant_[c])); + dc_quant_[c] *= 1.0f / 128.0f; + // Negative values and nearly zero are invalid values. 
+ if (dc_quant_[c] < kAlmostZero) { + return JXL_FAILURE("Invalid dc_quant: coefficient is too small."); + } + inv_dc_quant_[c] = 1.0f / dc_quant_[c]; + } + } + return true; +} + +constexpr float V(float v) { return static_cast(v); } + +namespace { +struct DequantMatricesLibraryDef { + // DCT8 + static constexpr QuantEncodingInternal DCT() { + return QuantEncodingInternal::DCT(DctQuantWeightParams({{{{ + V(3150.0), + V(0.0), + V(-0.4), + V(-0.4), + V(-0.4), + V(-2.0), + }}, + {{ + V(560.0), + V(0.0), + V(-0.3), + V(-0.3), + V(-0.3), + V(-0.3), + }}, + {{ + V(512.0), + V(-2.0), + V(-1.0), + V(0.0), + V(-1.0), + V(-2.0), + }}}}, + 6)); + } + + // Identity + static constexpr QuantEncodingInternal IDENTITY() { + return QuantEncodingInternal::Identity({{{{ + V(280.0), + V(3160.0), + V(3160.0), + }}, + {{ + V(60.0), + V(864.0), + V(864.0), + }}, + {{ + V(18.0), + V(200.0), + V(200.0), + }}}}); + } + + // DCT2 + static constexpr QuantEncodingInternal DCT2X2() { + return QuantEncodingInternal::DCT2({{{{ + V(3840.0), + V(2560.0), + V(1280.0), + V(640.0), + V(480.0), + V(300.0), + }}, + {{ + V(960.0), + V(640.0), + V(320.0), + V(180.0), + V(140.0), + V(120.0), + }}, + {{ + V(640.0), + V(320.0), + V(128.0), + V(64.0), + V(32.0), + V(16.0), + }}}}); + } + + // DCT4 (quant_kind 3) + static constexpr QuantEncodingInternal DCT4X4() { + return QuantEncodingInternal::DCT4(DctQuantWeightParams({{{{ + V(2200.0), + V(0.0), + V(0.0), + V(0.0), + }}, + {{ + V(392.0), + V(0.0), + V(0.0), + V(0.0), + }}, + {{ + V(112.0), + V(-0.25), + V(-0.25), + V(-0.5), + }}}}, + 4), + /* kMul */ + {{{{ + V(1.0), + V(1.0), + }}, + {{ + V(1.0), + V(1.0), + }}, + {{ + V(1.0), + V(1.0), + }}}}); + } + + // DCT16 + static constexpr QuantEncodingInternal DCT16X16() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(8996.8725711814115328), + V(-1.3000777393353804), + V(-0.49424529824571225), + V(-0.439093774457103443), + V(-0.6350101832695744), + V(-0.90177264050827612), + 
V(-1.6162099239887414), + }}, + {{ + V(3191.48366296844234752), + V(-0.67424582104194355), + V(-0.80745813428471001), + V(-0.44925837484843441), + V(-0.35865440981033403), + V(-0.31322389111877305), + V(-0.37615025315725483), + }}, + {{ + V(1157.50408145487200256), + V(-2.0531423165804414), + V(-1.4), + V(-0.50687130033378396), + V(-0.42708730624733904), + V(-1.4856834539296244), + V(-4.9209142884401604), + }}}}, + 7)); + } + + // DCT32 + static constexpr QuantEncodingInternal DCT32X32() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(15718.40830982518931456), + V(-1.025), + V(-0.98), + V(-0.9012), + V(-0.4), + V(-0.48819395464), + V(-0.421064), + V(-0.27), + }}, + {{ + V(7305.7636810695983104), + V(-0.8041958212306401), + V(-0.7633036457487539), + V(-0.55660379990111464), + V(-0.49785304658857626), + V(-0.43699592683512467), + V(-0.40180866526242109), + V(-0.27321683125358037), + }}, + {{ + V(3803.53173721215041536), + V(-3.060733579805728), + V(-2.0413270132490346), + V(-2.0235650159727417), + V(-0.5495389509954993), + V(-0.4), + V(-0.4), + V(-0.3), + }}}}, + 8)); + } + + // DCT16X8 + static constexpr QuantEncodingInternal DCT8X16() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(7240.7734393502), + V(-0.7), + V(-0.7), + V(-0.2), + V(-0.2), + V(-0.2), + V(-0.5), + }}, + {{ + V(1448.15468787004), + V(-0.5), + V(-0.5), + V(-0.5), + V(-0.2), + V(-0.2), + V(-0.2), + }}, + {{ + V(506.854140754517), + V(-1.4), + V(-0.2), + V(-0.5), + V(-0.5), + V(-1.5), + V(-3.6), + }}}}, + 7)); + } + + // DCT32X8 + static constexpr QuantEncodingInternal DCT8X32() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(16283.2494710648897), + V(-1.7812845336559429), + V(-1.6309059012653515), + V(-1.0382179034313539), + V(-0.85), + V(-0.7), + V(-0.9), + V(-1.2360638576849587), + }}, + {{ + V(5089.15750884921511936), + V(-0.320049391452786891), + V(-0.35362849922161446), + V(-0.30340000000000003), + V(-0.61), + V(-0.5), + 
V(-0.5), + V(-0.6), + }}, + {{ + V(3397.77603275308720128), + V(-0.321327362693153371), + V(-0.34507619223117997), + V(-0.70340000000000003), + V(-0.9), + V(-1.0), + V(-1.0), + V(-1.1754605576265209), + }}}}, + 8)); + } + + // DCT32X16 + static constexpr QuantEncodingInternal DCT16X32() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(13844.97076442300573), + V(-0.97113799999999995), + V(-0.658), + V(-0.42026), + V(-0.22712), + V(-0.2206), + V(-0.226), + V(-0.6), + }}, + {{ + V(4798.964084220744293), + V(-0.61125308982767057), + V(-0.83770786552491361), + V(-0.79014862079498627), + V(-0.2692727459704829), + V(-0.38272769465388551), + V(-0.22924222653091453), + V(-0.20719098826199578), + }}, + {{ + V(1807.236946760964614), + V(-1.2), + V(-1.2), + V(-0.7), + V(-0.7), + V(-0.7), + V(-0.4), + V(-0.5), + }}}}, + 8)); + } + + // DCT4X8 and 8x4 + static constexpr QuantEncodingInternal DCT4X8() { + return QuantEncodingInternal::DCT4X8( + DctQuantWeightParams({{ + {{ + V(2198.050556016380522), + V(-0.96269623020744692), + V(-0.76194253026666783), + V(-0.6551140670773547), + }}, + {{ + V(764.3655248643528689), + V(-0.92630200888366945), + V(-0.9675229603596517), + V(-0.27845290869168118), + }}, + {{ + V(527.107573587542228), + V(-1.4594385811273854), + V(-1.450082094097871593), + V(-1.5843722511996204), + }}, + }}, + 4), + /* kMuls */ + {{ + V(1.0), + V(1.0), + V(1.0), + }}); + } + // AFV + static QuantEncodingInternal AFV0() { + return QuantEncodingInternal::AFV(DCT4X8().dct_params, DCT4X4().dct_params, + {{{{ + // 4x4/4x8 DC tendency. + V(3072.0), + V(3072.0), + // AFV corner. + V(256.0), + V(256.0), + V(256.0), + // AFV high freqs. + V(414.0), + V(0.0), + V(0.0), + V(0.0), + }}, + {{ + // 4x4/4x8 DC tendency. + V(1024.0), + V(1024.0), + // AFV corner. + V(50), + V(50), + V(50), + // AFV high freqs. + V(58.0), + V(0.0), + V(0.0), + V(0.0), + }}, + {{ + // 4x4/4x8 DC tendency. + V(384.0), + V(384.0), + // AFV corner. 
+ V(12.0), + V(12.0), + V(12.0), + // AFV high freqs. + V(22.0), + V(-0.25), + V(-0.25), + V(-0.25), + }}}}); + } + + // DCT64 + static QuantEncodingInternal DCT64X64() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(0.9 * 26629.073922049845), + V(-1.025), + V(-0.78), + V(-0.65012), + V(-0.19041574084286472), + V(-0.20819395464), + V(-0.421064), + V(-0.32733845535848671), + }}, + {{ + V(0.9 * 9311.3238710010046), + V(-0.3041958212306401), + V(-0.3633036457487539), + V(-0.35660379990111464), + V(-0.3443074455424403), + V(-0.33699592683512467), + V(-0.30180866526242109), + V(-0.27321683125358037), + }}, + {{ + V(0.9 * 4992.2486445538634), + V(-1.2), + V(-1.2), + V(-0.8), + V(-0.7), + V(-0.7), + V(-0.4), + V(-0.5), + }}}}, + 8)); + } + + // DCT64X32 + static QuantEncodingInternal DCT32X64() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(0.65 * 23629.073922049845), + V(-1.025), + V(-0.78), + V(-0.65012), + V(-0.19041574084286472), + V(-0.20819395464), + V(-0.421064), + V(-0.32733845535848671), + }}, + {{ + V(0.65 * 8611.3238710010046), + V(-0.3041958212306401), + V(-0.3633036457487539), + V(-0.35660379990111464), + V(-0.3443074455424403), + V(-0.33699592683512467), + V(-0.30180866526242109), + V(-0.27321683125358037), + }}, + {{ + V(0.65 * 4492.2486445538634), + V(-1.2), + V(-1.2), + V(-0.8), + V(-0.7), + V(-0.7), + V(-0.4), + V(-0.5), + }}}}, + 8)); + } + // DCT128X128 + static QuantEncodingInternal DCT128X128() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(1.8 * 26629.073922049845), + V(-1.025), + V(-0.78), + V(-0.65012), + V(-0.19041574084286472), + V(-0.20819395464), + V(-0.421064), + V(-0.32733845535848671), + }}, + {{ + V(1.8 * 9311.3238710010046), + V(-0.3041958212306401), + V(-0.3633036457487539), + V(-0.35660379990111464), + V(-0.3443074455424403), + V(-0.33699592683512467), + V(-0.30180866526242109), + V(-0.27321683125358037), + }}, + {{ + V(1.8 * 4992.2486445538634), + V(-1.2), + 
V(-1.2), + V(-0.8), + V(-0.7), + V(-0.7), + V(-0.4), + V(-0.5), + }}}}, + 8)); + } + + // DCT128X64 + static QuantEncodingInternal DCT64X128() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(1.3 * 23629.073922049845), + V(-1.025), + V(-0.78), + V(-0.65012), + V(-0.19041574084286472), + V(-0.20819395464), + V(-0.421064), + V(-0.32733845535848671), + }}, + {{ + V(1.3 * 8611.3238710010046), + V(-0.3041958212306401), + V(-0.3633036457487539), + V(-0.35660379990111464), + V(-0.3443074455424403), + V(-0.33699592683512467), + V(-0.30180866526242109), + V(-0.27321683125358037), + }}, + {{ + V(1.3 * 4492.2486445538634), + V(-1.2), + V(-1.2), + V(-0.8), + V(-0.7), + V(-0.7), + V(-0.4), + V(-0.5), + }}}}, + 8)); + } + // DCT256X256 + static QuantEncodingInternal DCT256X256() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(3.6 * 26629.073922049845), + V(-1.025), + V(-0.78), + V(-0.65012), + V(-0.19041574084286472), + V(-0.20819395464), + V(-0.421064), + V(-0.32733845535848671), + }}, + {{ + V(3.6 * 9311.3238710010046), + V(-0.3041958212306401), + V(-0.3633036457487539), + V(-0.35660379990111464), + V(-0.3443074455424403), + V(-0.33699592683512467), + V(-0.30180866526242109), + V(-0.27321683125358037), + }}, + {{ + V(3.6 * 4992.2486445538634), + V(-1.2), + V(-1.2), + V(-0.8), + V(-0.7), + V(-0.7), + V(-0.4), + V(-0.5), + }}}}, + 8)); + } + + // DCT256X128 + static QuantEncodingInternal DCT128X256() { + return QuantEncodingInternal::DCT( + DctQuantWeightParams({{{{ + V(2.6 * 23629.073922049845), + V(-1.025), + V(-0.78), + V(-0.65012), + V(-0.19041574084286472), + V(-0.20819395464), + V(-0.421064), + V(-0.32733845535848671), + }}, + {{ + V(2.6 * 8611.3238710010046), + V(-0.3041958212306401), + V(-0.3633036457487539), + V(-0.35660379990111464), + V(-0.3443074455424403), + V(-0.33699592683512467), + V(-0.30180866526242109), + V(-0.27321683125358037), + }}, + {{ + V(2.6 * 4492.2486445538634), + V(-1.2), + V(-1.2), + V(-0.8), + V(-0.7), + 
V(-0.7), + V(-0.4), + V(-0.5), + }}}}, + 8)); + } +}; +} // namespace + +DequantMatrices::DequantLibraryInternal DequantMatrices::LibraryInit() { + static_assert(kNum == 17, + "Update this function when adding new quantization kinds."); + static_assert(kNumPredefinedTables == 1, + "Update this function when adding new quantization matrices to " + "the library."); + + // The library and the indices need to be kept in sync manually. + static_assert(0 == DCT, "Update the DequantLibrary array below."); + static_assert(1 == IDENTITY, "Update the DequantLibrary array below."); + static_assert(2 == DCT2X2, "Update the DequantLibrary array below."); + static_assert(3 == DCT4X4, "Update the DequantLibrary array below."); + static_assert(4 == DCT16X16, "Update the DequantLibrary array below."); + static_assert(5 == DCT32X32, "Update the DequantLibrary array below."); + static_assert(6 == DCT8X16, "Update the DequantLibrary array below."); + static_assert(7 == DCT8X32, "Update the DequantLibrary array below."); + static_assert(8 == DCT16X32, "Update the DequantLibrary array below."); + static_assert(9 == DCT4X8, "Update the DequantLibrary array below."); + static_assert(10 == AFV0, "Update the DequantLibrary array below."); + static_assert(11 == DCT64X64, "Update the DequantLibrary array below."); + static_assert(12 == DCT32X64, "Update the DequantLibrary array below."); + static_assert(13 == DCT128X128, "Update the DequantLibrary array below."); + static_assert(14 == DCT64X128, "Update the DequantLibrary array below."); + static_assert(15 == DCT256X256, "Update the DequantLibrary array below."); + static_assert(16 == DCT128X256, "Update the DequantLibrary array below."); + return DequantMatrices::DequantLibraryInternal{{ + DequantMatricesLibraryDef::DCT(), + DequantMatricesLibraryDef::IDENTITY(), + DequantMatricesLibraryDef::DCT2X2(), + DequantMatricesLibraryDef::DCT4X4(), + DequantMatricesLibraryDef::DCT16X16(), + DequantMatricesLibraryDef::DCT32X32(), + 
DequantMatricesLibraryDef::DCT8X16(), + DequantMatricesLibraryDef::DCT8X32(), + DequantMatricesLibraryDef::DCT16X32(), + DequantMatricesLibraryDef::DCT4X8(), + DequantMatricesLibraryDef::AFV0(), + DequantMatricesLibraryDef::DCT64X64(), + DequantMatricesLibraryDef::DCT32X64(), + // Same default for large transforms (128+) as for 64x* transforms. + DequantMatricesLibraryDef::DCT128X128(), + DequantMatricesLibraryDef::DCT64X128(), + DequantMatricesLibraryDef::DCT256X256(), + DequantMatricesLibraryDef::DCT128X256(), + }}; +} + +const QuantEncoding* DequantMatrices::Library() { + static const DequantMatrices::DequantLibraryInternal kDequantLibrary = + DequantMatrices::LibraryInit(); + // Downcast the result to a const QuantEncoding* from QuantEncodingInternal* + // since the subclass (QuantEncoding) doesn't add any new members and users + // will need to upcast to QuantEncodingInternal to access the members of that + // class. This allows to have kDequantLibrary as a constexpr value while still + // allowing to create QuantEncoding::RAW() instances that use std::vector in + // C++11. 
+ return reinterpret_cast(kDequantLibrary.data()); +} + +DequantMatrices::DequantMatrices() { + encodings_.resize(size_t(QuantTable::kNum), QuantEncoding::Library(0)); + size_t pos = 0; + size_t offsets[kNum * 3]; + for (size_t i = 0; i < size_t(QuantTable::kNum); i++) { + size_t num = required_size_[i] * kDCTBlockSize; + for (size_t c = 0; c < 3; c++) { + offsets[3 * i + c] = pos + c * num; + } + pos += 3 * num; + } + for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { + for (size_t c = 0; c < 3; c++) { + table_offsets_[i * 3 + c] = offsets[kQuantTable[i] * 3 + c]; + } + } +} + +Status DequantMatrices::EnsureComputed(uint32_t acs_mask) { + const QuantEncoding* library = Library(); + + if (!table_storage_) { + table_storage_ = hwy::AllocateAligned(2 * kTotalTableSize); + table_ = table_storage_.get(); + inv_table_ = table_storage_.get() + kTotalTableSize; + } + + size_t offsets[kNum * 3 + 1]; + size_t pos = 0; + for (size_t i = 0; i < kNum; i++) { + size_t num = required_size_[i] * kDCTBlockSize; + for (size_t c = 0; c < 3; c++) { + offsets[3 * i + c] = pos + c * num; + } + pos += 3 * num; + } + offsets[kNum * 3] = pos; + JXL_ASSERT(pos == kTotalTableSize); + + uint32_t kind_mask = 0; + for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { + if (acs_mask & (1u << i)) { + kind_mask |= 1u << kQuantTable[i]; + } + } + uint32_t computed_kind_mask = 0; + for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { + if (computed_mask_ & (1u << i)) { + computed_kind_mask |= 1u << kQuantTable[i]; + } + } + for (size_t table = 0; table < kNum; table++) { + if ((1 << table) & computed_kind_mask) continue; + if ((1 << table) & ~kind_mask) continue; + size_t pos = offsets[table * 3]; + if (encodings_[table].mode == QuantEncoding::kQuantModeLibrary) { + JXL_CHECK(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)( + library[table], table_storage_.get(), + table_storage_.get() + kTotalTableSize, table, QuantTable(table), + &pos)); + } else { + 
JXL_RETURN_IF_ERROR(HWY_DYNAMIC_DISPATCH(ComputeQuantTable)( + encodings_[table], table_storage_.get(), + table_storage_.get() + kTotalTableSize, table, QuantTable(table), + &pos)); + } + JXL_ASSERT(pos == offsets[table * 3 + 3]); + } + computed_mask_ |= acs_mask; + + return true; +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/quant_weights.h b/third-party/libjxl/libjxl/lib/jxl/quant_weights.h new file mode 100644 index 0000000000..d76fc1d1e6 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/quant_weights.h @@ -0,0 +1,448 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_QUANT_WEIGHTS_H_ +#define LIB_JXL_QUANT_WEIGHTS_H_ + +#include +#include + +#include +#include +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/cache_aligned.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/image.h" + +namespace jxl { + +template +constexpr T ArraySum(T (&a)[N], size_t i = N - 1) { + static_assert(N > 0, "Trying to compute the sum of an empty array"); + return i == 0 ? 
a[0] : a[i] + ArraySum(a, i - 1); +} + +static constexpr size_t kMaxQuantTableSize = AcStrategy::kMaxCoeffArea; +static constexpr size_t kNumPredefinedTables = 1; +static constexpr size_t kCeilLog2NumPredefinedTables = 0; +static constexpr size_t kLog2NumQuantModes = 3; + +struct DctQuantWeightParams { + static constexpr size_t kLog2MaxDistanceBands = 4; + static constexpr size_t kMaxDistanceBands = 1 + (1 << kLog2MaxDistanceBands); + typedef std::array, 3> + DistanceBandsArray; + + size_t num_distance_bands = 0; + DistanceBandsArray distance_bands = {}; + + constexpr DctQuantWeightParams() : num_distance_bands(0) {} + + constexpr DctQuantWeightParams(const DistanceBandsArray& dist_bands, + size_t num_dist_bands) + : num_distance_bands(num_dist_bands), distance_bands(dist_bands) {} + + template + explicit DctQuantWeightParams(const float dist_bands[3][num_dist_bands]) { + num_distance_bands = num_dist_bands; + for (size_t c = 0; c < 3; c++) { + memcpy(distance_bands[c].data(), dist_bands[c], + sizeof(float) * num_dist_bands); + } + } +}; + +// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) +struct QuantEncodingInternal { + enum Mode { + kQuantModeLibrary, + kQuantModeID, + kQuantModeDCT2, + kQuantModeDCT4, + kQuantModeDCT4X8, + kQuantModeAFV, + kQuantModeDCT, + kQuantModeRAW, + }; + + template + struct Tag {}; + + typedef std::array, 3> IdWeights; + typedef std::array, 3> DCT2Weights; + typedef std::array, 3> DCT4Multipliers; + typedef std::array, 3> AFVWeights; + typedef std::array DCT4x8Multipliers; + + static constexpr QuantEncodingInternal Library(uint8_t predefined) { + return ((predefined < kNumPredefinedTables) || + JXL_ABORT("Assert predefined < kNumPredefinedTables")), + QuantEncodingInternal(Tag(), predefined); + } + constexpr QuantEncodingInternal(Tag /* tag */, + uint8_t predefined) + : mode(kQuantModeLibrary), predefined(predefined) {} + + // Identity + // xybweights is an array of {xweights, yweights, bweights}. 
+ static constexpr QuantEncodingInternal Identity(const IdWeights& xybweights) { + return QuantEncodingInternal(Tag(), xybweights); + } + constexpr QuantEncodingInternal(Tag /* tag */, + const IdWeights& xybweights) + : mode(kQuantModeID), idweights(xybweights) {} + + // DCT2 + static constexpr QuantEncodingInternal DCT2(const DCT2Weights& xybweights) { + return QuantEncodingInternal(Tag(), xybweights); + } + constexpr QuantEncodingInternal(Tag /* tag */, + const DCT2Weights& xybweights) + : mode(kQuantModeDCT2), dct2weights(xybweights) {} + + // DCT4 + static constexpr QuantEncodingInternal DCT4( + const DctQuantWeightParams& params, const DCT4Multipliers& xybmul) { + return QuantEncodingInternal(Tag(), params, xybmul); + } + constexpr QuantEncodingInternal(Tag /* tag */, + const DctQuantWeightParams& params, + const DCT4Multipliers& xybmul) + : mode(kQuantModeDCT4), dct_params(params), dct4multipliers(xybmul) {} + + // DCT4x8 + static constexpr QuantEncodingInternal DCT4X8( + const DctQuantWeightParams& params, const DCT4x8Multipliers& xybmul) { + return QuantEncodingInternal(Tag(), params, xybmul); + } + constexpr QuantEncodingInternal(Tag /* tag */, + const DctQuantWeightParams& params, + const DCT4x8Multipliers& xybmul) + : mode(kQuantModeDCT4X8), dct_params(params), dct4x8multipliers(xybmul) {} + + // DCT + static constexpr QuantEncodingInternal DCT( + const DctQuantWeightParams& params) { + return QuantEncodingInternal(Tag(), params); + } + constexpr QuantEncodingInternal(Tag /* tag */, + const DctQuantWeightParams& params) + : mode(kQuantModeDCT), dct_params(params) {} + + // AFV + static constexpr QuantEncodingInternal AFV( + const DctQuantWeightParams& params4x8, + const DctQuantWeightParams& params4x4, const AFVWeights& weights) { + return QuantEncodingInternal(Tag(), params4x8, params4x4, + weights); + } + constexpr QuantEncodingInternal(Tag /* tag */, + const DctQuantWeightParams& params4x8, + const DctQuantWeightParams& params4x4, + const AFVWeights& 
weights) + : mode(kQuantModeAFV), + dct_params(params4x8), + afv_weights(weights), + dct_params_afv_4x4(params4x4) {} + + // This constructor is not constexpr so it can't be used in any of the + // constexpr cases above. + explicit QuantEncodingInternal(Mode mode) : mode(mode) {} + + Mode mode; + + // Weights for DCT4+ tables. + DctQuantWeightParams dct_params; + + union { + // Weights for identity. + IdWeights idweights; + + // Weights for DCT2. + DCT2Weights dct2weights; + + // Extra multipliers for coefficients 01/10 and 11 for DCT4 and AFV. + DCT4Multipliers dct4multipliers; + + // Weights for AFV. {0, 1} are used directly for coefficients (0, 1) and (1, + // 0); {2, 3, 4} are used directly corner DC, (1,0) - (0,1) and (0, 1) + + // (1, 0) - (0, 0) inside the AFV block. Values from 5 to 8 are interpolated + // as in GetQuantWeights for DC and are used for other coefficients. + AFVWeights afv_weights = {}; + + // Extra multipliers for coefficients 01 or 10 for DCT4X8 and DCT8X4. + DCT4x8Multipliers dct4x8multipliers; + + // Only used in kQuantModeRAW mode. + struct { + // explicit quantization table (like in JPEG) + std::vector* qtable = nullptr; + float qtable_den = 1.f / (8 * 255); + } qraw; + }; + + // Weights for 4x4 sub-block in AFV. + DctQuantWeightParams dct_params_afv_4x4; + + union { + // Which predefined table to use. Only used if mode is kQuantModeLibrary. + uint8_t predefined = 0; + + // Which other quant table to copy; must copy from a table that comes before + // the current one. Only used if mode is kQuantModeCopy. + uint8_t source; + }; +}; + +class QuantEncoding final : public QuantEncodingInternal { + public: + QuantEncoding(const QuantEncoding& other) + : QuantEncodingInternal( + static_cast(other)) { + if (mode == kQuantModeRAW && qraw.qtable) { + // Need to make a copy of the passed *qtable. 
+ qraw.qtable = new std::vector(*other.qraw.qtable); + } + } + QuantEncoding(QuantEncoding&& other) noexcept + : QuantEncodingInternal( + static_cast(other)) { + // Steal the qtable from the other object if any. + if (mode == kQuantModeRAW) { + other.qraw.qtable = nullptr; + } + } + QuantEncoding& operator=(const QuantEncoding& other) { + // Self-assignment must be a no-op: the code below deletes our qtable before + // copying from other's, which would read freed memory when &other == this. + if (this == &other) return *this; + if (mode == kQuantModeRAW && qraw.qtable) { + delete qraw.qtable; + } + *static_cast(this) = + QuantEncodingInternal(static_cast(other)); + if (mode == kQuantModeRAW && qraw.qtable) { + // Need to make a copy of the passed *qtable. + qraw.qtable = new std::vector(*other.qraw.qtable); + } + return *this; + } + + ~QuantEncoding() { + if (mode == kQuantModeRAW && qraw.qtable) { + delete qraw.qtable; + } + } + + // Wrappers of the QuantEncodingInternal:: static functions that return a + // QuantEncoding instead. This is using the explicit and private cast from + // QuantEncodingInternal to QuantEncoding, which would be inlined anyway. + // In general, you should use these wrappers. The only reason to directly + // create a QuantEncodingInternal instance is if you need a constexpr version + // of this class. Note that RAW() is not supported in that case since it uses + // a std::vector. 
+ static QuantEncoding Library(uint8_t predefined_arg) { + return QuantEncoding(QuantEncodingInternal::Library(predefined_arg)); + } + static QuantEncoding Identity(const IdWeights& xybweights) { + return QuantEncoding(QuantEncodingInternal::Identity(xybweights)); + } + static QuantEncoding DCT2(const DCT2Weights& xybweights) { + return QuantEncoding(QuantEncodingInternal::DCT2(xybweights)); + } + static QuantEncoding DCT4(const DctQuantWeightParams& params, + const DCT4Multipliers& xybmul) { + return QuantEncoding(QuantEncodingInternal::DCT4(params, xybmul)); + } + static QuantEncoding DCT4X8(const DctQuantWeightParams& params, + const DCT4x8Multipliers& xybmul) { + return QuantEncoding(QuantEncodingInternal::DCT4X8(params, xybmul)); + } + static QuantEncoding DCT(const DctQuantWeightParams& params) { + return QuantEncoding(QuantEncodingInternal::DCT(params)); + } + static QuantEncoding AFV(const DctQuantWeightParams& params4x8, + const DctQuantWeightParams& params4x4, + const AFVWeights& weights) { + return QuantEncoding( + QuantEncodingInternal::AFV(params4x8, params4x4, weights)); + } + + // RAW, note that this one is not a constexpr one. + static QuantEncoding RAW(const std::vector& qtable, int shift = 0) { + QuantEncoding encoding(kQuantModeRAW); + encoding.qraw.qtable = new std::vector(); + *encoding.qraw.qtable = qtable; + encoding.qraw.qtable_den = (1 << shift) * (1.f / (8 * 255)); + return encoding; + } + + private: + explicit QuantEncoding(const QuantEncodingInternal& other) + : QuantEncodingInternal(other) {} + + explicit QuantEncoding(QuantEncodingInternal::Mode mode_arg) + : QuantEncodingInternal(mode_arg) {} +}; + +// A constexpr QuantEncodingInternal instance is often downcasted to the +// QuantEncoding subclass even if the instance wasn't an instance of the +// subclass. This is safe because user will upcast to QuantEncodingInternal to +// access any of its members. 
+static_assert(sizeof(QuantEncoding) == sizeof(QuantEncodingInternal), + "Don't add any members to QuantEncoding"); + +// Let's try to keep these 2**N for possible future simplicity. +const float kInvDCQuant[3] = { + 4096.0f, + 512.0f, + 256.0f, +}; + +const float kDCQuant[3] = { + 1.0f / kInvDCQuant[0], + 1.0f / kInvDCQuant[1], + 1.0f / kInvDCQuant[2], +}; + +class ModularFrameEncoder; +class ModularFrameDecoder; + +class DequantMatrices { + public: + enum QuantTable : size_t { + DCT = 0, + IDENTITY, + DCT2X2, + DCT4X4, + DCT16X16, + DCT32X32, + // DCT16X8 + DCT8X16, + // DCT32X8 + DCT8X32, + // DCT32X16 + DCT16X32, + DCT4X8, + // DCT8X4 + AFV0, + // AFV1 + // AFV2 + // AFV3 + DCT64X64, + // DCT64X32, + DCT32X64, + DCT128X128, + // DCT128X64, + DCT64X128, + DCT256X256, + // DCT256X128, + DCT128X256, + kNum + }; + + static constexpr QuantTable kQuantTable[] = { + QuantTable::DCT, QuantTable::IDENTITY, QuantTable::DCT2X2, + QuantTable::DCT4X4, QuantTable::DCT16X16, QuantTable::DCT32X32, + QuantTable::DCT8X16, QuantTable::DCT8X16, QuantTable::DCT8X32, + QuantTable::DCT8X32, QuantTable::DCT16X32, QuantTable::DCT16X32, + QuantTable::DCT4X8, QuantTable::DCT4X8, QuantTable::AFV0, + QuantTable::AFV0, QuantTable::AFV0, QuantTable::AFV0, + QuantTable::DCT64X64, QuantTable::DCT32X64, QuantTable::DCT32X64, + QuantTable::DCT128X128, QuantTable::DCT64X128, QuantTable::DCT64X128, + QuantTable::DCT256X256, QuantTable::DCT128X256, QuantTable::DCT128X256, + }; + static_assert(AcStrategy::kNumValidStrategies == + sizeof(kQuantTable) / sizeof *kQuantTable, + "Update this array when adding or removing AC strategies."); + + DequantMatrices(); + + static const QuantEncoding* Library(); + + typedef std::array + DequantLibraryInternal; + // Return the array of library kNumPredefinedTables QuantEncoding entries as + // a constexpr array. Use Library() to obtain a pointer to the copy in the + // .cc file. + static DequantLibraryInternal LibraryInit(); + + // Returns aligned memory. 
+ JXL_INLINE const float* Matrix(size_t quant_kind, size_t c) const { + JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies); + JXL_DASSERT((1 << quant_kind) & computed_mask_); + return &table_[table_offsets_[quant_kind * 3 + c]]; + } + + JXL_INLINE const float* InvMatrix(size_t quant_kind, size_t c) const { + JXL_DASSERT(quant_kind < AcStrategy::kNumValidStrategies); + JXL_DASSERT((1 << quant_kind) & computed_mask_); + return &inv_table_[table_offsets_[quant_kind * 3 + c]]; + } + + // DC quants are used in modular mode for XYB multipliers. + JXL_INLINE float DCQuant(size_t c) const { return dc_quant_[c]; } + JXL_INLINE const float* DCQuants() const { return dc_quant_; } + + JXL_INLINE float InvDCQuant(size_t c) const { return inv_dc_quant_[c]; } + + // For encoder. + void SetEncodings(const std::vector& encodings) { + encodings_ = encodings; + computed_mask_ = 0; + } + + // For encoder. + void SetDCQuant(const float dc[3]) { + for (size_t c = 0; c < 3; c++) { + dc_quant_[c] = 1.0f / dc[c]; + inv_dc_quant_[c] = dc[c]; + } + } + + Status Decode(BitReader* br, + ModularFrameDecoder* modular_frame_decoder = nullptr); + Status DecodeDC(BitReader* br); + + const std::vector& encodings() const { return encodings_; } + + static constexpr size_t required_size_x[] = {1, 1, 1, 1, 2, 4, 1, 1, 2, + 1, 1, 8, 4, 16, 8, 32, 16}; + static_assert(kNum == sizeof(required_size_x) / sizeof(*required_size_x), + "Update this array when adding or removing quant tables."); + + static constexpr size_t required_size_y[] = {1, 1, 1, 1, 2, 4, 2, 4, 4, + 1, 1, 8, 8, 16, 16, 32, 32}; + static_assert(kNum == sizeof(required_size_y) / sizeof(*required_size_y), + "Update this array when adding or removing quant tables."); + + Status EnsureComputed(uint32_t acs_mask); + + private: + static constexpr size_t required_size_[] = { + 1, 1, 1, 1, 4, 16, 2, 4, 8, 1, 1, 64, 32, 256, 128, 1024, 512}; + static_assert(kNum == sizeof(required_size_) / sizeof(*required_size_), + "Update this array when 
adding or removing quant tables."); + static constexpr size_t kTotalTableSize = + ArraySum(required_size_) * kDCTBlockSize * 3; + + uint32_t computed_mask_ = 0; + // kTotalTableSize entries followed by kTotalTableSize for inv_table + hwy::AlignedFreeUniquePtr table_storage_; + const float* table_; + const float* inv_table_; + float dc_quant_[3] = {kDCQuant[0], kDCQuant[1], kDCQuant[2]}; + float inv_dc_quant_[3] = {kInvDCQuant[0], kInvDCQuant[1], kInvDCQuant[2]}; + size_t table_offsets_[AcStrategy::kNumValidStrategies * 3]; + std::vector encodings_; +}; + +} // namespace jxl + +#endif // LIB_JXL_QUANT_WEIGHTS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/quant_weights_test.cc b/third-party/libjxl/libjxl/lib/jxl/quant_weights_test.cc new file mode 100644 index 0000000000..f0497948a7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/quant_weights_test.cc @@ -0,0 +1,240 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +#include "lib/jxl/quant_weights.h" + +#include + +#include +#include +#include // HWY_ALIGN_MAX +#include +#include + +#include "lib/jxl/base/random.h" +#include "lib/jxl/dct_for_test.h" +#include "lib/jxl/dec_transforms_testonly.h" +#include "lib/jxl/enc_modular.h" +#include "lib/jxl/enc_quant_weights.h" +#include "lib/jxl/enc_transforms.h" + +namespace jxl { +namespace { + +template +void CheckSimilar(T a, T b) { + EXPECT_EQ(a, b); +} +// minimum exponent = -15. +template <> +void CheckSimilar(float a, float b) { + float m = std::max(std::abs(a), std::abs(b)); + // 10 bits of precision are used in the format. Relative error should be + // below 2^-10. 
+ EXPECT_LE(std::abs(a - b), m / 1024.0f) << "a: " << a << " b: " << b; +} + +TEST(QuantWeightsTest, DC) { + DequantMatrices mat; + float dc_quant[3] = {1e+5, 1e+3, 1e+1}; + DequantMatricesSetCustomDC(&mat, dc_quant); + for (size_t c = 0; c < 3; c++) { + CheckSimilar(mat.InvDCQuant(c), dc_quant[c]); + } +} + +void RoundtripMatrices(const std::vector& encodings) { + ASSERT_TRUE(encodings.size() == DequantMatrices::kNum); + DequantMatrices mat; + CodecMetadata metadata; + FrameHeader frame_header(&metadata); + ModularFrameEncoder encoder(frame_header, CompressParams{}); + DequantMatricesSetCustom(&mat, encodings, &encoder); + const std::vector& encodings_dec = mat.encodings(); + for (size_t i = 0; i < encodings.size(); i++) { + const QuantEncoding& e = encodings[i]; + const QuantEncoding& d = encodings_dec[i]; + // Check values roundtripped correctly. + EXPECT_EQ(e.mode, d.mode); + EXPECT_EQ(e.predefined, d.predefined); + EXPECT_EQ(e.source, d.source); + + EXPECT_EQ(static_cast(e.dct_params.num_distance_bands), + static_cast(d.dct_params.num_distance_bands)); + for (size_t c = 0; c < 3; c++) { + for (size_t j = 0; j < DctQuantWeightParams::kMaxDistanceBands; j++) { + CheckSimilar(e.dct_params.distance_bands[c][j], + d.dct_params.distance_bands[c][j]); + } + } + + if (e.mode == QuantEncoding::kQuantModeRAW) { + EXPECT_FALSE(!e.qraw.qtable); + EXPECT_FALSE(!d.qraw.qtable); + EXPECT_EQ(e.qraw.qtable->size(), d.qraw.qtable->size()); + for (size_t j = 0; j < e.qraw.qtable->size(); j++) { + EXPECT_EQ((*e.qraw.qtable)[j], (*d.qraw.qtable)[j]); + } + EXPECT_NEAR(e.qraw.qtable_den, d.qraw.qtable_den, 1e-7f); + } else { + // modes different than kQuantModeRAW use one of the other fields used + // here, which all happen to be arrays of floats. 
+ for (size_t c = 0; c < 3; c++) { + for (size_t j = 0; j < 3; j++) { + CheckSimilar(e.idweights[c][j], d.idweights[c][j]); + } + for (size_t j = 0; j < 6; j++) { + CheckSimilar(e.dct2weights[c][j], d.dct2weights[c][j]); + } + for (size_t j = 0; j < 2; j++) { + CheckSimilar(e.dct4multipliers[c][j], d.dct4multipliers[c][j]); + } + CheckSimilar(e.dct4x8multipliers[c], d.dct4x8multipliers[c]); + for (size_t j = 0; j < 9; j++) { + CheckSimilar(e.afv_weights[c][j], d.afv_weights[c][j]); + } + for (size_t j = 0; j < DctQuantWeightParams::kMaxDistanceBands; j++) { + CheckSimilar(e.dct_params_afv_4x4.distance_bands[c][j], + d.dct_params_afv_4x4.distance_bands[c][j]); + } + } + } + } +} + +TEST(QuantWeightsTest, AllDefault) { + std::vector encodings(DequantMatrices::kNum, + QuantEncoding::Library(0)); + RoundtripMatrices(encodings); +} + +void TestSingleQuantMatrix(DequantMatrices::QuantTable kind) { + std::vector encodings(DequantMatrices::kNum, + QuantEncoding::Library(0)); + encodings[kind] = DequantMatrices::Library()[kind]; + RoundtripMatrices(encodings); +} + +// Ensure we can reasonably represent default quant tables. 
+TEST(QuantWeightsTest, DCT) { TestSingleQuantMatrix(DequantMatrices::DCT); } +TEST(QuantWeightsTest, IDENTITY) { + TestSingleQuantMatrix(DequantMatrices::IDENTITY); +} +TEST(QuantWeightsTest, DCT2X2) { + TestSingleQuantMatrix(DequantMatrices::DCT2X2); +} +TEST(QuantWeightsTest, DCT4X4) { + TestSingleQuantMatrix(DequantMatrices::DCT4X4); +} +TEST(QuantWeightsTest, DCT16X16) { + TestSingleQuantMatrix(DequantMatrices::DCT16X16); +} +TEST(QuantWeightsTest, DCT32X32) { + TestSingleQuantMatrix(DequantMatrices::DCT32X32); +} +TEST(QuantWeightsTest, DCT8X16) { + TestSingleQuantMatrix(DequantMatrices::DCT8X16); +} +TEST(QuantWeightsTest, DCT8X32) { + TestSingleQuantMatrix(DequantMatrices::DCT8X32); +} +TEST(QuantWeightsTest, DCT16X32) { + TestSingleQuantMatrix(DequantMatrices::DCT16X32); +} +TEST(QuantWeightsTest, DCT4X8) { + TestSingleQuantMatrix(DequantMatrices::DCT4X8); +} +TEST(QuantWeightsTest, AFV0) { TestSingleQuantMatrix(DequantMatrices::AFV0); } +TEST(QuantWeightsTest, RAW) { + std::vector encodings(DequantMatrices::kNum, + QuantEncoding::Library(0)); + std::vector matrix(3 * 32 * 32); + Rng rng(0); + for (size_t i = 0; i < matrix.size(); i++) matrix[i] = rng.UniformI(1, 256); + encodings[DequantMatrices::kQuantTable[AcStrategy::DCT32X32]] = + QuantEncoding::RAW(matrix, 2); + RoundtripMatrices(encodings); +} + +class QuantWeightsTargetTest : public hwy::TestWithParamTarget {}; +HWY_TARGET_INSTANTIATE_TEST_SUITE_P(QuantWeightsTargetTest); + +TEST_P(QuantWeightsTargetTest, DCTUniform) { + constexpr float kUniformQuant = 4; + float weights[3][2] = {{1.0f / kUniformQuant, 0}, + {1.0f / kUniformQuant, 0}, + {1.0f / kUniformQuant, 0}}; + DctQuantWeightParams dct_params(weights); + std::vector encodings(DequantMatrices::kNum, + QuantEncoding::DCT(dct_params)); + DequantMatrices dequant_matrices; + CodecMetadata metadata; + FrameHeader frame_header(&metadata); + ModularFrameEncoder encoder(frame_header, CompressParams{}); + DequantMatricesSetCustom(&dequant_matrices, 
encodings, &encoder); + JXL_CHECK(dequant_matrices.EnsureComputed(~0u)); + + const float dc_quant[3] = {1.0f / kUniformQuant, 1.0f / kUniformQuant, + 1.0f / kUniformQuant}; + DequantMatricesSetCustomDC(&dequant_matrices, dc_quant); + + HWY_ALIGN_MAX float scratch_space[16 * 16 * 2]; + + // DCT8 + { + HWY_ALIGN_MAX float pixels[64]; + std::iota(std::begin(pixels), std::end(pixels), 0); + HWY_ALIGN_MAX float coeffs[64]; + const AcStrategy::Type dct = AcStrategy::DCT; + TransformFromPixels(dct, pixels, 8, coeffs, scratch_space); + HWY_ALIGN_MAX double slow_coeffs[64]; + for (size_t i = 0; i < 64; i++) slow_coeffs[i] = pixels[i]; + DCTSlow<8>(slow_coeffs); + + for (size_t i = 0; i < 64; i++) { + // DCTSlow doesn't multiply/divide by 1/N, so we do it manually. + slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant; + coeffs[i] = roundf(coeffs[i] / dequant_matrices.Matrix(dct, 0)[i]) * + dequant_matrices.Matrix(dct, 0)[i]; + } + IDCTSlow<8>(slow_coeffs); + TransformToPixels(dct, coeffs, pixels, 8, scratch_space); + for (size_t i = 0; i < 64; i++) { + EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4); + } + } + + // DCT16 + { + HWY_ALIGN_MAX float pixels[64 * 4]; + std::iota(std::begin(pixels), std::end(pixels), 0); + HWY_ALIGN_MAX float coeffs[64 * 4]; + const AcStrategy::Type dct = AcStrategy::DCT16X16; + TransformFromPixels(dct, pixels, 16, coeffs, scratch_space); + HWY_ALIGN_MAX double slow_coeffs[64 * 4]; + for (size_t i = 0; i < 64 * 4; i++) slow_coeffs[i] = pixels[i]; + DCTSlow<16>(slow_coeffs); + + for (size_t i = 0; i < 64 * 4; i++) { + slow_coeffs[i] = roundf(slow_coeffs[i] / kUniformQuant) * kUniformQuant; + coeffs[i] = roundf(coeffs[i] / dequant_matrices.Matrix(dct, 0)[i]) * + dequant_matrices.Matrix(dct, 0)[i]; + } + + IDCTSlow<16>(slow_coeffs); + TransformToPixels(dct, coeffs, pixels, 16, scratch_space); + for (size_t i = 0; i < 64 * 4; i++) { + EXPECT_NEAR(pixels[i], slow_coeffs[i], 1e-4); + } + } + + // Check that all matrices have the same 
DC quantization, i.e. that they all + // have the same scaling. + for (size_t i = 0; i < AcStrategy::kNumValidStrategies; i++) { + EXPECT_NEAR(dequant_matrices.Matrix(i, 0)[0], kUniformQuant, 1e-6); + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/quantizer-inl.h b/third-party/libjxl/libjxl/lib/jxl/quantizer-inl.h new file mode 100644 index 0000000000..64d273c552 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/quantizer-inl.h @@ -0,0 +1,74 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#if defined(LIB_JXL_QUANTIZER_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_QUANTIZER_INL_H_ +#undef LIB_JXL_QUANTIZER_INL_H_ +#else +#define LIB_JXL_QUANTIZER_INL_H_ +#endif + +#include + +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::AndNot; +using hwy::HWY_NAMESPACE::ApproximateReciprocal; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::IfThenElseZero; +using hwy::HWY_NAMESPACE::Lt; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::Vec; +using hwy::HWY_NAMESPACE::Xor; + +template +HWY_INLINE HWY_MAYBE_UNUSED Vec> AdjustQuantBias( + DI di, const size_t c, const Vec quant_i, + const float* HWY_RESTRICT biases) { + const Rebind df; + + const auto quant = ConvertTo(df, quant_i); + + // Compare |quant|, keep sign bit for negating result. + const auto kSign = BitCast(df, Set(di, INT32_MIN)); + const auto sign = And(quant, kSign); // TODO(janwas): = abs ^ orig + const auto abs_quant = AndNot(kSign, quant); + + // If |x| is 1, kZeroBias creates a different bias for each channel. 
+ // We're implementing the following: + // if (quant == 0) return 0; + // if (quant == 1) return biases[c]; + // if (quant == -1) return -biases[c]; + // return quant - biases[3] / quant; + + // Integer comparison is not helpful because Clang incurs bypass penalties + // from unnecessarily mixing integer and float. + const auto is_01 = Lt(abs_quant, Set(df, 1.125f)); + const auto not_0 = Gt(abs_quant, Zero(df)); + + // Bitwise logic is faster than quant * biases[c]. + const auto one_bias = IfThenElseZero(not_0, Xor(Set(df, biases[c]), sign)); + + // About 2E-5 worse than ReciprocalNR or division. + const auto bias = + NegMulAdd(Set(df, biases[3]), ApproximateReciprocal(quant), quant); + + return IfThenElse(is_01, one_bias, bias); +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_QUANTIZER_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/quantizer.cc b/third-party/libjxl/libjxl/lib/jxl/quantizer.cc new file mode 100644 index 0000000000..153cf19b21 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/quantizer.cc @@ -0,0 +1,156 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/quantizer.h" + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/field_encodings.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/quant_weights.h" + +namespace jxl { + +static const int32_t kDefaultQuant = 64; + +constexpr int32_t Quantizer::kQuantMax; + +Quantizer::Quantizer(const DequantMatrices* dequant) + : Quantizer(dequant, kDefaultQuant, kGlobalScaleDenom / kDefaultQuant) {} + +Quantizer::Quantizer(const DequantMatrices* dequant, int quant_dc, + int global_scale) + : global_scale_(global_scale), quant_dc_(quant_dc), dequant_(dequant) { + JXL_ASSERT(dequant_ != nullptr); + RecomputeFromGlobalScale(); + inv_quant_dc_ = inv_global_scale_ / quant_dc_; + + memcpy(zero_bias_, kZeroBiasDefault, sizeof(kZeroBiasDefault)); +} + +void Quantizer::ComputeGlobalScaleAndQuant(float quant_dc, float quant_median, + float quant_median_absd) { + // Target value for the median value in the quant field. + const float kQuantFieldTarget = 5; + // We reduce the median of the quant field by the median absolute deviation: + // higher resolution on highly varying quant fields. + float scale = kGlobalScaleDenom * (quant_median - quant_median_absd) / + kQuantFieldTarget; + // Ensure that new_global_scale is positive and no more than 1<<15. + if (scale < 1) scale = 1; + if (scale > (1 << 15)) scale = 1 << 15; + int new_global_scale = static_cast(scale); + // Ensure that quant_dc_ will always be at least + // 0.625 * kGlobalScaleDenom/kGlobalScaleNumerator = 10. + const int scaled_quant_dc = + static_cast(quant_dc * kGlobalScaleNumerator * 1.6); + if (new_global_scale > scaled_quant_dc) { + new_global_scale = scaled_quant_dc; + if (new_global_scale <= 0) new_global_scale = 1; + } + global_scale_ = new_global_scale; + // Code below uses inv_global_scale_. 
+ RecomputeFromGlobalScale(); + + float fval = quant_dc * inv_global_scale_ + 0.5f; + fval = std::min(1 << 16, fval); + const int new_quant_dc = static_cast(fval); + quant_dc_ = new_quant_dc; + + // quant_dc_ was updated, recompute values. + RecomputeFromGlobalScale(); +} + +void Quantizer::SetQuantFieldRect(const ImageF& qf, const Rect& rect, + ImageI* JXL_RESTRICT raw_quant_field) const { + for (size_t y = 0; y < rect.ysize(); ++y) { + const float* JXL_RESTRICT row_qf = rect.ConstRow(qf, y); + int32_t* JXL_RESTRICT row_qi = rect.Row(raw_quant_field, y); + for (size_t x = 0; x < rect.xsize(); ++x) { + int val = ClampVal(row_qf[x] * inv_global_scale_ + 0.5f); + row_qi[x] = val; + } + } +} + +void Quantizer::SetQuantField(const float quant_dc, const ImageF& qf, + ImageI* JXL_RESTRICT raw_quant_field) { + std::vector data(qf.xsize() * qf.ysize()); + for (size_t y = 0; y < qf.ysize(); ++y) { + const float* JXL_RESTRICT row_qf = qf.Row(y); + for (size_t x = 0; x < qf.xsize(); ++x) { + float quant = row_qf[x]; + data[qf.xsize() * y + x] = quant; + } + } + std::nth_element(data.begin(), data.begin() + data.size() / 2, data.end()); + const float quant_median = data[data.size() / 2]; + std::vector deviations(data.size()); + for (size_t i = 0; i < data.size(); i++) { + deviations[i] = fabsf(data[i] - quant_median); + } + std::nth_element(deviations.begin(), + deviations.begin() + deviations.size() / 2, + deviations.end()); + const float quant_median_absd = deviations[deviations.size() / 2]; + ComputeGlobalScaleAndQuant(quant_dc, quant_median, quant_median_absd); + if (raw_quant_field) { + JXL_CHECK(SameSize(*raw_quant_field, qf)); + SetQuantFieldRect(qf, Rect(qf), raw_quant_field); + } +} + +void Quantizer::SetQuant(float quant_dc, float quant_ac, + ImageI* JXL_RESTRICT raw_quant_field) { + ComputeGlobalScaleAndQuant(quant_dc, quant_ac, 0); + int32_t val = ClampVal(quant_ac * inv_global_scale_ + 0.5f); + FillImage(val, raw_quant_field); +} + +Status 
QuantizerParams::VisitFields(Visitor* JXL_RESTRICT visitor) { + JXL_QUIET_RETURN_IF_ERROR(visitor->U32( + BitsOffset(11, 1), BitsOffset(11, 2049), BitsOffset(12, 4097), + BitsOffset(16, 8193), 1, &global_scale)); + JXL_QUIET_RETURN_IF_ERROR(visitor->U32(Val(16), BitsOffset(5, 1), + BitsOffset(8, 1), BitsOffset(16, 1), 1, + &quant_dc)); + return true; +} + +QuantizerParams Quantizer::GetParams() const { + QuantizerParams params; + params.global_scale = global_scale_; + params.quant_dc = quant_dc_; + return params; +} + +Status Quantizer::Decode(BitReader* reader) { + QuantizerParams params; + JXL_RETURN_IF_ERROR(Bundle::Read(reader, &params)); + global_scale_ = static_cast<int>(params.global_scale); + quant_dc_ = static_cast<int>(params.quant_dc); + RecomputeFromGlobalScale(); + return true; +} + +void Quantizer::DumpQuantizationMap(const ImageI& raw_quant_field) const { + printf("Global scale: %d (%.7f)\nDC quant: %d\n", global_scale_, + global_scale_ * 1.0 / kGlobalScaleDenom, quant_dc_); + printf("AC quantization Map:\n"); + for (size_t y = 0; y < raw_quant_field.ysize(); ++y) { + for (size_t x = 0; x < raw_quant_field.xsize(); ++x) { + printf(" %3d", raw_quant_field.Row(y)[x]); + } + printf("\n"); + } +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/quantizer.h b/third-party/libjxl/libjxl/lib/jxl/quantizer.h new file mode 100644 index 0000000000..2829575729 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/quantizer.h @@ -0,0 +1,181 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_QUANTIZER_H_ +#define LIB_JXL_QUANTIZER_H_ + +#include +#include +#include + +#include +#include +#include +#include + +#include "lib/jxl/ac_strategy.h" +#include "lib/jxl/base/bits.h" +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_util.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/fields.h" +#include "lib/jxl/image.h" +#include "lib/jxl/quant_weights.h" + +// Quantizes DC and AC coefficients, with separate quantization tables according +// to the quant_kind (which is currently computed from the AC strategy and the +// block index inside that strategy). + +namespace jxl { + +static constexpr int kGlobalScaleDenom = 1 << 16; +static constexpr int kGlobalScaleNumerator = 4096; + +// zero-biases for quantizing channels X, Y, B +static constexpr float kZeroBiasDefault[3] = {0.5f, 0.5f, 0.5f}; + +// Returns adjusted version of a quantized integer, such that its value is +// closer to the expected value of the original. +// The residuals of AC coefficients that we quantize are not uniformly +// distributed. Numerical experiments show that they have a distribution with +// the "shape" of 1/(1+x^2) [up to some coefficients]. This means that the +// expected value of a coefficient that gets quantized to x will not be x +// itself, but (at least with reasonable approximation): +// - 0 if x is 0 +// - x * biases[c] if x is 1 or -1 +// - x - biases[3]/x otherwise +// This follows from computing the distribution of the quantization bias, which +// can be approximated fairly well by biases[3]/x when |x| is at least two. 
+static constexpr float kBiasNumerator = 0.145f; + +static constexpr float kDefaultQuantBias[4] = { + 1.0f - 0.05465007330715401f, + 1.0f - 0.07005449891748593f, + 1.0f - 0.049935103337343655f, + 0.145f, +}; + +struct QuantizerParams; + +class Quantizer { + public: + explicit Quantizer(const DequantMatrices* dequant); + Quantizer(const DequantMatrices* dequant, int quant_dc, int global_scale); + + static constexpr int32_t kQuantMax = 256; + + static JXL_INLINE int32_t ClampVal(float val) { + return static_cast( + std::max(1.0f, std::min(val, kQuantMax))); + } + + float ScaleGlobalScale(const float scale) { + int new_global_scale = static_cast(global_scale_ * scale + 0.5f); + float scale_out = new_global_scale * 1.0f / global_scale_; + global_scale_ = new_global_scale; + RecomputeFromGlobalScale(); + return scale_out; + } + + // Recomputes other derived fields after global_scale_ has changed. + void RecomputeFromGlobalScale() { + global_scale_float_ = global_scale_ * (1.0 / kGlobalScaleDenom); + inv_global_scale_ = 1.0 * kGlobalScaleDenom / global_scale_; + inv_quant_dc_ = inv_global_scale_ / quant_dc_; + for (size_t c = 0; c < 3; c++) { + mul_dc_[c] = GetDcStep(c); + inv_mul_dc_[c] = GetInvDcStep(c); + } + } + + // Returns scaling factor such that Scale() * (RawDC() or RawQuantField()) + // pixels yields the same float values returned by GetQuantField. + JXL_INLINE float Scale() const { return global_scale_float_; } + + // Reciprocal of Scale(). + JXL_INLINE float InvGlobalScale() const { return inv_global_scale_; } + + void SetQuantFieldRect(const ImageF& qf, const Rect& rect, + ImageI* JXL_RESTRICT raw_quant_field) const; + + void SetQuantField(float quant_dc, const ImageF& qf, + ImageI* JXL_RESTRICT raw_quant_field); + + void SetQuant(float quant_dc, float quant_ac, + ImageI* JXL_RESTRICT raw_quant_field); + + // Returns the DC quantization base value, which is currently global (not + // adaptive). 
The actual scale factor used to dequantize pixels in channel c + // is: inv_quant_dc() * dequant_->DCQuant(c). + float inv_quant_dc() const { return inv_quant_dc_; } + + // Dequantize by multiplying with this times dequant_matrix. + float inv_quant_ac(int32_t quant) const { return inv_global_scale_ / quant; } + + QuantizerParams GetParams() const; + + Status Decode(BitReader* reader); + + void DumpQuantizationMap(const ImageI& raw_quant_field) const; + + JXL_INLINE const float* DequantMatrix(size_t quant_kind, size_t c) const { + return dequant_->Matrix(quant_kind, c); + } + + JXL_INLINE const float* InvDequantMatrix(size_t quant_kind, size_t c) const { + return dequant_->InvMatrix(quant_kind, c); + } + + // Calculates DC quantization step. + JXL_INLINE float GetDcStep(size_t c) const { + return inv_quant_dc_ * dequant_->DCQuant(c); + } + JXL_INLINE float GetInvDcStep(size_t c) const { + return dequant_->InvDCQuant(c) * (global_scale_float_ * quant_dc_); + } + + JXL_INLINE const float* MulDC() const { return mul_dc_; } + JXL_INLINE const float* InvMulDC() const { return inv_mul_dc_; } + + JXL_INLINE void ClearDCMul() { + std::fill(mul_dc_, mul_dc_ + 4, 1.f); + std::fill(inv_mul_dc_, inv_mul_dc_ + 4, 1.f); + } + + void ComputeGlobalScaleAndQuant(float quant_dc, float quant_median, + float quant_median_absd); + + private: + float mul_dc_[4]; + float inv_mul_dc_[4]; + + // These are serialized: + int global_scale_; + int quant_dc_; + + // These are derived from global_scale_: + float inv_global_scale_; + float global_scale_float_; // reciprocal of inv_global_scale_ + float inv_quant_dc_; + + float zero_bias_[3]; + const DequantMatrices* dequant_; +}; + +struct QuantizerParams : public Fields { + QuantizerParams() { Bundle::Init(this); } + JXL_FIELDS_NAME(QuantizerParams) + + Status VisitFields(Visitor* JXL_RESTRICT visitor) override; + + uint32_t global_scale; + uint32_t quant_dc; +}; + +} // namespace jxl + +#endif // LIB_JXL_QUANTIZER_H_ diff --git 
a/third-party/libjxl/libjxl/lib/jxl/quantizer_test.cc b/third-party/libjxl/libjxl/lib/jxl/quantizer_test.cc new file mode 100644 index 0000000000..f9cf2c838e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/quantizer_test.cc @@ -0,0 +1,81 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/quantizer.h" + +#include "lib/jxl/base/span.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/enc_fields.h" +#include "lib/jxl/image_ops.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +void TestEquivalence(int qxsize, int qysize, const Quantizer& quantizer1, + const Quantizer& quantizer2) { + ASSERT_NEAR(quantizer1.inv_quant_dc(), quantizer2.inv_quant_dc(), 1e-7); +} + +TEST(QuantizerTest, QuantizerParams) { + for (uint32_t i = 1; i < 10000; ++i) { + QuantizerParams p; + p.global_scale = i; + size_t extension_bits = 0, total_bits = 0; + EXPECT_TRUE(Bundle::CanEncode(p, &extension_bits, &total_bits)); + EXPECT_EQ(0u, extension_bits); + EXPECT_GE(total_bits, 4u); + } +} + +TEST(QuantizerTest, BitStreamRoundtripSameQuant) { + const int qxsize = 8; + const int qysize = 8; + DequantMatrices dequant; + Quantizer quantizer1(&dequant); + ImageI raw_quant_field(qxsize, qysize); + quantizer1.SetQuant(0.17f, 0.17f, &raw_quant_field); + BitWriter writer; + QuantizerParams params = quantizer1.GetParams(); + EXPECT_TRUE(WriteQuantizerParams(params, &writer, 0, nullptr)); + writer.ZeroPadToByte(); + const size_t bits_written = writer.BitsWritten(); + Quantizer quantizer2(&dequant); + BitReader reader(writer.GetSpan()); + EXPECT_TRUE(quantizer2.Decode(&reader)); + EXPECT_TRUE(reader.JumpToByteBoundary()); + EXPECT_EQ(reader.TotalBitsConsumed(), bits_written); + EXPECT_TRUE(reader.Close()); + TestEquivalence(qxsize, qysize, quantizer1, quantizer2); 
+} + +TEST(QuantizerTest, BitStreamRoundtripRandomQuant) { + const int qxsize = 8; + const int qysize = 8; + DequantMatrices dequant; + Quantizer quantizer1(&dequant); + ImageI raw_quant_field(qxsize, qysize); + quantizer1.SetQuant(0.17f, 0.17f, &raw_quant_field); + float quant_dc = 0.17f; + ImageF qf(qxsize, qysize); + RandomFillImage(&qf, 0.0f, 1.0f); + quantizer1.SetQuantField(quant_dc, qf, &raw_quant_field); + BitWriter writer; + QuantizerParams params = quantizer1.GetParams(); + EXPECT_TRUE(WriteQuantizerParams(params, &writer, 0, nullptr)); + writer.ZeroPadToByte(); + const size_t bits_written = writer.BitsWritten(); + Quantizer quantizer2(&dequant); + BitReader reader(writer.GetSpan()); + EXPECT_TRUE(quantizer2.Decode(&reader)); + EXPECT_TRUE(reader.JumpToByteBoundary()); + EXPECT_EQ(reader.TotalBitsConsumed(), bits_written); + EXPECT_TRUE(reader.Close()); + TestEquivalence(qxsize, qysize, quantizer1, quantizer2); +} +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/rational_polynomial-inl.h b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial-inl.h new file mode 100644 index 0000000000..176e24092c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial-inl.h @@ -0,0 +1,98 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Fast SIMD evaluation of rational polynomials for approximating functions. + +#if defined(LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_ +#undef LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_ +#else +#define LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_ +#endif + +#include + +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::MulAdd; + +// Primary template: default to actual division. +template +struct FastDivision { + HWY_INLINE V operator()(const V n, const V d) const { return n / d; } +}; +// Partial specialization for float vectors. +template +struct FastDivision { + // One Newton-Raphson iteration. + static HWY_INLINE V ReciprocalNR(const V x) { + const auto rcp = ApproximateReciprocal(x); + const auto sum = Add(rcp, rcp); + const auto x_rcp = Mul(x, rcp); + return NegMulAdd(x_rcp, rcp, sum); + } + + V operator()(const V n, const V d) const { +#if 1 // Faster on SKX + return Div(n, d); +#else + return n * ReciprocalNR(d); +#endif + } +}; + +// Approximates smooth functions via rational polynomials (i.e. dividing two +// polynomials). Evaluates polynomials via Horner's scheme, which is faster than +// Clenshaw recurrence for Chebyshev polynomials. LoadDup128 allows us to +// specify constants (replicated 4x) independently of the lane count. +template +HWY_INLINE HWY_MAYBE_UNUSED V EvalRationalPolynomial(const D d, const V x, + const T (&p)[NP], + const T (&q)[NQ]) { + constexpr size_t kDegP = NP / 4 - 1; + constexpr size_t kDegQ = NQ / 4 - 1; + auto yp = LoadDup128(d, &p[kDegP * 4]); + auto yq = LoadDup128(d, &q[kDegQ * 4]); + // We use pointer arithmetic to refer to &p[(kDegP - n) * 4] to avoid a + // compiler warning that the index is out of bounds since we are already + // checking that it is not out of bounds with (kDegP >= n) and the access + // will be optimized away. Similarly with q and kDegQ. 
+ HWY_FENCE; + if (kDegP >= 1) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 1) * 4))); + if (kDegQ >= 1) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 1) * 4))); + HWY_FENCE; + if (kDegP >= 2) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 2) * 4))); + if (kDegQ >= 2) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 2) * 4))); + HWY_FENCE; + if (kDegP >= 3) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 3) * 4))); + if (kDegQ >= 3) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 3) * 4))); + HWY_FENCE; + if (kDegP >= 4) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 4) * 4))); + if (kDegQ >= 4) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 4) * 4))); + HWY_FENCE; + if (kDegP >= 5) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 5) * 4))); + if (kDegQ >= 5) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 5) * 4))); + HWY_FENCE; + if (kDegP >= 6) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 6) * 4))); + if (kDegQ >= 6) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 6) * 4))); + HWY_FENCE; + if (kDegP >= 7) yp = MulAdd(yp, x, LoadDup128(d, p + ((kDegP - 7) * 4))); + if (kDegQ >= 7) yq = MulAdd(yq, x, LoadDup128(d, q + ((kDegQ - 7) * 4))); + + return FastDivision()(yp, yq); +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); +#endif // LIB_JXL_RATIONAL_POLYNOMIAL_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/rational_polynomial_test.cc b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial_test.cc new file mode 100644 index 0000000000..13fc044a55 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/rational_polynomial_test.cc @@ -0,0 +1,238 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/rational_polynomial_test.cc" +#include +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/common.h" +#include "lib/jxl/rational_polynomial-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +using T = float; // required by EvalLog2 +using D = HWY_FULL(T); + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::GetLane; +using hwy::HWY_NAMESPACE::ShiftLeft; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sub; + +// Generic: only computes polynomial +struct EvalPoly { + template + T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const { + const HWY_FULL(T) d; + const auto vx = Set(d, x); + const auto approx = EvalRationalPolynomial(d, vx, p, q); + return GetLane(approx); + } +}; + +// Range reduction for log2 +struct EvalLog2 { + template + T operator()(T x, const T (&p)[NP], const T (&q)[NQ]) const { + const HWY_FULL(T) d; + auto vx = Set(d, x); + + const HWY_FULL(int32_t) di; + const auto x_bits = BitCast(di, vx); + // Cannot handle negative numbers / NaN. + JXL_DASSERT(AllTrue(di, Eq(Abs(x_bits), x_bits))); + + // Range reduction to [-1/3, 1/3] - 3 integer, 2 float ops + const auto exp_bits = Sub(x_bits, Set(di, 0x3f2aaaab)); // = 2/3 + // Shifted exponent = log2; also used to clear mantissa. 
+ const auto exp_shifted = ShiftRight<23>(exp_bits); + const auto mantissa = BitCast(d, Sub(x_bits, ShiftLeft<23>(exp_shifted))); + const auto exp_val = ConvertTo(d, exp_shifted); + vx = Sub(mantissa, Set(d, 1.0f)); + + const auto approx = Add(EvalRationalPolynomial(d, vx, p, q), exp_val); + return GetLane(approx); + } +}; + +// Functions to approximate: + +T LinearToSrgb8Direct(T val) { + if (val < 0.0) return 0.0; + if (val >= 255.0) return 255.0; + if (val <= 10.0 / 12.92) return val * 12.92; + return 255.0 * (std::pow(val / 255.0, 1.0 / 2.4) * 1.055 - 0.055); +} + +T SimpleGamma(T v) { + static const T kGamma = 0.387494322593; + static const T limit = 43.01745241042018; + T bright = v - limit; + if (bright >= 0) { + static const T mul = 0.0383723643799; + v -= bright * mul; + } + static const T limit2 = 94.68634353321337; + T bright2 = v - limit2; + if (bright2 >= 0) { + static const T mul = 0.22885405968; + v -= bright2 * mul; + } + static const T offset = 0.156775786057; + static const T scale = 8.898059160493739; + T retval = scale * (offset + pow(v, kGamma)); + return retval; +} + +// Runs CaratheodoryFejer and verifies the polynomial using a lot of samples to +// return the biggest error. 
+template +T RunApproximation(T x0, T x1, const T (&p)[NP], const T (&q)[NQ], + const Eval& eval, T func_to_approx(T)) { + float maxerr = 0; + T lastPrint = 0; + // NOLINTNEXTLINE(clang-analyzer-security.FloatLoopCounter) + for (T x = x0; x <= x1; x += (x1 - x0) / 10000.0) { + const T f = func_to_approx(x); + const T g = eval(x, p, q); + maxerr = std::max(fabsf(g - f), maxerr); + if (x == x0 || x - lastPrint > (x1 - x0) / 20.0) { + printf("x: %11.6f, f: %11.6f, g: %11.6f, e: %11.6f\n", x, f, g, + fabs(g - f)); + lastPrint = x; + } + } + return maxerr; +} + +void TestSimpleGamma() { + const T p[4 * (6 + 1)] = { + HWY_REP4(-5.0646949363741811E-05), HWY_REP4(6.7369380528439771E-05), + HWY_REP4(8.9376652530412794E-05), HWY_REP4(2.1153513301520462E-06), + HWY_REP4(-6.9130322970386449E-08), HWY_REP4(3.9424752749293728E-10), + HWY_REP4(1.2360288207619576E-13)}; + + const T q[4 * (6 + 1)] = { + HWY_REP4(-6.6389733798591366E-06), HWY_REP4(1.3299859726565908E-05), + HWY_REP4(3.8538748358398873E-06), HWY_REP4(-2.8707687262928236E-08), + HWY_REP4(-6.6897385800005434E-10), HWY_REP4(6.1428748869186003E-12), + HWY_REP4(-2.5475738169252870E-15)}; + + const T err = RunApproximation(0.77, 274.579999999999984, p, q, EvalPoly(), + SimpleGamma); + EXPECT_LT(err, 0.05); +} + +void TestLinearToSrgb8Direct() { + const T p[4 * (5 + 1)] = { + HWY_REP4(-9.5357499040105154E-05), HWY_REP4(4.6761186249798248E-04), + HWY_REP4(2.5708174333943594E-04), HWY_REP4(1.5250087770436082E-05), + HWY_REP4(1.1946768008931187E-07), HWY_REP4(5.9916446295972850E-11)}; + + const T q[4 * (4 + 1)] = { + HWY_REP4(1.8932479758079768E-05), HWY_REP4(2.7312342474687321E-05), + HWY_REP4(4.3901204783327006E-06), HWY_REP4(1.0417787306920273E-07), + HWY_REP4(3.0084206762140419E-10)}; + + const T err = + RunApproximation(0.77, 255, p, q, EvalPoly(), LinearToSrgb8Direct); + EXPECT_LT(err, 0.05); +} + +void TestExp() { + const T p[4 * (2 + 1)] = {HWY_REP4(9.6266879665530902E-01), + HWY_REP4(4.8961265681586763E-01), + 
HWY_REP4(8.2619259189548433E-02)}; + const T q[4 * (2 + 1)] = {HWY_REP4(9.6259895571622622E-01), + HWY_REP4(-4.7272457588933831E-01), + HWY_REP4(7.4802088567547664E-02)}; + const T err = + RunApproximation(-1, 1, p, q, EvalPoly(), [](T x) { return T(exp(x)); }); + EXPECT_LT(err, 1E-4); +} + +void TestNegExp() { + // 4,3 is the min required for monotonicity; max error in 0,10: 751 ppm + // no benefit for k>50. + const T p[4 * (4 + 1)] = { + HWY_REP4(5.9580258551150123E-02), HWY_REP4(-2.5073728806886408E-02), + HWY_REP4(4.1561830213689248E-03), HWY_REP4(-3.1815408488900372E-04), + HWY_REP4(9.3866690094906802E-06)}; + const T q[4 * (3 + 1)] = { + HWY_REP4(5.9579108238812878E-02), HWY_REP4(3.4542074345478582E-02), + HWY_REP4(8.7263562483501714E-03), HWY_REP4(1.4095109143061216E-03)}; + + const T err = + RunApproximation(0, 10, p, q, EvalPoly(), [](T x) { return T(exp(-x)); }); + EXPECT_LT(err, sizeof(T) == 8 ? 2E-5 : 3E-5); +} + +void TestSin() { + const T p[4 * (6 + 1)] = { + HWY_REP4(1.5518122109203780E-05), HWY_REP4(2.3388958643675966E+00), + HWY_REP4(-8.6705520940849157E-01), HWY_REP4(-1.9702294764873535E-01), + HWY_REP4(1.2193404314472320E-01), HWY_REP4(-1.7373966109788839E-02), + HWY_REP4(7.8829435883034796E-04)}; + const T q[4 * (5 + 1)] = { + HWY_REP4(2.3394371422557279E+00), HWY_REP4(-8.7028221081288615E-01), + HWY_REP4(2.0052872219658430E-01), HWY_REP4(-3.2460335995264836E-02), + HWY_REP4(3.1546157932479282E-03), HWY_REP4(-1.6692542019380155E-04)}; + + const T err = RunApproximation(0, Pi(1) * 2, p, q, EvalPoly(), + [](T x) { return T(sin(x)); }); + EXPECT_LT(err, sizeof(T) == 8 ? 
5E-4 : 7E-4); +} + +void TestLog() { + HWY_ALIGN const T p[4 * (2 + 1)] = {HWY_REP4(-1.8503833400518310E-06), + HWY_REP4(1.4287160470083755E+00), + HWY_REP4(7.4245873327820566E-01)}; + HWY_ALIGN const T q[4 * (2 + 1)] = {HWY_REP4(9.9032814277590719E-01), + HWY_REP4(1.0096718572241148E+00), + HWY_REP4(1.7409343003366853E-01)}; + const T err = RunApproximation(1E-6, 1000, p, q, EvalLog2(), std::log2); + printf("%E\n", err); +} + +HWY_NOINLINE void TestRationalPolynomial() { + TestSimpleGamma(); + TestLinearToSrgb8Direct(); + TestExp(); + TestNegExp(); + TestSin(); + TestLog(); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +class RationalPolynomialTest : public hwy::TestWithParamTarget {}; +HWY_TARGET_INSTANTIATE_TEST_SUITE_P(RationalPolynomialTest); + +HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSimpleGamma); +HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLinearToSrgb8Direct); +HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestExp); +HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestNegExp); +HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestSin); +HWY_EXPORT_AND_TEST_P(RationalPolynomialTest, TestLog); + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc new file mode 100644 index 0000000000..7116326e4a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.cc @@ -0,0 +1,865 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/render_pipeline/low_memory_render_pipeline.h" + +#include +#include +#include + +#include "lib/jxl/base/arch_macros.h" +#include "lib/jxl/image_ops.h" + +namespace jxl { +std::pair +LowMemoryRenderPipeline::ColorDimensionsToChannelDimensions( + std::pair in, size_t c, size_t stage) const { + std::pair ret; + std::pair shift = channel_shifts_[stage][c]; + ret.first = + ((in.first << base_color_shift_) + (1 << shift.first) - 1) >> shift.first; + ret.second = ((in.second << base_color_shift_) + (1 << shift.second) - 1) >> + shift.second; + return ret; +} + +std::pair LowMemoryRenderPipeline::BorderToStore( + size_t c) const { + auto ret = ColorDimensionsToChannelDimensions(group_border_, c, 0); + ret.first += padding_[0][c].first; + ret.second += padding_[0][c].second; + return ret; +} + +void LowMemoryRenderPipeline::SaveBorders(size_t group_id, size_t c, + const ImageF& in) { + size_t gy = group_id / frame_dimensions_.xsize_groups; + size_t gx = group_id % frame_dimensions_.xsize_groups; + size_t hshift = channel_shifts_[0][c].first; + size_t vshift = channel_shifts_[0][c].second; + size_t x0 = gx * GroupInputXSize(c); + size_t x1 = std::min((gx + 1) * GroupInputXSize(c), + DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift)); + size_t y0 = gy * GroupInputYSize(c); + size_t y1 = std::min((gy + 1) * GroupInputYSize(c), + DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift)); + + auto borders = BorderToStore(c); + size_t borderx_write = borders.first; + size_t bordery_write = borders.second; + + if (gy > 0) { + Rect from(group_data_x_border_, group_data_y_border_, x1 - x0, + bordery_write); + Rect to(x0, (gy * 2 - 1) * bordery_write, x1 - x0, bordery_write); + CopyImageTo(from, in, to, &borders_horizontal_[c]); + } + if (gy + 1 < frame_dimensions_.ysize_groups) { + Rect from(group_data_x_border_, + group_data_y_border_ + y1 - y0 - bordery_write, x1 - x0, + bordery_write); + Rect to(x0, (gy * 2) * bordery_write, x1 - x0, bordery_write); + 
CopyImageTo(from, in, to, &borders_horizontal_[c]); + } + if (gx > 0) { + Rect from(group_data_x_border_, group_data_y_border_, borderx_write, + y1 - y0); + Rect to((gx * 2 - 1) * borderx_write, y0, borderx_write, y1 - y0); + CopyImageTo(from, in, to, &borders_vertical_[c]); + } + if (gx + 1 < frame_dimensions_.xsize_groups) { + Rect from(group_data_x_border_ + x1 - x0 - borderx_write, + group_data_y_border_, borderx_write, y1 - y0); + Rect to((gx * 2) * borderx_write, y0, borderx_write, y1 - y0); + CopyImageTo(from, in, to, &borders_vertical_[c]); + } +} + +void LowMemoryRenderPipeline::LoadBorders(size_t group_id, size_t c, + const Rect& r, ImageF* out) { + size_t gy = group_id / frame_dimensions_.xsize_groups; + size_t gx = group_id % frame_dimensions_.xsize_groups; + size_t hshift = channel_shifts_[0][c].first; + size_t vshift = channel_shifts_[0][c].second; + // Coordinates of the group in the image. + size_t x0 = gx * GroupInputXSize(c); + size_t x1 = std::min((gx + 1) * GroupInputXSize(c), + DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift)); + size_t y0 = gy * GroupInputYSize(c); + size_t y1 = std::min((gy + 1) * GroupInputYSize(c), + DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift)); + + size_t paddingx = padding_[0][c].first; + size_t paddingy = padding_[0][c].second; + + auto borders = BorderToStore(c); + size_t borderx_write = borders.first; + size_t bordery_write = borders.second; + + // Limits of the area to copy from, in image coordinates. + JXL_DASSERT(r.x0() == 0 || (r.x0() << base_color_shift_) >= paddingx); + size_t x0src = DivCeil(r.x0() << base_color_shift_, 1 << hshift); + if (x0src != 0) { + x0src -= paddingx; + } + // r may be such that r.x1 (namely x0() + xsize()) is within paddingx of the + // right side of the image, so we use min() here. 
+ size_t x1src = + DivCeil((r.x0() + r.xsize()) << base_color_shift_, 1 << hshift); + x1src = std::min(x1src + paddingx, + DivCeil(frame_dimensions_.xsize_upsampled, 1 << hshift)); + + // Similar computation for y. + JXL_DASSERT(r.y0() == 0 || (r.y0() << base_color_shift_) >= paddingy); + size_t y0src = DivCeil(r.y0() << base_color_shift_, 1 << vshift); + if (y0src != 0) { + y0src -= paddingy; + } + size_t y1src = + DivCeil((r.y0() + r.ysize()) << base_color_shift_, 1 << vshift); + y1src = std::min(y1src + paddingy, + DivCeil(frame_dimensions_.ysize_upsampled, 1 << vshift)); + + // Copy other groups' borders from the border storage. + if (y0src < y0) { + JXL_DASSERT(gy > 0); + CopyImageTo( + Rect(x0src, (gy * 2 - 2) * bordery_write, x1src - x0src, bordery_write), + borders_horizontal_[c], + Rect(group_data_x_border_ + x0src - x0, + group_data_y_border_ - bordery_write, x1src - x0src, + bordery_write), + out); + } + if (y1src > y1) { + // When copying the bottom border we must not be on the bottom groups. + JXL_DASSERT(gy + 1 < frame_dimensions_.ysize_groups); + CopyImageTo( + Rect(x0src, (gy * 2 + 1) * bordery_write, x1src - x0src, bordery_write), + borders_horizontal_[c], + Rect(group_data_x_border_ + x0src - x0, group_data_y_border_ + y1 - y0, + x1src - x0src, bordery_write), + out); + } + if (x0src < x0) { + JXL_DASSERT(gx > 0); + CopyImageTo( + Rect((gx * 2 - 2) * borderx_write, y0src, borderx_write, y1src - y0src), + borders_vertical_[c], + Rect(group_data_x_border_ - borderx_write, + group_data_y_border_ + y0src - y0, borderx_write, y1src - y0src), + out); + } + if (x1src > x1) { + // When copying the right border we must not be on the rightmost groups. 
+ JXL_DASSERT(gx + 1 < frame_dimensions_.xsize_groups); + CopyImageTo( + Rect((gx * 2 + 1) * borderx_write, y0src, borderx_write, y1src - y0src), + borders_vertical_[c], + Rect(group_data_x_border_ + x1 - x0, group_data_y_border_ + y0src - y0, + borderx_write, y1src - y0src), + out); + } +} + +size_t LowMemoryRenderPipeline::GroupInputXSize(size_t c) const { + return (frame_dimensions_.group_dim << base_color_shift_) >> + channel_shifts_[0][c].first; +} + +size_t LowMemoryRenderPipeline::GroupInputYSize(size_t c) const { + return (frame_dimensions_.group_dim << base_color_shift_) >> + channel_shifts_[0][c].second; +} + +void LowMemoryRenderPipeline::EnsureBordersStorage() { + const auto& shifts = channel_shifts_[0]; + if (borders_horizontal_.size() < shifts.size()) { + borders_horizontal_.resize(shifts.size()); + borders_vertical_.resize(shifts.size()); + } + for (size_t c = 0; c < shifts.size(); c++) { + auto borders = BorderToStore(c); + size_t borderx = borders.first; + size_t bordery = borders.second; + JXL_DASSERT(frame_dimensions_.xsize_groups > 0); + size_t num_xborders = (frame_dimensions_.xsize_groups - 1) * 2; + JXL_DASSERT(frame_dimensions_.ysize_groups > 0); + size_t num_yborders = (frame_dimensions_.ysize_groups - 1) * 2; + size_t downsampled_xsize = + DivCeil(frame_dimensions_.xsize_upsampled_padded, 1 << shifts[c].first); + size_t downsampled_ysize = DivCeil(frame_dimensions_.ysize_upsampled_padded, + 1 << shifts[c].second); + Rect horizontal = Rect(0, 0, downsampled_xsize, bordery * num_yborders); + if (!SameSize(horizontal, borders_horizontal_[c])) { + borders_horizontal_[c] = ImageF(horizontal.xsize(), horizontal.ysize()); + } + Rect vertical = Rect(0, 0, borderx * num_xborders, downsampled_ysize); + if (!SameSize(vertical, borders_vertical_[c])) { + borders_vertical_[c] = ImageF(vertical.xsize(), vertical.ysize()); + } + } +} + +void LowMemoryRenderPipeline::Init() { + group_border_ = {0, 0}; + base_color_shift_ = 
CeilLog2Nonzero(frame_dimensions_.xsize_upsampled_padded / + frame_dimensions_.xsize_padded); + + const auto& shifts = channel_shifts_[0]; + + // Ensure that each channel has enough many border pixels. + for (size_t c = 0; c < shifts.size(); c++) { + group_border_.first = + std::max(group_border_.first, + DivCeil(padding_[0][c].first << channel_shifts_[0][c].first, + 1 << base_color_shift_)); + group_border_.second = + std::max(group_border_.second, + DivCeil(padding_[0][c].second << channel_shifts_[0][c].second, + 1 << base_color_shift_)); + } + + // Ensure that all channels have an integer number of border pixels in the + // input. + for (size_t c = 0; c < shifts.size(); c++) { + if (channel_shifts_[0][c].first >= base_color_shift_) { + group_border_.first = + RoundUpTo(group_border_.first, + 1 << (channel_shifts_[0][c].first - base_color_shift_)); + } + if (channel_shifts_[0][c].second >= base_color_shift_) { + group_border_.second = + RoundUpTo(group_border_.second, + 1 << (channel_shifts_[0][c].second - base_color_shift_)); + } + } + // Ensure that the X border on color channels is a multiple of kBlockDim or + // the vector size (required for EPF stages). Vectors on ARM NEON are never + // wider than 4 floats, so rounding to multiples of 4 is enough. +#if JXL_ARCH_ARM + constexpr size_t kGroupXAlign = 4; +#else + constexpr size_t kGroupXAlign = 16; +#endif + group_border_.first = RoundUpTo(group_border_.first, kGroupXAlign); + // Allocate borders in group images that are just enough for storing the + // borders to be copied in, plus any rounding to ensure alignment. 
+ std::pair max_border = {0, 0}; + for (size_t c = 0; c < shifts.size(); c++) { + max_border.first = std::max(BorderToStore(c).first, max_border.first); + max_border.second = std::max(BorderToStore(c).second, max_border.second); + } + group_data_x_border_ = RoundUpTo(max_border.first, kGroupXAlign); + group_data_y_border_ = max_border.second; + + EnsureBordersStorage(); + group_border_assigner_.Init(frame_dimensions_); + + for (first_trailing_stage_ = stages_.size(); first_trailing_stage_ > 0; + first_trailing_stage_--) { + bool has_inout_c = false; + for (size_t c = 0; c < shifts.size(); c++) { + if (stages_[first_trailing_stage_ - 1]->GetChannelMode(c) == + RenderPipelineChannelMode::kInOut) { + has_inout_c = true; + } + } + if (has_inout_c) { + break; + } + } + + first_image_dim_stage_ = stages_.size(); + for (size_t i = 0; i < stages_.size(); i++) { + std::vector> input_sizes(shifts.size()); + for (size_t c = 0; c < shifts.size(); c++) { + input_sizes[c] = + std::make_pair(DivCeil(frame_dimensions_.xsize_upsampled, + 1 << channel_shifts_[i][c].first), + DivCeil(frame_dimensions_.ysize_upsampled, + 1 << channel_shifts_[i][c].second)); + } + stages_[i]->SetInputSizes(input_sizes); + if (stages_[i]->SwitchToImageDimensions()) { + // We don't allow kInOut after switching to image dimensions. 
+ JXL_ASSERT(i >= first_trailing_stage_); + first_image_dim_stage_ = i + 1; + stages_[i]->GetImageDimensions(&full_image_xsize_, &full_image_ysize_, + &frame_origin_); + break; + } + } + for (size_t i = first_image_dim_stage_; i < stages_.size(); i++) { + if (stages_[i]->SwitchToImageDimensions()) { + JXL_UNREACHABLE("Cannot switch to image dimensions multiple times"); + } + std::vector> input_sizes(shifts.size()); + for (size_t c = 0; c < shifts.size(); c++) { + input_sizes[c] = {full_image_xsize_, full_image_ysize_}; + } + stages_[i]->SetInputSizes(input_sizes); + } + + anyc_.resize(stages_.size()); + for (size_t i = 0; i < stages_.size(); i++) { + for (size_t c = 0; c < shifts.size(); c++) { + if (stages_[i]->GetChannelMode(c) != + RenderPipelineChannelMode::kIgnored) { + anyc_[i] = c; + } + } + } + + stage_input_for_channel_ = std::vector>( + stages_.size(), std::vector(shifts.size())); + for (size_t c = 0; c < shifts.size(); c++) { + int input = -1; + for (size_t i = 0; i < stages_.size(); i++) { + stage_input_for_channel_[i][c] = input; + if (stages_[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) { + input = i; + } + } + } + + image_rect_.resize(stages_.size()); + for (size_t i = 0; i < stages_.size(); i++) { + size_t x1 = DivCeil(frame_dimensions_.xsize_upsampled, + 1 << channel_shifts_[i][anyc_[i]].first); + size_t y1 = DivCeil(frame_dimensions_.ysize_upsampled, + 1 << channel_shifts_[i][anyc_[i]].second); + image_rect_[i] = Rect(0, 0, x1, y1); + } + + virtual_ypadding_for_output_.resize(stages_.size()); + xpadding_for_output_.resize(stages_.size()); + for (size_t c = 0; c < shifts.size(); c++) { + int ypad = 0; + int xpad = 0; + for (size_t i = stages_.size(); i-- > 0;) { + if (stages_[i]->GetChannelMode(c) != + RenderPipelineChannelMode::kIgnored) { + virtual_ypadding_for_output_[i] = + std::max(ypad, virtual_ypadding_for_output_[i]); + xpadding_for_output_[i] = std::max(xpad, xpadding_for_output_[i]); + } + if (stages_[i]->GetChannelMode(c) 
== RenderPipelineChannelMode::kInOut) { + ypad = (DivCeil(ypad, 1 << channel_shifts_[i][c].second) + + stages_[i]->settings_.border_y) + << channel_shifts_[i][c].second; + xpad = DivCeil(xpad, 1 << stages_[i]->settings_.shift_x) + + stages_[i]->settings_.border_x; + } + } + } +} + +void LowMemoryRenderPipeline::PrepareForThreadsInternal(size_t num, + bool use_group_ids) { + const auto& shifts = channel_shifts_[0]; + + use_group_ids_ = use_group_ids; + size_t num_buffers = use_group_ids_ ? frame_dimensions_.num_groups : num; + for (size_t t = group_data_.size(); t < num_buffers; t++) { + group_data_.emplace_back(); + group_data_[t].resize(shifts.size()); + for (size_t c = 0; c < shifts.size(); c++) { + group_data_[t][c] = ImageF(GroupInputXSize(c) + group_data_x_border_ * 2, + GroupInputYSize(c) + group_data_y_border_ * 2); + } + } + // TODO(veluca): avoid reallocating buffers if not needed. + stage_data_.resize(num); + size_t upsampling = 1u << base_color_shift_; + size_t group_dim = frame_dimensions_.group_dim * upsampling; + size_t padding = + 2 * group_data_x_border_ * upsampling + // maximum size of a rect + 2 * kRenderPipelineXOffset; // extra padding for processing + size_t stage_buffer_xsize = group_dim + padding; + for (size_t t = 0; t < num; t++) { + stage_data_[t].resize(shifts.size()); + for (size_t c = 0; c < shifts.size(); c++) { + stage_data_[t][c].resize(stages_.size()); + size_t next_y_border = 0; + for (size_t i = stages_.size(); i-- > 0;) { + if (stages_[i]->GetChannelMode(c) == + RenderPipelineChannelMode::kInOut) { + size_t stage_buffer_ysize = + 2 * next_y_border + (1 << stages_[i]->settings_.shift_y); + stage_buffer_ysize = 1 << CeilLog2Nonzero(stage_buffer_ysize); + next_y_border = stages_[i]->settings_.border_y; + stage_data_[t][c][i] = ImageF(stage_buffer_xsize, stage_buffer_ysize); + } + } + } + } + if (first_image_dim_stage_ != stages_.size()) { + RectT image_rect(0, 0, frame_dimensions_.xsize_upsampled, + 
frame_dimensions_.ysize_upsampled); + RectT full_image_rect(0, 0, full_image_xsize_, full_image_ysize_); + image_rect = image_rect.Translate(frame_origin_.x0, frame_origin_.y0); + image_rect = image_rect.Intersection(full_image_rect); + if (image_rect.xsize() == 0 || image_rect.ysize() == 0) { + image_rect = RectT(0, 0, 0, 0); + } + size_t left_padding = image_rect.x0(); + size_t middle_padding = group_dim; + size_t right_padding = full_image_xsize_ - image_rect.x1(); + size_t out_of_frame_xsize = + padding + + std::max(left_padding, std::max(middle_padding, right_padding)); + out_of_frame_data_.resize(num); + for (size_t t = 0; t < num; t++) { + out_of_frame_data_[t] = ImageF(out_of_frame_xsize, shifts.size()); + } + } +} + +std::vector> LowMemoryRenderPipeline::PrepareBuffers( + size_t group_id, size_t thread_id) { + std::vector> ret(channel_shifts_[0].size()); + const size_t gx = group_id % frame_dimensions_.xsize_groups; + const size_t gy = group_id / frame_dimensions_.xsize_groups; + for (size_t c = 0; c < channel_shifts_[0].size(); c++) { + ret[c].first = &group_data_[use_group_ids_ ? group_id : thread_id][c]; + ret[c].second = Rect(group_data_x_border_, group_data_y_border_, + GroupInputXSize(c), GroupInputYSize(c), + DivCeil(frame_dimensions_.xsize_upsampled, + 1 << channel_shifts_[0][c].first) - + gx * GroupInputXSize(c) + group_data_x_border_, + DivCeil(frame_dimensions_.ysize_upsampled, + 1 << channel_shifts_[0][c].second) - + gy * GroupInputYSize(c) + group_data_y_border_); + } + return ret; +} + +namespace { + +JXL_INLINE int GetMirroredY(int y, ssize_t group_y0, ssize_t image_ysize) { + if (group_y0 == 0 && (y < 0 || y + group_y0 >= image_ysize)) { + return Mirror(y, image_ysize); + } + if (y + group_y0 >= image_ysize) { + // Here we know that the one mirroring step is sufficient. 
+ return 2 * image_ysize - (y + group_y0) - 1 - group_y0; + } + return y; +} + +JXL_INLINE void ApplyXMirroring(float* row, ssize_t borderx, ssize_t group_x0, + ssize_t group_xsize, ssize_t image_xsize) { + if (image_xsize <= borderx) { + if (group_x0 == 0) { + for (ssize_t ix = 0; ix < borderx; ix++) { + row[kRenderPipelineXOffset - ix - 1] = + row[kRenderPipelineXOffset + Mirror(-ix - 1, image_xsize)]; + } + } + if (group_xsize + borderx + group_x0 >= image_xsize) { + for (ssize_t ix = 0; ix < borderx; ix++) { + row[kRenderPipelineXOffset + image_xsize + ix - group_x0] = + row[kRenderPipelineXOffset + Mirror(image_xsize + ix, image_xsize) - + group_x0]; + } + } + } else { + // Here we know that the one mirroring step is sufficient. + if (group_x0 == 0) { + for (ssize_t ix = 0; ix < borderx; ix++) { + row[kRenderPipelineXOffset - ix - 1] = row[kRenderPipelineXOffset + ix]; + } + } + if (group_xsize + borderx + group_x0 >= image_xsize) { + for (ssize_t ix = 0; ix < borderx; ix++) { + row[kRenderPipelineXOffset + image_xsize - group_x0 + ix] = + row[kRenderPipelineXOffset + image_xsize - group_x0 - ix - 1]; + } + } + } +} + +// Information about where the *output* of each stage is stored. 
+class Rows { + public: + Rows(const std::vector>& stages, + const Rect data_max_color_channel_rect, int group_data_x_border, + int group_data_y_border, + const std::vector>& group_data_shift, + size_t base_color_shift, std::vector>& thread_data, + std::vector& input_data) { + size_t num_stages = stages.size(); + size_t num_channels = input_data.size(); + + JXL_ASSERT(thread_data.size() == num_channels); + JXL_ASSERT(group_data_shift.size() == num_channels); + +#if JXL_ENABLE_ASSERT + for (const auto& td : thread_data) { + JXL_ASSERT(td.size() == num_stages); + } +#endif + + rows_.resize(num_stages + 1, std::vector(num_channels)); + + for (size_t i = 0; i < num_stages; i++) { + for (size_t c = 0; c < input_data.size(); c++) { + if (stages[i]->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) { + rows_[i + 1][c].ymod_minus_1 = thread_data[c][i].ysize() - 1; + rows_[i + 1][c].base_ptr = thread_data[c][i].Row(0); + rows_[i + 1][c].stride = thread_data[c][i].PixelsPerRow(); + } + } + } + + for (size_t c = 0; c < input_data.size(); c++) { + auto channel_group_data_rect = + data_max_color_channel_rect.As() + .Translate(-group_data_x_border, -group_data_y_border) + .ShiftLeft(base_color_shift) + .CeilShiftRight(group_data_shift[c]) + .Translate(group_data_x_border - ssize_t(kRenderPipelineXOffset), + group_data_y_border); + rows_[0][c].base_ptr = channel_group_data_rect.Row(&input_data[c], 0); + rows_[0][c].stride = input_data[c].PixelsPerRow(); + rows_[0][c].ymod_minus_1 = -1; + } + } + + // Stage -1 refers to the input data; all other values must be nonnegative and + // refer to the data for the output of that stage. + JXL_INLINE float* GetBuffer(int stage, int y, size_t c) const { + JXL_DASSERT(stage >= -1); + const RowInfo& info = rows_[stage + 1][c]; + return info.base_ptr + ssize_t(info.stride) * (y & info.ymod_minus_1); + } + + private: + struct RowInfo { + // Pointer to beginning of the first row. 
+ float* base_ptr; + // Modulo value for the y axis minus 1 (ymod is guaranteed to be a power of + // 2, which allows efficient mod computation by masking). + int ymod_minus_1; + // Number of floats per row. + size_t stride; + }; + std::vector> rows_; +}; + +} // namespace + +void LowMemoryRenderPipeline::RenderRect(size_t thread_id, + std::vector& input_data, + Rect data_max_color_channel_rect, + Rect image_max_color_channel_rect) { + // For each stage, the rect corresponding to the image area currently being + // processed, in the coordinates of that stage (i.e. with the scaling factor + // that that stage has). + std::vector group_rect; + group_rect.resize(stages_.size()); + Rect image_area_rect = + image_max_color_channel_rect.ShiftLeft(base_color_shift_) + .Crop(frame_dimensions_.xsize_upsampled, + frame_dimensions_.ysize_upsampled); + for (size_t i = 0; i < stages_.size(); i++) { + group_rect[i] = + image_area_rect.CeilShiftRight(channel_shifts_[i][anyc_[i]]); + } + + ssize_t frame_x0 = + first_image_dim_stage_ == stages_.size() ? 0 : frame_origin_.x0; + ssize_t frame_y0 = + first_image_dim_stage_ == stages_.size() ? 0 : frame_origin_.y0; + size_t full_image_xsize = first_image_dim_stage_ == stages_.size() + ? frame_dimensions_.xsize_upsampled + : full_image_xsize_; + size_t full_image_ysize = first_image_dim_stage_ == stages_.size() + ? frame_dimensions_.ysize_upsampled + : full_image_ysize_; + + // Compute actual x-axis bounds for the current image area in the context of + // the full image this frame is part of. As the left boundary may be negative, + // we also create the x_pixels_skip value, defined as follows: + // - both x_pixels_skip and full_image_x0 are >= 0, and at least one is 0; + // - full_image_x0 - x_pixels_skip is the position of the current frame area + // in the full image. 
+ ssize_t full_image_x0 = frame_x0 + image_area_rect.x0(); + ssize_t x_pixels_skip = 0; + if (full_image_x0 < 0) { + x_pixels_skip = -full_image_x0; + full_image_x0 = 0; + } + ssize_t full_image_x1 = frame_x0 + image_area_rect.x1(); + full_image_x1 = std::min(full_image_x1, full_image_xsize); + + // If the current image area is entirely outside of the visible image, there + // is no point in proceeding. Note: this uses the assumption that if there is + // a stage with observable effects (i.e. a kInput stage), it only appears + // after the stage that switches to image dimensions. + if (full_image_x1 <= full_image_x0) return; + + // Data structures to hold information about input/output rows and their + // buffers. + Rows rows(stages_, data_max_color_channel_rect, group_data_x_border_, + group_data_y_border_, channel_shifts_[0], base_color_shift_, + stage_data_[thread_id], input_data); + + std::vector input_rows(first_trailing_stage_ + + 1); + for (size_t i = 0; i < first_trailing_stage_; i++) { + input_rows[i].resize(input_data.size()); + } + input_rows[first_trailing_stage_].resize(input_data.size(), + std::vector(1)); + + // Maximum possible shift is 3. + RenderPipelineStage::RowInfo output_rows(input_data.size(), + std::vector(8)); + + // Fills in input_rows and output_rows for a given y value (relative to the + // start of the group, measured in actual pixels at the appropriate vertical + // scaling factor) and a given stage, applying mirroring if necessary. This + // function is somewhat inefficient for trailing kInOut or kInput stages, + // where just filling the input row once ought to be sufficient. 
+ auto prepare_io_rows = [&](int y, size_t i) { + ssize_t bordery = stages_[i]->settings_.border_y; + size_t shifty = stages_[i]->settings_.shift_y; + auto make_row = [&](size_t c, ssize_t iy) { + size_t mirrored_y = GetMirroredY(y + iy - bordery, group_rect[i].y0(), + image_rect_[i].ysize()); + input_rows[i][c][iy] = + rows.GetBuffer(stage_input_for_channel_[i][c], mirrored_y, c); + ApplyXMirroring(input_rows[i][c][iy], stages_[i]->settings_.border_x, + group_rect[i].x0(), group_rect[i].xsize(), + image_rect_[i].xsize()); + }; + for (size_t c = 0; c < input_data.size(); c++) { + RenderPipelineChannelMode mode = stages_[i]->GetChannelMode(c); + if (mode == RenderPipelineChannelMode::kIgnored) { + continue; + } + // If we already have rows from a previous iteration, we can just shift + // the rows by 1 and insert the new one. + if (input_rows[i][c].size() == 2 * size_t(bordery) + 1) { + for (ssize_t iy = 0; iy < 2 * bordery; iy++) { + input_rows[i][c][iy] = input_rows[i][c][iy + 1]; + } + make_row(c, bordery * 2); + } else { + input_rows[i][c].resize(2 * bordery + 1); + for (ssize_t iy = 0; iy < 2 * bordery + 1; iy++) { + make_row(c, iy); + } + } + + // If necessary, get the output buffers. + if (mode == RenderPipelineChannelMode::kInOut) { + for (size_t iy = 0; iy < (1u << shifty); iy++) { + output_rows[c][iy] = rows.GetBuffer(i, y * (1 << shifty) + iy, c); + } + } + } + }; + + // We pretend that every stage has a vertical shift of 0, i.e. it is as tall + // as the final image. + // We call each such row a "virtual" row, because it may or may not correspond + // to an actual row of the current processing stage; actual processing happens + // when vy % (1<> channel_shifts_[i][anyc_[i]].second; + + ssize_t image_y = ssize_t(group_rect[i].y0()) + y; + // Do not produce rows in out-of-bounds areas. + if (image_y < 0 || image_y >= ssize_t(image_rect_[i].ysize())) { + continue; + } + + // Get the input/output rows and potentially apply mirroring to the input. 
+ prepare_io_rows(y, i); + + // Produce output rows. + stages_[i]->ProcessRow(input_rows[i], output_rows, + xpadding_for_output_[i], group_rect[i].xsize(), + group_rect[i].x0(), image_y, thread_id); + } + + // Process trailing stages, i.e. the final set of non-kInOut stages; they + // all have the same input buffer and no need to use any mirroring. + + int y = vy - num_extra_rows; + + for (size_t c = 0; c < input_data.size(); c++) { + // Skip pixels that are not part of the actual final image area. + input_rows[first_trailing_stage_][c][0] = + rows.GetBuffer(stage_input_for_channel_[first_trailing_stage_][c], y, + c) + + x_pixels_skip; + } + + // Check that we are not outside of the bounds for the current rendering + // rect. Not doing so might result in overwriting some rows that have been + // written (or will be written) by other threads. + if (y < 0 || y >= ssize_t(image_area_rect.ysize())) { + continue; + } + + // Avoid running pipeline stages on pixels that are outside the full image + // area. As trailing stages have no borders, this is a free optimization + // (and may be necessary for correctness, as some stages assume coordinates + // are within bounds). + ssize_t full_image_y = frame_y0 + image_area_rect.y0() + y; + if (full_image_y < 0 || full_image_y >= ssize_t(full_image_ysize)) { + continue; + } + + for (size_t i = first_trailing_stage_; i < stages_.size(); i++) { + // Before the first_image_dim_stage_, coordinates are relative to the + // current frame. + size_t x0 = + i < first_image_dim_stage_ ? full_image_x0 - frame_x0 : full_image_x0; + size_t y = + i < first_image_dim_stage_ ? 
full_image_y - frame_y0 : full_image_y; + stages_[i]->ProcessRow(input_rows[first_trailing_stage_], output_rows, + /*xextra=*/0, full_image_x1 - full_image_x0, x0, y, + thread_id); + } + } +} + +void LowMemoryRenderPipeline::RenderPadding(size_t thread_id, Rect rect) { + if (rect.xsize() == 0) return; + size_t numc = channel_shifts_[0].size(); + RenderPipelineStage::RowInfo input_rows(numc, std::vector(1)); + RenderPipelineStage::RowInfo output_rows; + + for (size_t c = 0; c < numc; c++) { + input_rows[c][0] = out_of_frame_data_[thread_id].Row(c); + } + + for (size_t y = 0; y < rect.ysize(); y++) { + stages_[first_image_dim_stage_ - 1]->ProcessPaddingRow( + input_rows, rect.xsize(), rect.x0(), rect.y0() + y); + for (size_t i = first_image_dim_stage_; i < stages_.size(); i++) { + stages_[i]->ProcessRow(input_rows, output_rows, + /*xextra=*/0, rect.xsize(), rect.x0(), + rect.y0() + y, thread_id); + } + } +} + +void LowMemoryRenderPipeline::ProcessBuffers(size_t group_id, + size_t thread_id) { + std::vector& input_data = + group_data_[use_group_ids_ ? group_id : thread_id]; + + // Copy the group borders to the border storage. 
+ for (size_t c = 0; c < input_data.size(); c++) { + SaveBorders(group_id, c, input_data[c]); + } + + size_t gy = group_id / frame_dimensions_.xsize_groups; + size_t gx = group_id % frame_dimensions_.xsize_groups; + + if (first_image_dim_stage_ != stages_.size()) { + size_t group_dim = frame_dimensions_.group_dim << base_color_shift_; + RectT group_rect(gx * group_dim, gy * group_dim, group_dim, + group_dim); + RectT image_rect(0, 0, frame_dimensions_.xsize_upsampled, + frame_dimensions_.ysize_upsampled); + RectT full_image_rect(0, 0, full_image_xsize_, full_image_ysize_); + group_rect = group_rect.Translate(frame_origin_.x0, frame_origin_.y0); + image_rect = image_rect.Translate(frame_origin_.x0, frame_origin_.y0); + image_rect = image_rect.Intersection(full_image_rect); + group_rect = group_rect.Intersection(image_rect); + size_t x0 = group_rect.x0(); + size_t y0 = group_rect.y0(); + size_t x1 = group_rect.x1(); + size_t y1 = group_rect.y1(); + JXL_DEBUG_V(6, + "Rendering padding for full image rect %s " + "outside group rect %s", + Description(full_image_rect).c_str(), + Description(group_rect).c_str()); + + if (group_id == 0 && (image_rect.xsize() == 0 || image_rect.ysize() == 0)) { + // If this frame does not intersect with the full image, we have to + // initialize the whole image area with RenderPadding. + RenderPadding(thread_id, + Rect(0, 0, full_image_xsize_, full_image_ysize_)); + } + + // Render padding for groups that intersect with the full image. The case + // where no groups intersect was handled above. 
+ if (group_rect.xsize() > 0 && group_rect.ysize() > 0) { + if (gx == 0 && gy == 0) { + RenderPadding(thread_id, Rect(0, 0, x0, y0)); + } + if (gy == 0) { + RenderPadding(thread_id, Rect(x0, 0, x1 - x0, y0)); + } + if (gx == 0) { + RenderPadding(thread_id, Rect(0, y0, x0, y1 - y0)); + } + if (gx == 0 && gy + 1 == frame_dimensions_.ysize_groups) { + RenderPadding(thread_id, Rect(0, y1, x0, full_image_ysize_ - y1)); + } + if (gy + 1 == frame_dimensions_.ysize_groups) { + RenderPadding(thread_id, Rect(x0, y1, x1 - x0, full_image_ysize_ - y1)); + } + if (gy == 0 && gx + 1 == frame_dimensions_.xsize_groups) { + RenderPadding(thread_id, Rect(x1, 0, full_image_xsize_ - x1, y0)); + } + if (gx + 1 == frame_dimensions_.xsize_groups) { + RenderPadding(thread_id, Rect(x1, y0, full_image_xsize_ - x1, y1 - y0)); + } + if (gy + 1 == frame_dimensions_.ysize_groups && + gx + 1 == frame_dimensions_.xsize_groups) { + RenderPadding(thread_id, Rect(x1, y1, full_image_xsize_ - x1, + full_image_ysize_ - y1)); + } + } + } + + Rect ready_rects[GroupBorderAssigner::kMaxToFinalize]; + size_t num_ready_rects = 0; + group_border_assigner_.GroupDone(group_id, group_border_.first, + group_border_.second, ready_rects, + &num_ready_rects); + for (size_t i = 0; i < num_ready_rects; i++) { + const Rect& image_max_color_channel_rect = ready_rects[i]; + for (size_t c = 0; c < input_data.size(); c++) { + LoadBorders(group_id, c, image_max_color_channel_rect, &input_data[c]); + } + Rect data_max_color_channel_rect( + group_data_x_border_ + image_max_color_channel_rect.x0() - + gx * frame_dimensions_.group_dim, + group_data_y_border_ + image_max_color_channel_rect.y0() - + gy * frame_dimensions_.group_dim, + image_max_color_channel_rect.xsize(), + image_max_color_channel_rect.ysize()); + RenderRect(thread_id, input_data, data_max_color_channel_rect, + image_max_color_channel_rect); + } +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.h 
b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.h new file mode 100644 index 0000000000..b386f7c078 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/low_memory_render_pipeline.h @@ -0,0 +1,111 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_ +#define LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_ + +#include + +#include "lib/jxl/dec_group_border.h" +#include "lib/jxl/render_pipeline/render_pipeline.h" + +namespace jxl { + +// A multithreaded, low-memory rendering pipeline that only allocates a minimal +// amount of buffers. +class LowMemoryRenderPipeline final : public RenderPipeline { + private: + std::vector> PrepareBuffers( + size_t group_id, size_t thread_id) override; + + void PrepareForThreadsInternal(size_t num, bool use_group_ids) override; + + void ProcessBuffers(size_t group_id, size_t thread_id) override; + + void ClearDone(size_t i) override { group_border_assigner_.ClearDone(i); } + + void Init() override; + + void EnsureBordersStorage(); + size_t GroupInputXSize(size_t c) const; + size_t GroupInputYSize(size_t c) const; + void RenderRect(size_t thread_id, std::vector& input_data, + Rect data_max_color_channel_rect, + Rect image_max_color_channel_rect); + void RenderPadding(size_t thread_id, Rect rect); + + void SaveBorders(size_t group_id, size_t c, const ImageF& in); + void LoadBorders(size_t group_id, size_t c, const Rect& r, ImageF* out); + + std::pair ColorDimensionsToChannelDimensions( + std::pair in, size_t c, size_t stage) const; + + std::pair BorderToStore(size_t c) const; + + bool use_group_ids_; + + // Storage for borders between groups. Borders of adjacent groups are stacked + // together, e.g. bottom border of current group is followed by top border + // of next group. 
+ std::vector borders_horizontal_; + std::vector borders_vertical_; + + // Manages the status of borders. + GroupBorderAssigner group_border_assigner_; + + // Size (in color-channel-pixels) of the border around each group that might + // be assigned to that group. + std::pair group_border_; + // base_color_shift_ defines the size of groups in terms of final image + // pixels. + size_t base_color_shift_; + + // Buffer for decoded pixel data for a group, indexed by [thread][channel] or + // [group][channel] depending on `use_group_ids_`. + std::vector> group_data_; + + // Borders for storing group data. + size_t group_data_x_border_; + size_t group_data_y_border_; + + // Buffers for intermediate rows for the various stages, indexed by + // [thread][channel][stage]. + std::vector>> stage_data_; + + // Buffers for out-of-frame data, indexed by [thread]; every row is a + // different channel. + std::vector out_of_frame_data_; + + // For each stage, a non-kIgnored channel. + std::vector anyc_; + + // Size of the image at each stage. + std::vector image_rect_; + + // For each stage, for each channel, keep track of the kInOut stage that + // produced the input to that stage (which corresponds to the buffer index + // containing the data). -1 if data comes from the original input. + std::vector> stage_input_for_channel_; + + // Number of (virtual) extra rows that must be processed at each stage + // to produce sufficient output for future stages. + std::vector virtual_ypadding_for_output_; + + // Same thing for columns, except these are real columns and not virtual ones. + std::vector xpadding_for_output_; + + // First stage that doesn't have any kInOut channel. + size_t first_trailing_stage_; + + // Origin and size of the frame after switching to image dimensions. 
+ FrameOrigin frame_origin_; + size_t full_image_xsize_; + size_t full_image_ysize_; + size_t first_image_dim_stage_; +}; + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_LOW_MEMORY_RENDER_PIPELINE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.cc new file mode 100644 index 0000000000..68b6ef613f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.cc @@ -0,0 +1,132 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/render_pipeline.h" + +#include + +#include "lib/jxl/render_pipeline/low_memory_render_pipeline.h" +#include "lib/jxl/render_pipeline/simple_render_pipeline.h" +#include "lib/jxl/sanitizers.h" + +namespace jxl { + +void RenderPipeline::Builder::AddStage( + std::unique_ptr stage) { + stages_.push_back(std::move(stage)); +} + +std::unique_ptr RenderPipeline::Builder::Finalize( + FrameDimensions frame_dimensions) && { +#if JXL_ENABLE_ASSERT + // Check that the last stage is not an kInOut stage for any channel, and that + // there is at least one stage. 
+ JXL_ASSERT(!stages_.empty()); + for (size_t c = 0; c < num_c_; c++) { + JXL_ASSERT(stages_.back()->GetChannelMode(c) != + RenderPipelineChannelMode::kInOut); + } +#endif + + std::unique_ptr res; + if (use_simple_implementation_) { + res = jxl::make_unique(); + } else { + res = jxl::make_unique(); + } + + res->padding_.resize(stages_.size()); + for (size_t i = stages_.size(); i-- > 0;) { + const auto& stage = stages_[i]; + res->padding_[i].resize(num_c_); + if (i + 1 == stages_.size()) { + continue; + } + for (size_t c = 0; c < num_c_; c++) { + if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) { + res->padding_[i][c].first = DivCeil(res->padding_[i + 1][c].first, + 1 << stage->settings_.shift_x) + + stage->settings_.border_x; + res->padding_[i][c].second = DivCeil(res->padding_[i + 1][c].second, + 1 << stage->settings_.shift_y) + + stage->settings_.border_y; + } else { + res->padding_[i][c] = res->padding_[i + 1][c]; + } + } + } + + res->frame_dimensions_ = frame_dimensions; + res->group_completed_passes_.resize(frame_dimensions.num_groups); + res->channel_shifts_.resize(stages_.size()); + res->channel_shifts_[0].resize(num_c_); + for (size_t i = 1; i < stages_.size(); i++) { + auto& stage = stages_[i - 1]; + for (size_t c = 0; c < num_c_; c++) { + if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) { + res->channel_shifts_[0][c].first += stage->settings_.shift_x; + res->channel_shifts_[0][c].second += stage->settings_.shift_y; + } + } + } + for (size_t i = 1; i < stages_.size(); i++) { + auto& stage = stages_[i - 1]; + res->channel_shifts_[i].resize(num_c_); + for (size_t c = 0; c < num_c_; c++) { + if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kInOut) { + res->channel_shifts_[i][c].first = + res->channel_shifts_[i - 1][c].first - stage->settings_.shift_x; + res->channel_shifts_[i][c].second = + res->channel_shifts_[i - 1][c].second - stage->settings_.shift_y; + } else { + res->channel_shifts_[i][c].first = 
res->channel_shifts_[i - 1][c].first; + res->channel_shifts_[i][c].second = + res->channel_shifts_[i - 1][c].second; + } + } + } + res->stages_ = std::move(stages_); + res->Init(); + return res; +} + +RenderPipelineInput RenderPipeline::GetInputBuffers(size_t group_id, + size_t thread_id) { + RenderPipelineInput ret; + JXL_DASSERT(group_id < group_completed_passes_.size()); + ret.group_id_ = group_id; + ret.thread_id_ = thread_id; + ret.pipeline_ = this; + ret.buffers_ = PrepareBuffers(group_id, thread_id); + return ret; +} + +void RenderPipeline::InputReady( + size_t group_id, size_t thread_id, + const std::vector>& buffers) { + JXL_DASSERT(group_id < group_completed_passes_.size()); + group_completed_passes_[group_id]++; + for (size_t i = 0; i < buffers.size(); ++i) { + (void)i; + JXL_CHECK_PLANE_INITIALIZED(*buffers[i].first, buffers[i].second, i); + } + + ProcessBuffers(group_id, thread_id); +} + +Status RenderPipeline::PrepareForThreads(size_t num, bool use_group_ids) { + for (const auto& stage : stages_) { + JXL_RETURN_IF_ERROR(stage->PrepareForThreads(num)); + } + PrepareForThreadsInternal(num, use_group_ids); + return true; +} + +void RenderPipelineInput::Done() { + JXL_ASSERT(pipeline_); + pipeline_->InputReady(group_id_, thread_id_, buffers_); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.h new file mode 100644 index 0000000000..bf3ad4975e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline.h @@ -0,0 +1,139 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_ +#define LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_ + +#include + +#include "lib/jxl/image.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Interface to provide input to the rendering pipeline. When this object is +// destroyed, all the data in the provided ImageF's Rects must have been +// initialized. +class RenderPipelineInput { + public: + RenderPipelineInput(const RenderPipelineInput&) = delete; + RenderPipelineInput(RenderPipelineInput&& other) noexcept { + *this = std::move(other); + } + RenderPipelineInput& operator=(RenderPipelineInput&& other) noexcept { + pipeline_ = other.pipeline_; + group_id_ = other.group_id_; + thread_id_ = other.thread_id_; + buffers_ = std::move(other.buffers_); + other.pipeline_ = nullptr; + return *this; + } + + RenderPipelineInput() = default; + void Done(); + + const std::pair& GetBuffer(size_t c) const { + JXL_ASSERT(c < buffers_.size()); + return buffers_[c]; + } + + private: + RenderPipeline* pipeline_ = nullptr; + size_t group_id_; + size_t thread_id_; + std::vector> buffers_; + friend class RenderPipeline; +}; + +class RenderPipeline { + public: + class Builder { + public: + explicit Builder(size_t num_c) : num_c_(num_c) { JXL_ASSERT(num_c > 0); } + + // Adds a stage to the pipeline. Must be called at least once; the last + // added stage cannot have kInOut channels. + void AddStage(std::unique_ptr stage); + + // Enables using the simple (i.e. non-memory-efficient) implementation of + // the pipeline. + void UseSimpleImplementation() { use_simple_implementation_ = true; } + + // Finalizes setup of the pipeline. Shifts for all channels should be 0 at + // this point. 
+ std::unique_ptr Finalize( + FrameDimensions frame_dimensions) &&; + + private: + std::vector> stages_; + size_t num_c_; + bool use_simple_implementation_ = false; + }; + + friend class Builder; + + virtual ~RenderPipeline() = default; + + Status IsInitialized() const { + for (const auto& stage : stages_) { + JXL_RETURN_IF_ERROR(stage->IsInitialized()); + } + return true; + } + + // Allocates storage to run with `num` threads. If `use_group_ids` is true, + // storage is allocated for each group, not each thread. The behaviour is + // undefined if calling this function multiple times with a different value + // for `use_group_ids`. + Status PrepareForThreads(size_t num, bool use_group_ids); + + // Retrieves a buffer where input data should be stored by the callee. When + // input has been provided for all buffers, the pipeline will complete its + // processing. This method may be called multiple times concurrently from + // different threads, provided that a different `thread_id` is given. + RenderPipelineInput GetInputBuffers(size_t group_id, size_t thread_id); + + size_t PassesWithAllInput() const { + return *std::min_element(group_completed_passes_.begin(), + group_completed_passes_.end()); + } + + virtual void ClearDone(size_t i) {} + + protected: + std::vector> stages_; + // Shifts for every channel at the input of each stage. + std::vector>> channel_shifts_; + + // Amount of (cumulative) padding required by each stage and channel, in + // either direction. + std::vector>> padding_; + + FrameDimensions frame_dimensions_; + + std::vector group_completed_passes_; + + friend class RenderPipelineInput; + + private: + void InputReady(size_t group_id, size_t thread_id, + const std::vector>& buffers); + + virtual std::vector> PrepareBuffers( + size_t group_id, size_t thread_id) = 0; + + virtual void ProcessBuffers(size_t group_id, size_t thread_id) = 0; + + // Note that this method may be called multiple times with different (or + // equal) `num`. 
+ virtual void PrepareForThreadsInternal(size_t num, bool use_group_ids) = 0; + + // Called once frame dimensions and stages are known. + virtual void Init() {} +}; + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_stage.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_stage.h new file mode 100644 index 0000000000..d1a0074161 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/render_pipeline_stage.h @@ -0,0 +1,171 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_ +#define LIB_JXL_RENDER_PIPELINE_RENDER_PIPELINE_STAGE_H_ + +#include + +#include "lib/jxl/base/arch_macros.h" +#include "lib/jxl/frame_header.h" + +namespace jxl { + +// The first pixel in the input to RenderPipelineStage will be located at +// this position. Pixels before this position may be accessed as padding. +// This should be at least the RoundUpTo(maximum padding / 2, maximum vector +// size) times 2: this is realized when using Gaborish + EPF + upsampling + +// chroma subsampling. +#if JXL_ARCH_ARM +constexpr size_t kRenderPipelineXOffset = 16; +#else +constexpr size_t kRenderPipelineXOffset = 32; +#endif + +enum class RenderPipelineChannelMode { + // This channel is not modified by this stage. + kIgnored = 0, + // This channel is modified in-place. + kInPlace = 1, + // This channel is modified and written to a new buffer. + kInOut = 2, + // This channel is only read. These are the only stages that are assumed to + // have observable effects, i.e. calls to ProcessRow for other stages may be + // omitted if it can be shown they can't affect any kInput stage ProcessRow + // call that happens inside image boundaries. 
+ kInput = 3, +}; + +class RenderPipeline; + +class RenderPipelineStage { + protected: + using Row = float*; + using ChannelRows = std::vector; + + public: + using RowInfo = std::vector; + struct Settings { + // Amount of padding required in the various directions by all channels + // that have kInOut mode. + size_t border_x = 0; + size_t border_y = 0; + + // Log2 of the number of columns/rows of output that this stage will produce + // for every input row for kInOut channels. + size_t shift_x = 0; + size_t shift_y = 0; + + static Settings ShiftX(size_t shift, size_t border) { + Settings settings; + settings.border_x = border; + settings.shift_x = shift; + return settings; + } + + static Settings ShiftY(size_t shift, size_t border) { + Settings settings; + settings.border_y = border; + settings.shift_y = shift; + return settings; + } + + static Settings Symmetric(size_t shift, size_t border) { + Settings settings; + settings.border_x = settings.border_y = border; + settings.shift_x = settings.shift_y = shift; + return settings; + } + + static Settings SymmetricBorderOnly(size_t border) { + return Symmetric(0, border); + } + }; + + virtual ~RenderPipelineStage() = default; + + // Processes one row of input, producing the appropriate number of rows of + // output. Input/output rows can be obtained by calls to + // `GetInputRow`/`GetOutputRow`. `xsize+2*xextra` represents the total number + // of pixels to be processed in the input row, where the first pixel is at + // position `kRenderPipelineXOffset-xextra`. All pixels in the + // `[kRenderPipelineXOffset-xextra-border_x, + // kRenderPipelineXOffset+xsize+xextra+border_x)` range are initialized and + // accessible. `xpos` and `ypos` represent the position of the first + // (non-extra, i.e. in position kRenderPipelineXOffset) pixel in the center + // row of the input in the full image. `xpos` is a multiple of + // `GroupBorderAssigner::kPaddingXRound`. 
If `settings_.temp_buffer_size` is + // nonzero, `temp` will point to an HWY-aligned buffer of at least that number + // of floats; concurrent calls will have different buffers. + virtual void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const = 0; + + // How each channel will be processed. Channels are numbered starting from + // color channels (always 3) and followed by all other channels. + virtual RenderPipelineChannelMode GetChannelMode(size_t c) const = 0; + + protected: + explicit RenderPipelineStage(Settings settings) : settings_(settings) {} + + virtual Status IsInitialized() const { return true; } + + // Informs the stage about the total size of each channel. Few stages will + // actually need to use this information. + virtual void SetInputSizes( + const std::vector>& input_sizes) {} + + virtual Status PrepareForThreads(size_t num_threads) { return true; } + + // Returns a pointer to the input row of channel `c` with offset `y`. + // `y` must be in [-settings_.border_y, settings_.border_y]. `c` must be such + // that `GetChannelMode(c) != kIgnored`. The returned pointer points to the + // offset-ed row (i.e. kRenderPipelineXOffset has been applied). + float* GetInputRow(const RowInfo& input_rows, size_t c, int offset) const { + JXL_DASSERT(GetChannelMode(c) != RenderPipelineChannelMode::kIgnored); + JXL_DASSERT(-offset <= static_cast(settings_.border_y)); + JXL_DASSERT(offset <= static_cast(settings_.border_y)); + return input_rows[c][settings_.border_y + offset] + kRenderPipelineXOffset; + } + // Similar to `GetInputRow`, but can only be used if `GetChannelMode(c) == + // kInOut`. 
Offset must be less than `1< +#include + +#include +#include +#include + +#include "lib/extras/codec.h" +#include "lib/jxl/dec_frame.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_file.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/fake_parallel_runner_testonly.h" +#include "lib/jxl/icc_codec.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/jpeg/enc_jpeg_data.h" +#include "lib/jxl/render_pipeline/test_render_pipeline_stages.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +Status DecodeFile(const Span file, bool use_slow_pipeline, + CodecInOut* io, ThreadPool* pool) { + Status ret = true; + { + BitReader reader(file); + BitReaderScopedCloser reader_closer(&reader, &ret); + JXL_RETURN_IF_ERROR(reader.ReadFixedBits<16>() == 0x0AFF); + JXL_RETURN_IF_ERROR(ReadSizeHeader(&reader, &io->metadata.size)); + JXL_RETURN_IF_ERROR(ReadImageMetadata(&reader, &io->metadata.m)); + io->metadata.transform_data.nonserialized_xyb_encoded = + io->metadata.m.xyb_encoded; + JXL_RETURN_IF_ERROR(Bundle::Read(&reader, &io->metadata.transform_data)); + if (io->metadata.m.color_encoding.WantICC()) { + PaddedBytes icc; + JXL_RETURN_IF_ERROR(ReadICC(&reader, &icc)); + JXL_RETURN_IF_ERROR( + io->metadata.m.color_encoding.SetICC(std::move(icc), &GetJxlCms())); + } + PassesDecoderState dec_state; + JXL_RETURN_IF_ERROR( + dec_state.output_encoding_info.SetFromMetadata(io->metadata)); + JXL_RETURN_IF_ERROR(reader.JumpToByteBoundary()); + io->frames.clear(); + do { + io->frames.emplace_back(&io->metadata.m); + // Skip frames that are not displayed. 
+ do { + size_t frame_start = reader.TotalBitsConsumed() / kBitsPerByte; + size_t size_left = file.size() - frame_start; + JXL_RETURN_IF_ERROR( + DecodeFrame(&dec_state, pool, file.data() + frame_start, size_left, + &io->frames.back(), io->metadata, use_slow_pipeline)); + reader.SkipBits(io->frames.back().decoded_bytes() * kBitsPerByte); + } while (dec_state.shared->frame_header.frame_type != + FrameType::kRegularFrame && + dec_state.shared->frame_header.frame_type != + FrameType::kSkipProgressive); + } while (!dec_state.shared->frame_header.is_last); + + if (io->frames.empty()) return JXL_FAILURE("Not enough data."); + + if (reader.TotalBitsConsumed() != file.size() * kBitsPerByte) { + return JXL_FAILURE("Reader position not at EOF."); + } + if (!reader.AllReadsWithinBounds()) { + return JXL_FAILURE("Reader out of bounds read."); + } + io->CheckMetadata(); + // reader is closed here. + } + return ret; +} + +TEST(RenderPipelineTest, Build) { + RenderPipeline::Builder builder(/*num_c=*/1); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + builder.UseSimpleImplementation(); + FrameDimensions frame_dimensions; + frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0, + /*max_hshift=*/0, /*max_vshift=*/0, + /*modular_mode=*/false, /*upsampling=*/1); + std::move(builder).Finalize(frame_dimensions); +} + +TEST(RenderPipelineTest, CallAllGroups) { + RenderPipeline::Builder builder(/*num_c=*/1); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + builder.UseSimpleImplementation(); + FrameDimensions frame_dimensions; + frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0, + /*max_hshift=*/0, /*max_vshift=*/0, + /*modular_mode=*/false, /*upsampling=*/1); + auto pipeline = std::move(builder).Finalize(frame_dimensions); + ASSERT_TRUE(pipeline->PrepareForThreads(1, /*use_group_ids=*/false)); + + 
for (size_t i = 0; i < frame_dimensions.num_groups; i++) { + auto input_buffers = pipeline->GetInputBuffers(i, 0); + FillPlane(0.0f, input_buffers.GetBuffer(0).first, + input_buffers.GetBuffer(0).second); + input_buffers.Done(); + } + + EXPECT_EQ(pipeline->PassesWithAllInput(), 1); +} + +TEST(RenderPipelineTest, BuildFast) { + RenderPipeline::Builder builder(/*num_c=*/1); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + FrameDimensions frame_dimensions; + frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0, + /*max_hshift=*/0, /*max_vshift=*/0, + /*modular_mode=*/false, /*upsampling=*/1); + std::move(builder).Finalize(frame_dimensions); +} + +TEST(RenderPipelineTest, CallAllGroupsFast) { + RenderPipeline::Builder builder(/*num_c=*/1); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + builder.AddStage(jxl::make_unique()); + builder.UseSimpleImplementation(); + FrameDimensions frame_dimensions; + frame_dimensions.Set(/*xsize=*/1024, /*ysize=*/1024, /*group_size_shift=*/0, + /*max_hshift=*/0, /*max_vshift=*/0, + /*modular_mode=*/false, /*upsampling=*/1); + auto pipeline = std::move(builder).Finalize(frame_dimensions); + ASSERT_TRUE(pipeline->PrepareForThreads(1, /*use_group_ids=*/false)); + + for (size_t i = 0; i < frame_dimensions.num_groups; i++) { + auto input_buffers = pipeline->GetInputBuffers(i, 0); + FillPlane(0.0f, input_buffers.GetBuffer(0).first, + input_buffers.GetBuffer(0).second); + input_buffers.Done(); + } + + EXPECT_EQ(pipeline->PassesWithAllInput(), 1); +} + +struct RenderPipelineTestInputSettings { + // Input image. + std::string input_path; + size_t xsize, ysize; + bool jpeg_transcode = false; + // Encoding settings. + CompressParams cparams; + // Short name for the encoder settings. 
+ std::string cparams_descr; + + bool add_spot_color = false; + + Splines splines; +}; + +class RenderPipelineTestParam + : public ::testing::TestWithParam {}; + +TEST_P(RenderPipelineTestParam, PipelineTest) { + RenderPipelineTestInputSettings config = GetParam(); + + // Use a parallel runner that randomly shuffles tasks to detect possible + // border handling bugs. + FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8); + ThreadPool pool(&JxlFakeParallelRunner, &fake_pool); + const PaddedBytes orig = jxl::test::ReadTestData(config.input_path); + + CodecInOut io; + if (config.jpeg_transcode) { + ASSERT_TRUE(jpeg::DecodeImageJPG(Span(orig), &io)); + } else { + ASSERT_TRUE(SetFromBytes(Span(orig), &io, &pool)); + } + io.ShrinkTo(config.xsize, config.ysize); + + if (config.add_spot_color) { + jxl::ImageF spot(config.xsize, config.ysize); + jxl::ZeroFillImage(&spot); + + for (size_t y = 0; y < config.ysize; y++) { + float* JXL_RESTRICT row = spot.Row(y); + for (size_t x = 0; x < config.xsize; x++) { + row[x] = ((x ^ y) & 255) * (1.f / 255.f); + } + } + ExtraChannelInfo info; + info.bit_depth.bits_per_sample = 8; + info.dim_shift = 0; + info.type = jxl::ExtraChannel::kSpotColor; + info.spot_color[0] = 0.5f; + info.spot_color[1] = 0.2f; + info.spot_color[2] = 1.f; + info.spot_color[3] = 0.5f; + + io.metadata.m.extra_channel_info.push_back(info); + std::vector ec; + ec.push_back(std::move(spot)); + io.frames[0].SetExtraChannels(std::move(ec)); + } + + PaddedBytes compressed; + + PassesEncoderState enc_state; + enc_state.shared.image_features.splines = config.splines; + ASSERT_TRUE(EncodeFile(config.cparams, &io, &enc_state, &compressed, + GetJxlCms(), /*aux_out=*/nullptr, &pool)); + + CodecInOut io_default; + ASSERT_TRUE(DecodeFile(Span(compressed), + /*use_slow_pipeline=*/false, &io_default, &pool)); + CodecInOut io_slow_pipeline; + ASSERT_TRUE(DecodeFile(Span(compressed), + /*use_slow_pipeline=*/true, &io_slow_pipeline, &pool)); + + 
ASSERT_EQ(io_default.frames.size(), io_slow_pipeline.frames.size()); + for (size_t i = 0; i < io_default.frames.size(); i++) { +#if JXL_HIGH_PRECISION + constexpr float kMaxError = 1e-5; +#else + constexpr float kMaxError = 5e-4; +#endif + Image3F def = std::move(*io_default.frames[i].color()); + Image3F pip = std::move(*io_slow_pipeline.frames[i].color()); + JXL_ASSERT_OK(VerifyRelativeError(pip, def, kMaxError, kMaxError, _)); + for (size_t ec = 0; ec < io_default.frames[i].extra_channels().size(); + ec++) { + JXL_ASSERT_OK(VerifyRelativeError( + io_slow_pipeline.frames[i].extra_channels()[ec], + io_default.frames[i].extra_channels()[ec], kMaxError, kMaxError, _)); + } + } +} + +Splines CreateTestSplines() { + const ColorCorrelationMap cmap; + std::vector control_points{{9, 54}, {118, 159}, {97, 3}, + {10, 40}, {150, 25}, {120, 300}}; + const Spline spline{ + control_points, + /*color_dct=*/ + {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}}, + /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}}; + std::vector spline_data = {spline}; + std::vector quantized_splines; + std::vector starting_points; + for (const Spline& spline : spline_data) { + quantized_splines.emplace_back(spline, /*quantization_adjustment=*/0, + cmap.YtoXRatio(0), cmap.YtoBRatio(0)); + starting_points.push_back(spline.control_points.front()); + } + return Splines(/*quantization_adjustment=*/0, std::move(quantized_splines), + std::move(starting_points)); +} + +std::vector GeneratePipelineTests() { + std::vector all_tests; + + std::pair sizes[] = { + {3, 8}, {128, 128}, {256, 256}, {258, 258}, {533, 401}, {777, 777}, + }; + + for (auto size : sizes) { + RenderPipelineTestInputSettings settings; + settings.input_path = "jxl/flower/flower.png"; + settings.xsize = size.first; + settings.ysize = size.second; + + // Base settings. 
+ settings.cparams.butteraugli_distance = 1.0; + settings.cparams.patches = Override::kOff; + settings.cparams.dots = Override::kOff; + settings.cparams.gaborish = Override::kOff; + settings.cparams.epf = 0; + settings.cparams.color_transform = ColorTransform::kXYB; + + { + auto s = settings; + s.cparams_descr = "NoGabNoEpfNoPatches"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams.color_transform = ColorTransform::kNone; + s.cparams_descr = "NoGabNoEpfNoPatchesNoXYB"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams.gaborish = Override::kOn; + s.cparams_descr = "GabNoEpfNoPatches"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams.epf = 1; + s.cparams_descr = "NoGabEpf1NoPatches"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams.epf = 2; + s.cparams_descr = "NoGabEpf2NoPatches"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams.epf = 3; + s.cparams_descr = "NoGabEpf3NoPatches"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams.gaborish = Override::kOn; + s.cparams.epf = 3; + s.cparams_descr = "GabEpf3NoPatches"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams_descr = "Splines"; + s.splines = CreateTestSplines(); + all_tests.push_back(s); + } + + for (size_t ups : {2, 4, 8}) { + { + auto s = settings; + s.cparams.resampling = ups; + s.cparams_descr = "Ups" + std::to_string(ups); + all_tests.push_back(s); + } + { + auto s = settings; + s.cparams.resampling = ups; + s.cparams.epf = 1; + s.cparams_descr = "Ups" + std::to_string(ups) + "EPF1"; + all_tests.push_back(s); + } + { + auto s = settings; + s.cparams.resampling = ups; + s.cparams.gaborish = Override::kOn; + s.cparams.epf = 1; + s.cparams_descr = "Ups" + std::to_string(ups) + "GabEPF1"; + all_tests.push_back(s); + } + } + + { + auto s = settings; + s.cparams_descr = "Noise"; + s.cparams.photon_noise_iso = 3200; + all_tests.push_back(s); + } + + { + auto s = settings; + 
s.cparams_descr = "NoiseUps"; + s.cparams.photon_noise_iso = 3200; + s.cparams.resampling = 2; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams_descr = "ModularLossless"; + s.cparams.modular_mode = true; + s.cparams.butteraugli_distance = 0; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams_descr = "ProgressiveDC"; + s.cparams.progressive_dc = 1; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams_descr = "ModularLossy"; + s.cparams.modular_mode = true; + s.cparams.butteraugli_distance = 1.f; + all_tests.push_back(s); + } + + { + auto s = settings; + s.input_path = "jxl/flower/flower_alpha.png"; + s.cparams_descr = "AlphaVarDCT"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.input_path = "jxl/flower/flower_alpha.png"; + s.cparams_descr = "AlphaVarDCTUpsamplingEPF"; + s.cparams.epf = 1; + s.cparams.ec_resampling = 2; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams.modular_mode = true; + s.cparams.butteraugli_distance = 0; + s.input_path = "jxl/flower/flower_alpha.png"; + s.cparams_descr = "AlphaLossless"; + all_tests.push_back(s); + } + + { + auto s = settings; + s.input_path = "jxl/flower/flower_alpha.png"; + s.cparams_descr = "AlphaDownsample"; + s.cparams.ec_resampling = 2; + all_tests.push_back(s); + } + + { + auto s = settings; + s.cparams_descr = "SpotColor"; + s.add_spot_color = true; + all_tests.push_back(s); + } + } + +#if JPEGXL_ENABLE_TRANSCODE_JPEG + for (const char* input : {"jxl/flower/flower.png.im_q85_444.jpg", + "jxl/flower/flower.png.im_q85_420.jpg", + "jxl/flower/flower.png.im_q85_422.jpg", + "jxl/flower/flower.png.im_q85_440.jpg"}) { + RenderPipelineTestInputSettings settings; + settings.input_path = input; + settings.jpeg_transcode = true; + settings.xsize = 2268; + settings.ysize = 1512; + settings.cparams_descr = "Default"; + all_tests.push_back(settings); + } + +#endif + + { + RenderPipelineTestInputSettings settings; + settings.input_path = 
"jxl/grayscale_patches.png"; + settings.xsize = 1011; + settings.ysize = 277; + settings.cparams_descr = "Patches"; + all_tests.push_back(settings); + } + + { + RenderPipelineTestInputSettings settings; + settings.input_path = "jxl/grayscale_patches.png"; + settings.xsize = 1011; + settings.ysize = 277; + settings.cparams.photon_noise_iso = 1000; + settings.cparams_descr = "PatchesAndNoise"; + all_tests.push_back(settings); + } + + { + RenderPipelineTestInputSettings settings; + settings.input_path = "jxl/grayscale_patches.png"; + settings.xsize = 1011; + settings.ysize = 277; + settings.cparams.resampling = 2; + settings.cparams_descr = "PatchesAndUps2"; + all_tests.push_back(settings); + } + + return all_tests; +} + +std::ostream& operator<<(std::ostream& os, + const RenderPipelineTestInputSettings& c) { + std::string filename; + size_t pos = c.input_path.find_last_of('/'); + if (pos == std::string::npos) { + filename = c.input_path; + } else { + filename = c.input_path.substr(pos + 1); + } + std::replace_if( + filename.begin(), filename.end(), [](char c) { return !isalnum(c); }, + '_'); + os << filename << "_" << (c.jpeg_transcode ? 
"JPEG_" : "") << c.xsize << "x" + << c.ysize << "_" << c.cparams_descr; + return os; +} + +std::string PipelineTestDescription( + const testing::TestParamInfo& info) { + std::stringstream name; + name << info.param; + return name.str(); +} + +JXL_GTEST_INSTANTIATE_TEST_SUITE_P(RenderPipelineTest, RenderPipelineTestParam, + testing::ValuesIn(GeneratePipelineTests()), + PipelineTestDescription); + +TEST(RenderPipelineDecodingTest, Animation) { + FakeParallelRunner fake_pool(/*order_seed=*/123, /*num_threads=*/8); + ThreadPool pool(&JxlFakeParallelRunner, &fake_pool); + + PaddedBytes compressed = + jxl::test::ReadTestData("jxl/blending/cropped_traffic_light.jxl"); + + CodecInOut io_default; + ASSERT_TRUE(DecodeFile(Span(compressed), + /*use_slow_pipeline=*/false, &io_default, &pool)); + CodecInOut io_slow_pipeline; + ASSERT_TRUE(DecodeFile(Span(compressed), + /*use_slow_pipeline=*/true, &io_slow_pipeline, &pool)); + + ASSERT_EQ(io_default.frames.size(), io_slow_pipeline.frames.size()); + for (size_t i = 0; i < io_default.frames.size(); i++) { +#if JXL_HIGH_PRECISION + constexpr float kMaxError = 1e-5; +#else + constexpr float kMaxError = 1e-4; +#endif + + Image3F fast_pipeline = std::move(*io_default.frames[i].color()); + Image3F slow_pipeline = std::move(*io_slow_pipeline.frames[i].color()); + JXL_ASSERT_OK(VerifyRelativeError(slow_pipeline, fast_pipeline, kMaxError, + kMaxError, _)) + for (size_t ec = 0; ec < io_default.frames[i].extra_channels().size(); + ec++) { + JXL_ASSERT_OK(VerifyRelativeError( + io_slow_pipeline.frames[i].extra_channels()[ec], + io_default.frames[i].extra_channels()[ec], kMaxError, kMaxError, _)); + } + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.cc new file mode 100644 index 0000000000..4495288860 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.cc @@ -0,0 +1,266 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/simple_render_pipeline.h" + +#include + +#include "lib/jxl/image_ops.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" +#include "lib/jxl/sanitizers.h" + +namespace jxl { + +void SimpleRenderPipeline::PrepareForThreadsInternal(size_t num, + bool use_group_ids) { + if (!channel_data_.empty()) { + return; + } + auto ch_size = [](size_t frame_size, size_t shift) { + return DivCeil(frame_size, 1 << shift) + kRenderPipelineXOffset * 2; + }; + for (size_t c = 0; c < channel_shifts_[0].size(); c++) { + channel_data_.push_back(ImageF( + ch_size(frame_dimensions_.xsize_upsampled, channel_shifts_[0][c].first), + ch_size(frame_dimensions_.ysize_upsampled, + channel_shifts_[0][c].second))); + msan::PoisonImage(channel_data_.back()); + } +} + +Rect SimpleRenderPipeline::MakeChannelRect(size_t group_id, size_t channel) { + size_t base_color_shift = + CeilLog2Nonzero(frame_dimensions_.xsize_upsampled_padded / + frame_dimensions_.xsize_padded); + + const size_t gx = group_id % frame_dimensions_.xsize_groups; + const size_t gy = group_id / frame_dimensions_.xsize_groups; + size_t xgroupdim = (frame_dimensions_.group_dim << base_color_shift) >> + channel_shifts_[0][channel].first; + size_t ygroupdim = (frame_dimensions_.group_dim << base_color_shift) >> + channel_shifts_[0][channel].second; + return Rect( + kRenderPipelineXOffset + gx * xgroupdim, + kRenderPipelineXOffset + gy * ygroupdim, xgroupdim, ygroupdim, + kRenderPipelineXOffset + DivCeil(frame_dimensions_.xsize_upsampled, + 1 << channel_shifts_[0][channel].first), + kRenderPipelineXOffset + + DivCeil(frame_dimensions_.ysize_upsampled, + 1 << channel_shifts_[0][channel].second)); +} + +std::vector> 
SimpleRenderPipeline::PrepareBuffers( + size_t group_id, size_t thread_id) { + std::vector> ret; + for (size_t c = 0; c < channel_data_.size(); c++) { + ret.emplace_back(&channel_data_[c], MakeChannelRect(group_id, c)); + } + return ret; +} + +void SimpleRenderPipeline::ProcessBuffers(size_t group_id, size_t thread_id) { + for (size_t c = 0; c < channel_data_.size(); c++) { + Rect r = MakeChannelRect(group_id, c); + (void)r; + JXL_CHECK_PLANE_INITIALIZED(channel_data_[c], r, c); + } + + if (PassesWithAllInput() <= processed_passes_) return; + processed_passes_++; + + for (size_t stage_id = 0; stage_id < stages_.size(); stage_id++) { + const auto& stage = stages_[stage_id]; + // Prepare buffers for kInOut channels. + std::vector new_channels(channel_data_.size()); + std::vector output_channels(channel_data_.size()); + + std::vector> input_sizes(channel_data_.size()); + for (size_t c = 0; c < channel_data_.size(); c++) { + input_sizes[c] = + std::make_pair(channel_data_[c].xsize() - kRenderPipelineXOffset * 2, + channel_data_[c].ysize() - kRenderPipelineXOffset * 2); + } + + for (size_t c = 0; c < channel_data_.size(); c++) { + if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) { + continue; + } + // Ensure that the newly allocated channels are large enough to avoid + // problems with padding. + new_channels[c] = + ImageF(frame_dimensions_.xsize_upsampled_padded + + kRenderPipelineXOffset * 2 + hwy::kMaxVectorSize * 8, + frame_dimensions_.ysize_upsampled_padded + + kRenderPipelineXOffset * 2); + new_channels[c].ShrinkTo( + (input_sizes[c].first << stage->settings_.shift_x) + + kRenderPipelineXOffset * 2, + (input_sizes[c].second << stage->settings_.shift_y) + + kRenderPipelineXOffset * 2); + output_channels[c] = &new_channels[c]; + } + + auto get_row = [&](size_t c, int64_t y) { + return channel_data_[c].Row(kRenderPipelineXOffset + y) + + kRenderPipelineXOffset; + }; + + // Add mirrored pixes to all kInOut channels. 
+ for (size_t c = 0; c < channel_data_.size(); c++) { + if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) { + continue; + } + // Horizontal mirroring. + for (size_t y = 0; y < input_sizes[c].second; y++) { + float* row = get_row(c, y); + for (size_t ix = 0; ix < stage->settings_.border_x; ix++) { + *(row - ix - 1) = row[Mirror(-ssize_t(ix) - 1, input_sizes[c].first)]; + } + for (size_t ix = 0; ix < stage->settings_.border_x; ix++) { + *(row + ix + input_sizes[c].first) = + row[Mirror(ix + input_sizes[c].first, input_sizes[c].first)]; + } + } + // Vertical mirroring. + for (int y = 0; y < static_cast(stage->settings_.border_y); y++) { + memcpy(get_row(c, -y - 1) - stage->settings_.border_x, + get_row(c, Mirror(-ssize_t(y) - 1, input_sizes[c].second)) - + stage->settings_.border_x, + sizeof(float) * + (input_sizes[c].first + 2 * stage->settings_.border_x)); + } + for (int y = 0; y < static_cast(stage->settings_.border_y); y++) { + memcpy( + get_row(c, input_sizes[c].second + y) - stage->settings_.border_x, + get_row(c, + Mirror(input_sizes[c].second + y, input_sizes[c].second)) - + stage->settings_.border_x, + sizeof(float) * + (input_sizes[c].first + 2 * stage->settings_.border_x)); + } + } + + size_t ysize = 0; + size_t xsize = 0; + for (size_t c = 0; c < channel_data_.size(); c++) { + if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kIgnored) { + continue; + } + ysize = std::max(input_sizes[c].second, ysize); + xsize = std::max(input_sizes[c].first, xsize); + } + + JXL_ASSERT(ysize != 0); + JXL_ASSERT(xsize != 0); + + RenderPipelineStage::RowInfo input_rows(channel_data_.size()); + RenderPipelineStage::RowInfo output_rows(channel_data_.size()); + + // Run the pipeline. + { + stage->SetInputSizes(input_sizes); + int border_y = stage->settings_.border_y; + for (size_t y = 0; y < ysize; y++) { + // Prepare input rows. 
+ for (size_t c = 0; c < channel_data_.size(); c++) { + if (stage->GetChannelMode(c) == RenderPipelineChannelMode::kIgnored) { + continue; + } + input_rows[c].resize(2 * border_y + 1); + for (int iy = -border_y; iy <= border_y; iy++) { + input_rows[c][iy + border_y] = + channel_data_[c].Row(y + kRenderPipelineXOffset + iy); + } + } + // Prepare output rows. + for (size_t c = 0; c < channel_data_.size(); c++) { + if (!output_channels[c]) continue; + output_rows[c].resize(1 << stage->settings_.shift_y); + for (size_t iy = 0; iy < output_rows[c].size(); iy++) { + output_rows[c][iy] = output_channels[c]->Row( + (y << stage->settings_.shift_y) + iy + kRenderPipelineXOffset); + } + } + stage->ProcessRow(input_rows, output_rows, /*xextra=*/0, xsize, + /*xpos=*/0, y, thread_id); + } + } + + // Move new channels to current channels. + for (size_t c = 0; c < channel_data_.size(); c++) { + if (stage->GetChannelMode(c) != RenderPipelineChannelMode::kInOut) { + continue; + } + channel_data_[c] = std::move(new_channels[c]); + } + for (size_t c = 0; c < channel_data_.size(); c++) { + size_t next_stage = std::min(stage_id + 1, channel_shifts_.size() - 1); + size_t xsize = DivCeil(frame_dimensions_.xsize_upsampled, + 1 << channel_shifts_[next_stage][c].first); + size_t ysize = DivCeil(frame_dimensions_.ysize_upsampled, + 1 << channel_shifts_[next_stage][c].second); + channel_data_[c].ShrinkTo(xsize + 2 * kRenderPipelineXOffset, + ysize + 2 * kRenderPipelineXOffset); + JXL_CHECK_PLANE_INITIALIZED( + channel_data_[c], + Rect(kRenderPipelineXOffset, kRenderPipelineXOffset, xsize, ysize), + c); + } + + if (stage->SwitchToImageDimensions()) { + size_t image_xsize, image_ysize; + FrameOrigin frame_origin; + stage->GetImageDimensions(&image_xsize, &image_ysize, &frame_origin); + frame_dimensions_.Set(image_xsize, image_ysize, 0, 0, 0, false, 1); + std::vector old_channels = std::move(channel_data_); + channel_data_.clear(); + channel_data_.reserve(old_channels.size()); + for (size_t c = 
0; c < old_channels.size(); c++) { + channel_data_.emplace_back(2 * kRenderPipelineXOffset + image_xsize, + 2 * kRenderPipelineXOffset + image_ysize); + } + for (size_t y = 0; y < image_ysize; ++y) { + for (size_t c = 0; c < channel_data_.size(); c++) { + output_rows[c].resize(1); + output_rows[c][0] = channel_data_[c].Row(kRenderPipelineXOffset + y); + } + // TODO(sboukortt): consider doing this only on the parts of the + // background that won't be occluded. + stage->ProcessPaddingRow(output_rows, image_xsize, 0, y); + } + ssize_t x0 = frame_origin.x0; + ssize_t y0 = frame_origin.y0; + size_t x0_fg = 0; + size_t y0_fg = 0; + if (x0 < 0) { + xsize += x0; + x0_fg -= x0; + x0 = 0; + } + if (x0 + xsize > image_xsize) { + xsize = image_xsize - x0; + } + if (y0 < 0) { + ysize += y0; + y0_fg -= y0; // skip the foreground rows that lie above the image (mirrors the x0 case) + y0 = 0; + } + if (y0 + ysize > image_ysize) { + ysize = image_ysize - y0; + } + const Rect rect_fg_relative_to_image = + Rect(x0, y0, xsize, ysize) + .Translate(kRenderPipelineXOffset, kRenderPipelineXOffset); + const Rect rect_fg = + Rect(x0_fg, y0_fg, xsize, ysize) + .Translate(kRenderPipelineXOffset, kRenderPipelineXOffset); + for (size_t c = 0; c < channel_data_.size(); c++) { + CopyImageTo(rect_fg, old_channels[c], rect_fg_relative_to_image, + &channel_data_[c]); + } + } + } +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.h new file mode 100644 index 0000000000..10f4505912 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/simple_render_pipeline.h @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_ +#define LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_ + +#include + +#include "lib/jxl/render_pipeline/render_pipeline.h" + +namespace jxl { + +// A RenderPipeline that is "obviously correct"; it may use potentially large +// amounts of memory and be slow. It is intended to be used mostly for testing +// purposes. +class SimpleRenderPipeline : public RenderPipeline { + std::vector> PrepareBuffers( + size_t group_id, size_t thread_id) override; + + void ProcessBuffers(size_t group_id, size_t thread_id) override; + + void PrepareForThreadsInternal(size_t num, bool use_group_ids) override; + + // Full frame buffers. Both X and Y dimensions are padded by + // kRenderPipelineXOffset. + std::vector channel_data_; + size_t processed_passes_ = 0; + + private: + Rect MakeChannelRect(size_t group_id, size_t channel); +}; + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_SIMPLE_RENDER_PIPELINE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.cc new file mode 100644 index 0000000000..a66a60daec --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.cc @@ -0,0 +1,247 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/render_pipeline/stage_blending.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_blending.cc" +#include +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/blending.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +class BlendingStage : public RenderPipelineStage { + public: + explicit BlendingStage(const PassesDecoderState* dec_state, + const ColorEncoding& frame_color_encoding) + : RenderPipelineStage(RenderPipelineStage::Settings()), + state_(*dec_state->shared) { + image_xsize_ = state_.frame_header.nonserialized_metadata->xsize(); + image_ysize_ = state_.frame_header.nonserialized_metadata->ysize(); + extra_channel_info_ = + &state_.frame_header.nonserialized_metadata->m.extra_channel_info; + info_ = state_.frame_header.blending_info; + const std::vector& ec_info = + state_.frame_header.extra_channel_blending_info; + const ImageBundle& bg = state_.reference_frames[info_.source].frame; + bg_ = &bg; + if (bg.xsize() == 0 || bg.ysize() == 0) { + zeroes_.resize(image_xsize_, 0.f); + } else if (state_.reference_frames[info_.source].ib_is_in_xyb) { + initialized_ = JXL_FAILURE( + "Trying to blend XYB reference frame %i and non-XYB frame", + info_.source); + return; + } else if (std::any_of(ec_info.begin(), ec_info.end(), + [this](const BlendingInfo& info) { + const ImageBundle& bg = + state_.reference_frames[info.source].frame; + return bg.xsize() == 0 || bg.ysize() == 0; + })) { + zeroes_.resize(image_xsize_, 0.f); + } + + auto verify_bg_size = [&](const ImageBundle& bg) -> Status { + if (bg.xsize() != 0 && bg.ysize() != 0 && + (bg.xsize() < image_xsize_ || bg.ysize() < image_ysize_ || + bg.origin.x0 != 0 || bg.origin.y0 != 0)) { + return JXL_FAILURE("Trying to use a %" PRIuS "x%" PRIuS + " crop as a background", + bg.xsize(), bg.ysize()); + } + return true; + }; + + Status ok = verify_bg_size(bg); + for (const auto& info : ec_info) { + const ImageBundle& bg 
= state_.reference_frames[info.source].frame; + if (!!ok) ok = verify_bg_size(bg); + } + if (!ok) { + initialized_ = ok; + return; + } + + if (state_.metadata->m.xyb_encoded) { + if (!dec_state->output_encoding_info.color_encoding_is_original) { + initialized_ = JXL_FAILURE("Blending in unsupported color space"); + return; + } + } + + blending_info_.resize(ec_info.size() + 1); + auto make_blending = [&](const BlendingInfo& info, PatchBlending* pb) { + pb->alpha_channel = info.alpha_channel; + pb->clamp = info.clamp; + switch (info.mode) { + case BlendMode::kReplace: { + pb->mode = PatchBlendMode::kReplace; + break; + } + case BlendMode::kAdd: { + pb->mode = PatchBlendMode::kAdd; + break; + } + case BlendMode::kMul: { + pb->mode = PatchBlendMode::kMul; + break; + } + case BlendMode::kBlend: { + pb->mode = PatchBlendMode::kBlendAbove; + break; + } + case BlendMode::kAlphaWeightedAdd: { + pb->mode = PatchBlendMode::kAlphaWeightedAddAbove; + break; + } + default: { + JXL_UNREACHABLE( + "Invalid blend mode"); // should have failed to decode + } + } + }; + make_blending(info_, &blending_info_[0]); + for (size_t i = 0; i < ec_info.size(); i++) { + make_blending(ec_info[i], &blending_info_[1 + i]); + } + } + + Status IsInitialized() const override { return initialized_; } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + JXL_ASSERT(initialized_); + const FrameOrigin& frame_origin = state_.frame_header.frame_origin; + ssize_t bg_xpos = frame_origin.x0 + static_cast(xpos); + ssize_t bg_ypos = frame_origin.y0 + static_cast(ypos); + int offset = 0; + if (bg_xpos + static_cast(xsize) <= 0 || + frame_origin.x0 >= static_cast(image_xsize_) || bg_ypos < 0 || + bg_ypos >= static_cast(image_ysize_)) { + return; + } + if (bg_xpos < 0) { + offset -= bg_xpos; + xsize += bg_xpos; + bg_xpos = 0; + } + if (bg_xpos + xsize > image_xsize_) { + xsize = + std::max(0, 
static_cast(image_xsize_) - bg_xpos); + } + std::vector bg_row_ptrs_(input_rows.size()); + std::vector fg_row_ptrs_(input_rows.size()); + size_t num_c = std::min(input_rows.size(), extra_channel_info_->size() + 3); + for (size_t c = 0; c < num_c; ++c) { + fg_row_ptrs_[c] = GetInputRow(input_rows, c, 0) + offset; + if (c < 3) { + bg_row_ptrs_[c] = bg_->xsize() != 0 && bg_->ysize() != 0 + ? bg_->color().ConstPlaneRow(c, bg_ypos) + bg_xpos + : zeroes_.data(); + } else { + const ImageBundle& ec_bg = + state_ + .reference_frames[state_.frame_header + .extra_channel_blending_info[c - 3] + .source] + .frame; + bg_row_ptrs_[c] = + ec_bg.xsize() != 0 && ec_bg.ysize() != 0 + ? ec_bg.extra_channels()[c - 3].ConstRow(bg_ypos) + bg_xpos + : zeroes_.data(); + } + } + PerformBlending(bg_row_ptrs_.data(), fg_row_ptrs_.data(), + fg_row_ptrs_.data(), 0, xsize, blending_info_[0], + blending_info_.data() + 1, *extra_channel_info_); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return RenderPipelineChannelMode::kInPlace; + } + + bool SwitchToImageDimensions() const override { return true; } + + void GetImageDimensions(size_t* xsize, size_t* ysize, + FrameOrigin* frame_origin) const override { + *xsize = image_xsize_; + *ysize = image_ysize_; + *frame_origin = state_.frame_header.frame_origin; + } + + void ProcessPaddingRow(const RowInfo& output_rows, size_t xsize, size_t xpos, + size_t ypos) const override { + if (bg_->xsize() == 0 || bg_->ysize() == 0) { + for (size_t c = 0; c < 3; ++c) { + memset(GetInputRow(output_rows, c, 0), 0, xsize * sizeof(float)); + } + } else { + for (size_t c = 0; c < 3; ++c) { + memcpy(GetInputRow(output_rows, c, 0), + bg_->color().ConstPlaneRow(c, ypos) + xpos, + xsize * sizeof(float)); + } + } + for (size_t ec = 0; ec < extra_channel_info_->size(); ++ec) { + const ImageBundle& ec_bg = + state_ + .reference_frames + [state_.frame_header.extra_channel_blending_info[ec].source] + .frame; + if (ec_bg.xsize() == 0 || ec_bg.ysize() 
== 0) { + memset(GetInputRow(output_rows, 3 + ec, 0), 0, xsize * sizeof(float)); + } else { + memcpy(GetInputRow(output_rows, 3 + ec, 0), + ec_bg.extra_channels()[ec].ConstRow(ypos) + xpos, + xsize * sizeof(float)); + } + } + } + + const char* GetName() const override { return "Blending"; } + + private: + const PassesSharedState& state_; + BlendingInfo info_; + const ImageBundle* bg_; + Status initialized_ = true; + size_t image_xsize_; + size_t image_ysize_; + std::vector blending_info_; + const std::vector* extra_channel_info_; + std::vector zeroes_; +}; + +std::unique_ptr GetBlendingStage( + const PassesDecoderState* dec_state, + const ColorEncoding& frame_color_encoding) { + return jxl::make_unique(dec_state, frame_color_encoding); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetBlendingStage); + +std::unique_ptr GetBlendingStage( + const PassesDecoderState* dec_state, + const ColorEncoding& frame_color_encoding) { + return HWY_DYNAMIC_DISPATCH(GetBlendingStage)(dec_state, + frame_color_encoding); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.h new file mode 100644 index 0000000000..c8db7490cd --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_blending.h @@ -0,0 +1,24 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_ + +#include + +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" +#include "lib/jxl/splines.h" + +namespace jxl { + +// Applies blending if applicable. 
+std::unique_ptr GetBlendingStage( + const PassesDecoderState* dec_state, + const ColorEncoding& frame_color_encoding); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_BLENDING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc new file mode 100644 index 0000000000..936fbd3a44 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.cc @@ -0,0 +1,127 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_chroma_upsampling.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_chroma_upsampling.cc" +#include +#include + +#include "lib/jxl/simd_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; + +class HorizontalChromaUpsamplingStage : public RenderPipelineStage { + public: + explicit HorizontalChromaUpsamplingStage(size_t channel) + : RenderPipelineStage(RenderPipelineStage::Settings::ShiftX( + /*shift=*/1, /*border=*/1)), + c_(channel) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + HWY_FULL(float) df; + xextra = RoundUpTo(xextra, Lanes(df)); + auto threefour = Set(df, 0.75f); + auto onefour = Set(df, 0.25f); + const float* row_in = GetInputRow(input_rows, c_, 0); + float* row_out = GetOutputRow(output_rows, c_, 0); + for (ssize_t x = -xextra; x < static_cast(xsize + xextra); + x += Lanes(df)) { + auto current = Mul(LoadU(df, row_in + x), threefour); + auto prev = LoadU(df, row_in + x - 1); + auto next = LoadU(df, row_in + x + 1); + auto left = MulAdd(onefour, prev, current); + auto right = MulAdd(onefour, next, current); + StoreInterleaved(df, left, right, row_out + x * 2); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c == c_ ? 
RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "HChromaUps"; } + + private: + size_t c_; +}; + +class VerticalChromaUpsamplingStage : public RenderPipelineStage { + public: + explicit VerticalChromaUpsamplingStage(size_t channel) + : RenderPipelineStage(RenderPipelineStage::Settings::ShiftY( + /*shift=*/1, /*border=*/1)), + c_(channel) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + HWY_FULL(float) df; + xextra = RoundUpTo(xextra, Lanes(df)); + auto threefour = Set(df, 0.75f); + auto onefour = Set(df, 0.25f); + const float* row_top = GetInputRow(input_rows, c_, -1); + const float* row_mid = GetInputRow(input_rows, c_, 0); + const float* row_bot = GetInputRow(input_rows, c_, 1); + float* row_out0 = GetOutputRow(output_rows, c_, 0); + float* row_out1 = GetOutputRow(output_rows, c_, 1); + for (ssize_t x = -xextra; x < static_cast(xsize + xextra); + x += Lanes(df)) { + auto it = LoadU(df, row_top + x); + auto im = LoadU(df, row_mid + x); + auto ib = LoadU(df, row_bot + x); + auto im_scaled = Mul(im, threefour); + Store(MulAdd(it, onefour, im_scaled), df, row_out0 + x); + Store(MulAdd(ib, onefour, im_scaled), df, row_out1 + x); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c == c_ ? 
RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "VChromaUps"; } + + private: + size_t c_; +}; + +std::unique_ptr GetChromaUpsamplingStage(size_t channel, + bool horizontal) { + if (horizontal) { + return jxl::make_unique(channel); + } else { + return jxl::make_unique(channel); + } +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetChromaUpsamplingStage); + +std::unique_ptr GetChromaUpsamplingStage(size_t channel, + bool horizontal) { + return HWY_DYNAMIC_DISPATCH(GetChromaUpsamplingStage)(channel, horizontal); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.h new file mode 100644 index 0000000000..b8bfc15f5f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_chroma_upsampling.h @@ -0,0 +1,27 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Applies simple upsampling, either horizontal or vertical, to the given +// channel. 
+std::unique_ptr GetChromaUpsamplingStage(size_t channel, + bool horizontal); +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_CHROMA_UPSAMPLING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.cc new file mode 100644 index 0000000000..a75652db0e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.cc @@ -0,0 +1,524 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_epf.h" + +#include "lib/jxl/epf.h" +#include "lib/jxl/sanitizers.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_epf.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +// TODO(veluca): In principle, vectors could be not capped, if we want to deal +// with having two different sigma values in a single vector. +using DF = HWY_CAPPED(float, 8); + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::AbsDiff; +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::Vec; +using hwy::HWY_NAMESPACE::VFromD; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +JXL_INLINE Vec Weight(Vec sad, Vec inv_sigma, Vec thres) { + auto v = MulAdd(sad, inv_sigma, Set(DF(), 1.0f)); + return ZeroIfNegative(v); +} + +// 5x5 plus-shaped kernel with 5 SADs per pixel (3x3 plus-shaped). So this makes +// this filter a 7x7 filter. 
+class EPF0Stage : public RenderPipelineStage { + public: + EPF0Stage(const LoopFilter& lf, const ImageF& sigma) + : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric( + /*shift=*/0, /*border=*/3)), + lf_(lf), + sigma_(&sigma) {} + + template + JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][7], ssize_t x, + Vec sad, Vec inv_sigma, + Vec* JXL_RESTRICT X, Vec* JXL_RESTRICT Y, + Vec* JXL_RESTRICT B, + Vec* JXL_RESTRICT w) const { + auto cx = aligned ? Load(DF(), rows[0][3 + row] + x) + : LoadU(DF(), rows[0][3 + row] + x); + auto cy = aligned ? Load(DF(), rows[1][3 + row] + x) + : LoadU(DF(), rows[1][3 + row] + x); + auto cb = aligned ? Load(DF(), rows[2][3 + row] + x) + : LoadU(DF(), rows[2][3 + row] + x); + + auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass1_zeroflush)); + *w = Add(*w, weight); + *X = MulAdd(weight, cx, *X); + *Y = MulAdd(weight, cy, *Y); + *B = MulAdd(weight, cb, *B); + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + DF df; + + using V = decltype(Zero(df)); + V t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, tA, tB; + V* sads[12] = {&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7, &t8, &t9, &tA, &tB}; + + xextra = RoundUpTo(xextra, Lanes(df)); + const float* JXL_RESTRICT row_sigma = + sigma_->Row(ypos / kBlockDim + kSigmaPadding); + + float sm = lf_.epf_pass0_sigma_scale * 1.65; + float bsm = sm * lf_.epf_border_sad_mul; + + HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm, + sm, sm, sm, bsm}; + HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm, + bsm, bsm, bsm, bsm}; + float* JXL_RESTRICT rows[3][7]; + for (size_t c = 0; c < 3; c++) { + for (int i = 0; i < 7; i++) { + rows[c][i] = GetInputRow(input_rows, c, i - 3); + } + } + + const float* sad_mul = + (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1) + ? 
sad_mul_border + : sad_mul_center; + + for (ssize_t x = -xextra; x < static_cast(xsize + xextra); + x += Lanes(df)) { + size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim; + size_t ix = (x + xpos) % kBlockDim; + + if (row_sigma[bx] < kMinSigma) { + for (size_t c = 0; c < 3; c++) { + auto px = Load(df, rows[c][3 + 0] + x); + StoreU(px, df, GetOutputRow(output_rows, c, 0) + x); + } + continue; + } + + const auto sm = Load(df, sad_mul + ix); + const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm); + + for (size_t i = 0; i < 12; i++) *sads[i] = Zero(df); + constexpr std::array sads_off[12] = { + {{-2, 0}}, {{-1, -1}}, {{-1, 0}}, {{-1, 1}}, {{0, -2}}, {{0, -1}}, + {{0, 1}}, {{0, 2}}, {{1, -1}}, {{1, 0}}, {{1, 1}}, {{2, 0}}, + }; + + // compute sads + // TODO(veluca): consider unrolling and optimizing this. + for (size_t c = 0; c < 3; c++) { + auto scale = Set(df, lf_.epf_channel_scale[c]); + for (size_t i = 0; i < 12; i++) { + auto sad = Zero(df); + constexpr std::array plus_off[] = { + {{0, 0}}, {{-1, 0}}, {{0, -1}}, {{1, 0}}, {{0, 1}}}; + for (size_t j = 0; j < 5; j++) { + const auto r11 = + LoadU(df, rows[c][3 + plus_off[j][0]] + x + plus_off[j][1]); + const auto c11 = + LoadU(df, rows[c][3 + sads_off[i][0] + plus_off[j][0]] + x + + sads_off[i][1] + plus_off[j][1]); + sad = Add(sad, AbsDiff(r11, c11)); + } + *sads[i] = MulAdd(sad, scale, *sads[i]); + } + } + const auto x_cc = Load(df, rows[0][3 + 0] + x); + const auto y_cc = Load(df, rows[1][3 + 0] + x); + const auto b_cc = Load(df, rows[2][3 + 0] + x); + + auto w = Set(df, 1); + auto X = x_cc; + auto Y = y_cc; + auto B = b_cc; + + for (size_t i = 0; i < 12; i++) { + AddPixel(/*row=*/sads_off[i][0], rows, + x + sads_off[i][1], *sads[i], inv_sigma, &X, + &Y, &B, &w); + } +#if JXL_HIGH_PRECISION + auto inv_w = Div(Set(df, 1.0f), w); +#else + auto inv_w = ApproximateReciprocal(w); +#endif + StoreU(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x); + StoreU(Mul(Y, inv_w), df, GetOutputRow(output_rows, 
1, 0) + x); + StoreU(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "EPF0"; } + + private: + LoopFilter lf_; + const ImageF* sigma_; +}; + +// 3x3 plus-shaped kernel with 5 SADs per pixel (also 3x3 plus-shaped). So this +// makes this filter a 5x5 filter. +class EPF1Stage : public RenderPipelineStage { + public: + EPF1Stage(const LoopFilter& lf, const ImageF& sigma) + : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric( + /*shift=*/0, /*border=*/2)), + lf_(lf), + sigma_(&sigma) {} + + template + JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][5], ssize_t x, + Vec sad, Vec inv_sigma, + Vec* JXL_RESTRICT X, Vec* JXL_RESTRICT Y, + Vec* JXL_RESTRICT B, + Vec* JXL_RESTRICT w) const { + auto cx = aligned ? Load(DF(), rows[0][2 + row] + x) + : LoadU(DF(), rows[0][2 + row] + x); + auto cy = aligned ? Load(DF(), rows[1][2 + row] + x) + : LoadU(DF(), rows[1][2 + row] + x); + auto cb = aligned ? 
Load(DF(), rows[2][2 + row] + x) + : LoadU(DF(), rows[2][2 + row] + x); + + auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass1_zeroflush)); + *w = Add(*w, weight); + *X = MulAdd(weight, cx, *X); + *Y = MulAdd(weight, cy, *Y); + *B = MulAdd(weight, cb, *B); + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + DF df; + xextra = RoundUpTo(xextra, Lanes(df)); + const float* JXL_RESTRICT row_sigma = + sigma_->Row(ypos / kBlockDim + kSigmaPadding); + + float sm = 1.65f; + float bsm = sm * lf_.epf_border_sad_mul; + + HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm, + sm, sm, sm, bsm}; + HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm, + bsm, bsm, bsm, bsm}; + + float* JXL_RESTRICT rows[3][5]; + for (size_t c = 0; c < 3; c++) { + for (int i = 0; i < 5; i++) { + rows[c][i] = GetInputRow(input_rows, c, i - 2); + } + } + + const float* sad_mul = + (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 1) + ? 
sad_mul_border + : sad_mul_center; + + for (ssize_t x = -xextra; x < static_cast(xsize + xextra); + x += Lanes(df)) { + size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim; + size_t ix = (x + xpos) % kBlockDim; + + if (row_sigma[bx] < kMinSigma) { + for (size_t c = 0; c < 3; c++) { + auto px = Load(df, rows[c][2 + 0] + x); + Store(px, df, GetOutputRow(output_rows, c, 0) + x); + } + continue; + } + + const auto sm = Load(df, sad_mul + ix); + const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm); + auto sad0 = Zero(df); + auto sad1 = Zero(df); + auto sad2 = Zero(df); + auto sad3 = Zero(df); + + // compute sads + for (size_t c = 0; c < 3; c++) { + // center px = 22, px above = 21 + auto t = Undefined(df); + + const auto p20 = Load(df, rows[c][2 + -2] + x); + const auto p21 = Load(df, rows[c][2 + -1] + x); + auto sad0c = AbsDiff(p20, p21); // SAD 2, 1 + + const auto p11 = LoadU(df, rows[c][2 + -1] + x - 1); + auto sad1c = AbsDiff(p11, p21); // SAD 1, 2 + + const auto p31 = LoadU(df, rows[c][2 + -1] + x + 1); + auto sad2c = AbsDiff(p31, p21); // SAD 3, 2 + + const auto p02 = LoadU(df, rows[c][2 + 0] + x - 2); + const auto p12 = LoadU(df, rows[c][2 + 0] + x - 1); + sad1c = Add(sad1c, AbsDiff(p02, p12)); // SAD 1, 2 + sad0c = Add(sad0c, AbsDiff(p11, p12)); // SAD 2, 1 + + const auto p22 = LoadU(df, rows[c][2 + 0] + x); + t = AbsDiff(p12, p22); + sad1c = Add(sad1c, t); // SAD 1, 2 + sad2c = Add(sad2c, t); // SAD 3, 2 + t = AbsDiff(p22, p21); + auto sad3c = t; // SAD 2, 3 + sad0c = Add(sad0c, t); // SAD 2, 1 + + const auto p32 = LoadU(df, rows[c][2 + 0] + x + 1); + sad0c = Add(sad0c, AbsDiff(p31, p32)); // SAD 2, 1 + t = AbsDiff(p22, p32); + sad1c = Add(sad1c, t); // SAD 1, 2 + sad2c = Add(sad2c, t); // SAD 3, 2 + + const auto p42 = LoadU(df, rows[c][2 + 0] + x + 2); + sad2c = Add(sad2c, AbsDiff(p42, p32)); // SAD 3, 2 + + const auto p13 = LoadU(df, rows[c][2 + 1] + x - 1); + sad3c = Add(sad3c, AbsDiff(p13, p12)); // SAD 2, 3 + + const auto p23 = Load(df, 
rows[c][2 + 1] + x); + t = AbsDiff(p22, p23); + sad0c = Add(sad0c, t); // SAD 2, 1 + sad3c = Add(sad3c, t); // SAD 2, 3 + sad1c = Add(sad1c, AbsDiff(p13, p23)); // SAD 1, 2 + + const auto p33 = LoadU(df, rows[c][2 + 1] + x + 1); + sad2c = Add(sad2c, AbsDiff(p33, p23)); // SAD 3, 2 + sad3c = Add(sad3c, AbsDiff(p33, p32)); // SAD 2, 3 + + const auto p24 = Load(df, rows[c][2 + 2] + x); + sad3c = Add(sad3c, AbsDiff(p24, p23)); // SAD 2, 3 + + auto scale = Set(df, lf_.epf_channel_scale[c]); + sad0 = MulAdd(sad0c, scale, sad0); + sad1 = MulAdd(sad1c, scale, sad1); + sad2 = MulAdd(sad2c, scale, sad2); + sad3 = MulAdd(sad3c, scale, sad3); + } + const auto x_cc = Load(df, rows[0][2 + 0] + x); + const auto y_cc = Load(df, rows[1][2 + 0] + x); + const auto b_cc = Load(df, rows[2][2 + 0] + x); + + auto w = Set(df, 1); + auto X = x_cc; + auto Y = y_cc; + auto B = b_cc; + + // Top row + AddPixel(/*row=*/-1, rows, x, sad0, inv_sigma, &X, &Y, + &B, &w); + // Center + AddPixel(/*row=*/0, rows, x - 1, sad1, inv_sigma, &X, + &Y, &B, &w); + AddPixel(/*row=*/0, rows, x + 1, sad2, inv_sigma, &X, + &Y, &B, &w); + // Bottom + AddPixel(/*row=*/1, rows, x, sad3, inv_sigma, &X, &Y, + &B, &w); +#if JXL_HIGH_PRECISION + auto inv_w = Div(Set(df, 1.0f), w); +#else + auto inv_w = ApproximateReciprocal(w); +#endif + Store(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x); + Store(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x); + Store(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "EPF1"; } + + private: + LoopFilter lf_; + const ImageF* sigma_; +}; + +// 3x3 plus-shaped kernel with 1 SAD per pixel. So this makes this filter a 3x3 +// filter. 
+class EPF2Stage : public RenderPipelineStage { + public: + EPF2Stage(const LoopFilter& lf, const ImageF& sigma) + : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric( + /*shift=*/0, /*border=*/1)), + lf_(lf), + sigma_(&sigma) {} + + template + JXL_INLINE void AddPixel(int row, float* JXL_RESTRICT rows[3][3], ssize_t x, + Vec rx, Vec ry, Vec rb, + Vec inv_sigma, Vec* JXL_RESTRICT X, + Vec* JXL_RESTRICT Y, Vec* JXL_RESTRICT B, + Vec* JXL_RESTRICT w) const { + auto cx = aligned ? Load(DF(), rows[0][1 + row] + x) + : LoadU(DF(), rows[0][1 + row] + x); + auto cy = aligned ? Load(DF(), rows[1][1 + row] + x) + : LoadU(DF(), rows[1][1 + row] + x); + auto cb = aligned ? Load(DF(), rows[2][1 + row] + x) + : LoadU(DF(), rows[2][1 + row] + x); + + auto sad = Mul(AbsDiff(cx, rx), Set(DF(), lf_.epf_channel_scale[0])); + sad = MulAdd(AbsDiff(cy, ry), Set(DF(), lf_.epf_channel_scale[1]), sad); + sad = MulAdd(AbsDiff(cb, rb), Set(DF(), lf_.epf_channel_scale[2]), sad); + + auto weight = Weight(sad, inv_sigma, Set(DF(), lf_.epf_pass2_zeroflush)); + + *w = Add(*w, weight); + *X = MulAdd(weight, cx, *X); + *Y = MulAdd(weight, cy, *Y); + *B = MulAdd(weight, cb, *B); + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + DF df; + xextra = RoundUpTo(xextra, Lanes(df)); + const float* JXL_RESTRICT row_sigma = + sigma_->Row(ypos / kBlockDim + kSigmaPadding); + + float sm = lf_.epf_pass2_sigma_scale * 1.65; + float bsm = sm * lf_.epf_border_sad_mul; + + HWY_ALIGN float sad_mul_center[kBlockDim] = {bsm, sm, sm, sm, + sm, sm, sm, bsm}; + HWY_ALIGN float sad_mul_border[kBlockDim] = {bsm, bsm, bsm, bsm, + bsm, bsm, bsm, bsm}; + + float* JXL_RESTRICT rows[3][3]; + for (size_t c = 0; c < 3; c++) { + for (int i = 0; i < 3; i++) { + rows[c][i] = GetInputRow(input_rows, c, i - 1); + } + } + + const float* sad_mul = + (ypos % kBlockDim == 0 || ypos % kBlockDim == kBlockDim - 
1) + ? sad_mul_border + : sad_mul_center; + + for (ssize_t x = -xextra; x < static_cast(xsize + xextra); + x += Lanes(df)) { + size_t bx = (x + xpos + kSigmaPadding * kBlockDim) / kBlockDim; + size_t ix = (x + xpos) % kBlockDim; + + if (row_sigma[bx] < kMinSigma) { + for (size_t c = 0; c < 3; c++) { + auto px = Load(df, rows[c][1 + 0] + x); + Store(px, df, GetOutputRow(output_rows, c, 0) + x); + } + continue; + } + + const auto sm = Load(df, sad_mul + ix); + const auto inv_sigma = Mul(Set(df, row_sigma[bx]), sm); + + const auto x_cc = Load(df, rows[0][1 + 0] + x); + const auto y_cc = Load(df, rows[1][1 + 0] + x); + const auto b_cc = Load(df, rows[2][1 + 0] + x); + + auto w = Set(df, 1); + auto X = x_cc; + auto Y = y_cc; + auto B = b_cc; + + // Top row + AddPixel(/*row=*/-1, rows, x, x_cc, y_cc, b_cc, + inv_sigma, &X, &Y, &B, &w); + // Center + AddPixel(/*row=*/0, rows, x - 1, x_cc, y_cc, b_cc, + inv_sigma, &X, &Y, &B, &w); + AddPixel(/*row=*/0, rows, x + 1, x_cc, y_cc, b_cc, + inv_sigma, &X, &Y, &B, &w); + // Bottom + AddPixel(/*row=*/1, rows, x, x_cc, y_cc, b_cc, + inv_sigma, &X, &Y, &B, &w); +#if JXL_HIGH_PRECISION + auto inv_w = Div(Set(df, 1.0f), w); +#else + auto inv_w = ApproximateReciprocal(w); +#endif + Store(Mul(X, inv_w), df, GetOutputRow(output_rows, 0, 0) + x); + Store(Mul(Y, inv_w), df, GetOutputRow(output_rows, 1, 0) + x); + Store(Mul(B, inv_w), df, GetOutputRow(output_rows, 2, 0) + x); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? 
RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "EPF2"; } + + private: + LoopFilter lf_; + const ImageF* sigma_; +}; + +std::unique_ptr GetEPFStage0(const LoopFilter& lf, + const ImageF& sigma) { + return jxl::make_unique(lf, sigma); +} + +std::unique_ptr GetEPFStage1(const LoopFilter& lf, + const ImageF& sigma) { + return jxl::make_unique(lf, sigma); +} + +std::unique_ptr GetEPFStage2(const LoopFilter& lf, + const ImageF& sigma) { + return jxl::make_unique(lf, sigma); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetEPFStage0); +HWY_EXPORT(GetEPFStage1); +HWY_EXPORT(GetEPFStage2); + +std::unique_ptr GetEPFStage(const LoopFilter& lf, + const ImageF& sigma, + size_t epf_stage) { + JXL_ASSERT(lf.epf_iters != 0); + switch (epf_stage) { + case 0: + return HWY_DYNAMIC_DISPATCH(GetEPFStage0)(lf, sigma); + case 1: + return HWY_DYNAMIC_DISPATCH(GetEPFStage1)(lf, sigma); + case 2: + return HWY_DYNAMIC_DISPATCH(GetEPFStage2)(lf, sigma); + default: + JXL_UNREACHABLE("Invalid EPF stage"); + } +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.h new file mode 100644 index 0000000000..c9d0d0c785 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_epf.h @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/image.h" +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Applies the `epf_stage`-th EPF step with the given settings and `sigma`. +// `sigma` will be accessed with an offset of (kSigmaPadding, kSigmaPadding), +// and should have (kSigmaBorder, kSigmaBorder) mirrored sigma values available +// around the main image. See also filters.(h|cc) +std::unique_ptr GetEPFStage(const LoopFilter& lf, + const ImageF& sigma, + size_t epf_stage); +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_EPF_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.cc new file mode 100644 index 0000000000..bc8f1ad8db --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.cc @@ -0,0 +1,190 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_from_linear.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_from_linear.cc" +#include +#include + +#include "lib/jxl/dec_tone_mapping-inl.h" +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::IfThenZeroElse; + +template +struct PerChannelOp { + explicit PerChannelOp(Op op) : op(op) {} + template + void Transform(D d, T* r, T* g, T* b) const { + *r = op.Transform(d, *r); + *g = op.Transform(d, *g); + *b = op.Transform(d, *b); + } + + Op op; +}; +template +PerChannelOp MakePerChannelOp(Op&& op) { + return PerChannelOp(std::forward(op)); +} + +struct OpLinear { + template + T Transform(D d, const T& linear) const { + return linear; + } +}; + +struct OpRgb { + template + T Transform(D d, const T& linear) const { +#if JXL_HIGH_PRECISION + return TF_SRGB().EncodedFromDisplay(d, linear); +#else + return FastLinearToSRGB(d, linear); +#endif + } +}; + +struct OpPq { + template + T Transform(D d, const T& linear) const { + return TF_PQ().EncodedFromDisplay(d, linear); + } +}; + +struct OpHlg { + explicit OpHlg(const float luminances[3], const float intensity_target) + : hlg_ootf_(HlgOOTF::ToSceneLight(/*display_luminance=*/intensity_target, + luminances)) {} + + template + void Transform(D d, T* r, T* g, T* b) const { + hlg_ootf_.Apply(r, g, b); + *r = TF_HLG().EncodedFromDisplay(d, *r); + *g = TF_HLG().EncodedFromDisplay(d, *g); + *b = TF_HLG().EncodedFromDisplay(d, *b); + } + HlgOOTF hlg_ootf_; +}; + +struct Op709 { + template + T Transform(D d, const T& linear) const { + return TF_709().EncodedFromDisplay(d, linear); + } +}; + +struct OpGamma { + const float inverse_gamma; + template + T Transform(D d, const T& linear) const { + return IfThenZeroElse(Le(linear, Set(d, 1e-5f)), + FastPowf(d, linear, Set(d, inverse_gamma))); + } +}; + +template +class FromLinearStage : public RenderPipelineStage { + public: + explicit FromLinearStage(Op op) + : RenderPipelineStage(RenderPipelineStage::Settings()), + op_(std::move(op)) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + const HWY_FULL(float) d; + const size_t xsize_v = 
RoundUpTo(xsize, Lanes(d)); + float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0); + float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0); + float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0); + // All calculations are lane-wise, still some might require + // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last + // vector tail. + msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + auto r = LoadU(d, row0 + x); + auto g = LoadU(d, row1 + x); + auto b = LoadU(d, row2 + x); + op_.Transform(d, &r, &g, &b); + StoreU(r, d, row0 + x); + StoreU(g, d, row1 + x); + StoreU(b, d, row2 + x); + } + msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? 
RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "FromLinear"; } + + private: + Op op_; +}; + +template +std::unique_ptr> MakeFromLinearStage(Op&& op) { + return jxl::make_unique>(std::forward(op)); +} + +std::unique_ptr GetFromLinearStage( + const OutputEncodingInfo& output_encoding_info) { + if (output_encoding_info.color_encoding.tf.IsLinear()) { + return MakeFromLinearStage(MakePerChannelOp(OpLinear())); + } else if (output_encoding_info.color_encoding.tf.IsSRGB()) { + return MakeFromLinearStage(MakePerChannelOp(OpRgb())); + } else if (output_encoding_info.color_encoding.tf.IsPQ()) { + return MakeFromLinearStage(MakePerChannelOp(OpPq())); + } else if (output_encoding_info.color_encoding.tf.IsHLG()) { + return MakeFromLinearStage( + OpHlg(output_encoding_info.luminances, + output_encoding_info.desired_intensity_target)); + } else if (output_encoding_info.color_encoding.tf.Is709()) { + return MakeFromLinearStage(MakePerChannelOp(Op709())); + } else if (output_encoding_info.color_encoding.tf.IsGamma() || + output_encoding_info.color_encoding.tf.IsDCI()) { + return MakeFromLinearStage( + MakePerChannelOp(OpGamma{output_encoding_info.inverse_gamma})); + } else { + // This is a programming error. 
+ JXL_UNREACHABLE("Invalid target encoding"); + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetFromLinearStage); + +std::unique_ptr GetFromLinearStage( + const OutputEncodingInfo& output_encoding_info) { + return HWY_DYNAMIC_DISPATCH(GetFromLinearStage)(output_encoding_info); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.h new file mode 100644 index 0000000000..548ab50b8c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_from_linear.h @@ -0,0 +1,20 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_ + +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Converts the color channels from linear to the specified output encoding. +std::unique_ptr GetFromLinearStage( + const OutputEncodingInfo& output_encoding_info); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_FROM_LINEAR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.cc new file mode 100644 index 0000000000..0917db3f9a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.cc @@ -0,0 +1,120 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/render_pipeline/stage_gaborish.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_gaborish.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; + +class GaborishStage : public RenderPipelineStage { + public: + explicit GaborishStage(const LoopFilter& lf) + : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric( + /*shift=*/0, /*border=*/1)) { + weights_[0] = 1; + weights_[1] = lf.gab_x_weight1; + weights_[2] = lf.gab_x_weight2; + weights_[3] = 1; + weights_[4] = lf.gab_y_weight1; + weights_[5] = lf.gab_y_weight2; + weights_[6] = 1; + weights_[7] = lf.gab_b_weight1; + weights_[8] = lf.gab_b_weight2; + // Normalize + for (size_t c = 0; c < 3; c++) { + const float div = + weights_[3 * c] + 4 * (weights_[3 * c + 1] + weights_[3 * c + 2]); + const float mul = 1.0f / div; + weights_[3 * c] *= mul; + weights_[3 * c + 1] *= mul; + weights_[3 * c + 2] *= mul; + } + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + const HWY_FULL(float) d; + for (size_t c = 0; c < 3; c++) { + float* JXL_RESTRICT row_t = GetInputRow(input_rows, c, -1); + float* JXL_RESTRICT row_m = GetInputRow(input_rows, c, 0); + float* JXL_RESTRICT row_b = GetInputRow(input_rows, c, 1); + float* JXL_RESTRICT row_out = GetOutputRow(output_rows, c, 0); + const auto w0 = Set(d, weights_[3 * c + 0]); + const auto w1 = Set(d, weights_[3 * c + 1]); + const auto w2 = Set(d, weights_[3 * c + 2]); +// Group data need only be aligned to a block; for >=512 bit vectors, this may +// result in unaligned loads. 
+#if HWY_CAP_GE512 +#define LoadMaybeU LoadU +#else +#define LoadMaybeU Load +#endif + // Since GetInputRow(input_rows, c, {-1, 0, 1}) is aligned, rounding + // xextra up to Lanes(d) doesn't access anything problematic. + for (ssize_t x = -RoundUpTo(xextra, Lanes(d)); + x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + const auto t = LoadMaybeU(d, row_t + x); + const auto tl = LoadU(d, row_t + x - 1); + const auto tr = LoadU(d, row_t + x + 1); + const auto m = LoadMaybeU(d, row_m + x); + const auto l = LoadU(d, row_m + x - 1); + const auto r = LoadU(d, row_m + x + 1); + const auto b = LoadMaybeU(d, row_b + x); + const auto bl = LoadU(d, row_b + x - 1); + const auto br = LoadU(d, row_b + x + 1); + const auto sum0 = m; + const auto sum1 = Add(Add(l, r), Add(t, b)); + const auto sum2 = Add(Add(tl, tr), Add(bl, br)); + auto pixels = MulAdd(sum2, w2, MulAdd(sum1, w1, Mul(sum0, w0))); + Store(pixels, d, row_out + x); + } + } + } +#undef LoadMaybeU + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? 
RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "Gab"; } + + private: + float weights_[9]; +}; + +std::unique_ptr GetGaborishStage(const LoopFilter& lf) { + return jxl::make_unique(lf); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetGaborishStage); + +std::unique_ptr GetGaborishStage(const LoopFilter& lf) { + JXL_ASSERT(lf.gab == 1); + return HWY_DYNAMIC_DISPATCH(GetGaborishStage)(lf); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.h new file mode 100644 index 0000000000..761800f668 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_gaborish.h @@ -0,0 +1,25 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/loop_filter.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Applies decoder-side Gaborish with the given settings. `lf.gab` must be 1. +std::unique_ptr GetGaborishStage(const LoopFilter& lf); +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_GABORISH_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.cc new file mode 100644 index 0000000000..62abd8fb0f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.cc @@ -0,0 +1,306 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_noise.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_noise.cc" +#include +#include + +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Vec; +using hwy::HWY_NAMESPACE::ZeroIfNegative; + +using D = HWY_CAPPED(float, kBlockDim); +using DI = hwy::HWY_NAMESPACE::Rebind; +using DI8 = hwy::HWY_NAMESPACE::Repartition; + +// [0, max_value] +template +static HWY_INLINE V Clamp0ToMax(D d, const V x, const V max_value) { + const auto clamped = Min(x, max_value); + return ZeroIfNegative(clamped); +} + +// x is in [0+delta, 1+delta], delta ~= 0.06 +template +typename StrengthEval::V NoiseStrength(const StrengthEval& eval, + const typename StrengthEval::V x) { + return Clamp0ToMax(D(), eval(x), Set(D(), 1.0f)); +} + +// TODO(veluca): SIMD-fy. 
+class StrengthEvalLut { + public: + using V = Vec; + + explicit StrengthEvalLut(const NoiseParams& noise_params) +#if HWY_TARGET == HWY_SCALAR + : noise_params_(noise_params) +#endif + { +#if HWY_TARGET != HWY_SCALAR + uint32_t lut[8]; + memcpy(lut, noise_params.lut, sizeof(lut)); + for (size_t i = 0; i < 8; i++) { + low16_lut[2 * i] = (lut[i] >> 0) & 0xFF; + low16_lut[2 * i + 1] = (lut[i] >> 8) & 0xFF; + high16_lut[2 * i] = (lut[i] >> 16) & 0xFF; + high16_lut[2 * i + 1] = (lut[i] >> 24) & 0xFF; + } +#endif + } + + V operator()(const V vx) const { + constexpr size_t kScale = NoiseParams::kNumNoisePoints - 2; + auto scaled_vx = Max(Zero(D()), Mul(vx, Set(D(), kScale))); + auto floor_x = Floor(scaled_vx); + auto frac_x = Sub(scaled_vx, floor_x); + floor_x = IfThenElse(Ge(scaled_vx, Set(D(), kScale + 1)), Set(D(), kScale), + floor_x); + frac_x = + IfThenElse(Ge(scaled_vx, Set(D(), kScale + 1)), Set(D(), 1), frac_x); + auto floor_x_int = ConvertTo(DI(), floor_x); +#if HWY_TARGET == HWY_SCALAR + auto low = Set(D(), noise_params_.lut[floor_x_int.raw]); + auto hi = Set(D(), noise_params_.lut[floor_x_int.raw + 1]); +#else + // Set each lane's bytes to {0, 0, 2x+1, 2x}. + auto floorx_indices_low = + Add(Mul(floor_x_int, Set(DI(), 0x0202)), Set(DI(), 0x0100)); + // Set each lane's bytes to {2x+1, 2x, 0, 0}. 
+ auto floorx_indices_hi = + Add(Mul(floor_x_int, Set(DI(), 0x02020000)), Set(DI(), 0x01000000)); + // load LUT + auto low16 = BitCast(DI(), LoadDup128(DI8(), low16_lut)); + auto lowm = Set(DI(), 0xFFFF); + auto hi16 = BitCast(DI(), LoadDup128(DI8(), high16_lut)); + auto him = Set(DI(), 0xFFFF0000); + // low = noise_params.lut[floor_x] + auto low = + BitCast(D(), Or(And(TableLookupBytes(low16, floorx_indices_low), lowm), + And(TableLookupBytes(hi16, floorx_indices_hi), him))); + // hi = noise_params.lut[floor_x+1] + floorx_indices_low = Add(floorx_indices_low, Set(DI(), 0x0202)); + floorx_indices_hi = Add(floorx_indices_hi, Set(DI(), 0x02020000)); + auto hi = + BitCast(D(), Or(And(TableLookupBytes(low16, floorx_indices_low), lowm), + And(TableLookupBytes(hi16, floorx_indices_hi), him))); +#endif + return MulAdd(Sub(hi, low), frac_x, low); + } + + private: +#if HWY_TARGET != HWY_SCALAR + // noise_params.lut transformed into two 16-bit lookup tables. + HWY_ALIGN uint8_t high16_lut[16]; + HWY_ALIGN uint8_t low16_lut[16]; +#else + const NoiseParams& noise_params_; +#endif +}; + +template +void AddNoiseToRGB(const D d, const Vec rnd_noise_r, + const Vec rnd_noise_g, const Vec rnd_noise_cor, + const Vec noise_strength_g, const Vec noise_strength_r, + float ytox, float ytob, float* JXL_RESTRICT out_x, + float* JXL_RESTRICT out_y, float* JXL_RESTRICT out_b) { + const auto kRGCorr = Set(d, 0.9921875f); // 127/128 + const auto kRGNCorr = Set(d, 0.0078125f); // 1/128 + + const auto red_noise = + Mul(noise_strength_r, + MulAdd(kRGNCorr, rnd_noise_r, Mul(kRGCorr, rnd_noise_cor))); + const auto green_noise = + Mul(noise_strength_g, + MulAdd(kRGNCorr, rnd_noise_g, Mul(kRGCorr, rnd_noise_cor))); + + auto vx = LoadU(d, out_x); + auto vy = LoadU(d, out_y); + auto vb = LoadU(d, out_b); + + const auto rg_noise = Add(red_noise, green_noise); + vx = Add(MulAdd(Set(d, ytox), rg_noise, Sub(red_noise, green_noise)), vx); + vy = Add(vy, rg_noise); + vb = MulAdd(Set(d, ytob), rg_noise, vb); 
+ + StoreU(vx, d, out_x); + StoreU(vy, d, out_y); + StoreU(vb, d, out_b); +} + +class AddNoiseStage : public RenderPipelineStage { + public: + AddNoiseStage(const NoiseParams& noise_params, + const ColorCorrelationMap& cmap, size_t first_c) + : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric( + /*shift=*/0, /*border=*/0)), + noise_params_(noise_params), + cmap_(cmap), + first_c_(first_c) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + if (!noise_params_.HasAny()) return; + const StrengthEvalLut noise_model(noise_params_); + D d; + const auto half = Set(d, 0.5f); + + // With the prior subtract-random Laplacian approximation, rnd_* ranges were + // about [-1.5, 1.6]; Laplacian3 about doubles this to [-3.6, 3.6], so the + // normalizer is half of what it was before (0.5). + const auto norm_const = Set(d, 0.22f); + + float ytox = cmap_.YtoXRatio(0); + float ytob = cmap_.YtoBRatio(0); + + const size_t xsize_v = RoundUpTo(xsize, Lanes(d)); + + float* JXL_RESTRICT row_x = GetInputRow(input_rows, 0, 0); + float* JXL_RESTRICT row_y = GetInputRow(input_rows, 1, 0); + float* JXL_RESTRICT row_b = GetInputRow(input_rows, 2, 0); + const float* JXL_RESTRICT row_rnd_r = + GetInputRow(input_rows, first_c_ + 0, 0); + const float* JXL_RESTRICT row_rnd_g = + GetInputRow(input_rows, first_c_ + 1, 0); + const float* JXL_RESTRICT row_rnd_c = + GetInputRow(input_rows, first_c_ + 2, 0); + // Needed by the calls to Floor() in StrengthEvalLut. Only arithmetic and + // shuffles are otherwise done on the data, so this is safe. 
+ msan::UnpoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float)); + msan::UnpoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float)); + for (size_t x = 0; x < xsize_v; x += Lanes(d)) { + const auto vx = LoadU(d, row_x + x); + const auto vy = LoadU(d, row_y + x); + const auto in_g = Sub(vy, vx); + const auto in_r = Add(vy, vx); + const auto noise_strength_g = NoiseStrength(noise_model, Mul(in_g, half)); + const auto noise_strength_r = NoiseStrength(noise_model, Mul(in_r, half)); + const auto addit_rnd_noise_red = Mul(LoadU(d, row_rnd_r + x), norm_const); + const auto addit_rnd_noise_green = + Mul(LoadU(d, row_rnd_g + x), norm_const); + const auto addit_rnd_noise_correlated = + Mul(LoadU(d, row_rnd_c + x), norm_const); + AddNoiseToRGB(D(), addit_rnd_noise_red, addit_rnd_noise_green, + addit_rnd_noise_correlated, noise_strength_g, + noise_strength_r, ytox, ytob, row_x + x, row_y + x, + row_b + x); + } + msan::PoisonMemory(row_x + xsize, (xsize_v - xsize) * sizeof(float)); + msan::PoisonMemory(row_y + xsize, (xsize_v - xsize) * sizeof(float)); + msan::PoisonMemory(row_b + xsize, (xsize_v - xsize) * sizeof(float)); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c >= first_c_ ? RenderPipelineChannelMode::kInput + : c < 3 ? 
RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "AddNoise"; } + + private: + const NoiseParams& noise_params_; + const ColorCorrelationMap& cmap_; + size_t first_c_; +}; + +std::unique_ptr GetAddNoiseStage( + const NoiseParams& noise_params, const ColorCorrelationMap& cmap, + size_t noise_c_start) { + return jxl::make_unique(noise_params, cmap, noise_c_start); +} + +class ConvolveNoiseStage : public RenderPipelineStage { + public: + explicit ConvolveNoiseStage(size_t first_c) + : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric( + /*shift=*/0, /*border=*/2)), + first_c_(first_c) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + const HWY_FULL(float) d; + for (size_t c = first_c_; c < first_c_ + 3; c++) { + float* JXL_RESTRICT rows[5]; + for (size_t i = 0; i < 5; i++) { + rows[i] = GetInputRow(input_rows, c, i - 2); + } + float* JXL_RESTRICT row_out = GetOutputRow(output_rows, c, 0); + for (ssize_t x = -RoundUpTo(xextra, Lanes(d)); + x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + const auto p00 = LoadU(d, rows[2] + x); + auto others = Zero(d); + // TODO(eustas): sum loaded values to reduce the calculation chain + for (ssize_t i = -2; i <= 2; i++) { + others = Add(others, LoadU(d, rows[0] + x + i)); + others = Add(others, LoadU(d, rows[1] + x + i)); + others = Add(others, LoadU(d, rows[3] + x + i)); + others = Add(others, LoadU(d, rows[4] + x + i)); + } + others = Add(others, LoadU(d, rows[2] + x - 2)); + others = Add(others, LoadU(d, rows[2] + x - 1)); + others = Add(others, LoadU(d, rows[2] + x + 1)); + others = Add(others, LoadU(d, rows[2] + x + 2)); + // 4 * (1 - box kernel) + auto pixels = MulAdd(others, Set(d, 0.16), Mul(p00, Set(d, -3.84))); + StoreU(pixels, d, row_out + x); + } + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) 
const final { + return c >= first_c_ ? RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "ConvNoise"; } + + private: + size_t first_c_; +}; + +std::unique_ptr GetConvolveNoiseStage( + size_t noise_c_start) { + return jxl::make_unique(noise_c_start); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetAddNoiseStage); +HWY_EXPORT(GetConvolveNoiseStage); + +std::unique_ptr GetAddNoiseStage( + const NoiseParams& noise_params, const ColorCorrelationMap& cmap, + size_t noise_c_start) { + return HWY_DYNAMIC_DISPATCH(GetAddNoiseStage)(noise_params, cmap, + noise_c_start); +} + +std::unique_ptr GetConvolveNoiseStage( + size_t noise_c_start) { + return HWY_DYNAMIC_DISPATCH(GetConvolveNoiseStage)(noise_c_start); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.h new file mode 100644 index 0000000000..bd7797f991 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_noise.h @@ -0,0 +1,32 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/dec_noise.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Adds noise to color channels. +std::unique_ptr GetAddNoiseStage( + const NoiseParams& noise_params, const ColorCorrelationMap& cmap, + size_t noise_c_start); + +// Applies a 5x5 subtract-box-filter convolution to the noise input channels. 
+std::unique_ptr GetConvolveNoiseStage( + size_t noise_c_start); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_NOISE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.cc new file mode 100644 index 0000000000..c5a75b09f7 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.cc @@ -0,0 +1,47 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_patches.h" + +namespace jxl { +namespace { +class PatchDictionaryStage : public RenderPipelineStage { + public: + PatchDictionaryStage(const PatchDictionary* patches, size_t num_channels) + : RenderPipelineStage(RenderPipelineStage::Settings()), + patches_(*patches), + num_channels_(num_channels) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + JXL_ASSERT(xpos == 0 || xpos >= xextra); + size_t x0 = xpos ? xpos - xextra : 0; + std::vector row_ptrs(num_channels_); + for (size_t i = 0; i < num_channels_; i++) { + row_ptrs[i] = GetInputRow(input_rows, i, 0) + x0 - xpos; + } + patches_.AddOneRow(row_ptrs.data(), ypos, x0, xsize + xextra + xpos - x0); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < num_channels_ ? 
RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "Patches"; } + + private: + const PatchDictionary& patches_; + const size_t num_channels_; +}; +} // namespace + +std::unique_ptr GetPatchesStage( + const PatchDictionary* patches, size_t num_channels) { + return jxl::make_unique(patches, num_channels); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.h new file mode 100644 index 0000000000..b35abdc2eb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_patches.h @@ -0,0 +1,22 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_ + +#include + +#include "lib/jxl/patch_dictionary_internal.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Draws patches if applicable. +std::unique_ptr GetPatchesStage( + const PatchDictionary* patches, size_t num_channels); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_PATCHES_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.cc new file mode 100644 index 0000000000..4a0529ce2c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.cc @@ -0,0 +1,62 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/render_pipeline/stage_splines.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_splines.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +class SplineStage : public RenderPipelineStage { + public: + explicit SplineStage(const Splines* splines) + : RenderPipelineStage(RenderPipelineStage::Settings()), + splines_(*splines) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + float* row_x = GetInputRow(input_rows, 0, 0); + float* row_y = GetInputRow(input_rows, 1, 0); + float* row_b = GetInputRow(input_rows, 2, 0); + splines_.AddToRow(row_x, row_y, row_b, Rect(xpos, ypos, xsize, 1)); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "Splines"; } + + private: + const Splines& splines_; +}; + +std::unique_ptr GetSplineStage(const Splines* splines) { + return jxl::make_unique(splines); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetSplineStage); + +std::unique_ptr GetSplineStage(const Splines* splines) { + return HWY_DYNAMIC_DISPATCH(GetSplineStage)(splines); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.h new file mode 100644 index 0000000000..363af393ec --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_splines.h @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_ + +#include + +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" +#include "lib/jxl/splines.h" + +namespace jxl { + +// Draws splines if applicable. +std::unique_ptr GetSplineStage(const Splines* splines); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_SPLINES_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.cc new file mode 100644 index 0000000000..a43cb4e1ab --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.cc @@ -0,0 +1,51 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_spot.h" + +namespace jxl { +class SpotColorStage : public RenderPipelineStage { + public: + explicit SpotColorStage(size_t spot_c, const float* spot_color) + : RenderPipelineStage(RenderPipelineStage::Settings()), + spot_c_(spot_c), + spot_color_(spot_color) { + JXL_ASSERT(spot_c_ >= 3); + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + // TODO(veluca): add SIMD. + float scale = spot_color_[3]; + for (size_t c = 0; c < 3; c++) { + float* JXL_RESTRICT p = GetInputRow(input_rows, c, 0); + const float* JXL_RESTRICT s = GetInputRow(input_rows, spot_c_, 0); + for (ssize_t x = -xextra; x < ssize_t(xsize + xextra); x++) { + float mix = scale * s[x]; + p[x] = mix * spot_color_[c] + (1.0f - mix) * p[x]; + } + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? RenderPipelineChannelMode::kInPlace + : c == spot_c_ ? 
RenderPipelineChannelMode::kInput + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "Spot"; } + + private: + size_t spot_c_; + const float* spot_color_; +}; + +std::unique_ptr GetSpotColorStage( + size_t spot_c, const float* spot_color) { + return jxl::make_unique(spot_c, spot_color); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.h new file mode 100644 index 0000000000..3e79c75823 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_spot.h @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_ + +#include + +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Render the spot color channels. +std::unique_ptr GetSpotColorStage(size_t spot_c, + const float* spot_color); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_SPOT_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.cc new file mode 100644 index 0000000000..5a543d2b44 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.cc @@ -0,0 +1,200 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/render_pipeline/stage_to_linear.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_to_linear.cc" +#include +#include + +#include "lib/jxl/dec_tone_mapping-inl.h" +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::IfThenZeroElse; + +template +struct PerChannelOp { + explicit PerChannelOp(Op op) : op(op) {} + template + void Transform(D d, T* r, T* g, T* b) const { + *r = op.Transform(d, *r); + *g = op.Transform(d, *g); + *b = op.Transform(d, *b); + } + + Op op; +}; +template +PerChannelOp MakePerChannelOp(Op&& op) { + return PerChannelOp(std::forward(op)); +} + +struct OpLinear { + template + T Transform(D d, const T& encoded) const { + return encoded; + } +}; + +struct OpRgb { + template + T Transform(D d, const T& encoded) const { + return TF_SRGB().DisplayFromEncoded(encoded); + } +}; + +struct OpPq { + template + T Transform(D d, const T& encoded) const { + return TF_PQ().DisplayFromEncoded(d, encoded); + } +}; + +struct OpHlg { + explicit OpHlg(const float luminances[3], const float intensity_target) + : hlg_ootf_(HlgOOTF::FromSceneLight( + /*display_luminance=*/intensity_target, luminances)) {} + + template + void Transform(D d, T* r, T* g, T* b) const { + for (T* val : {r, g, b}) { + HWY_ALIGN float vals[MaxLanes(d)]; + Store(*val, d, vals); + for (size_t i = 0; i < Lanes(d); ++i) { + vals[i] = TF_HLG().DisplayFromEncoded(vals[i]); + } + *val = Load(d, vals); + } + hlg_ootf_.Apply(r, g, b); + } + HlgOOTF hlg_ootf_; +}; + +struct Op709 { + template + T Transform(D d, const T& encoded) const { + return TF_709().DisplayFromEncoded(d, encoded); + } +}; + +struct OpGamma { + const float gamma; + template + T Transform(D d, const T& encoded) const { + return IfThenZeroElse(Le(encoded, Set(d, 1e-5f)), + FastPowf(d, encoded, 
Set(d, gamma))); + } +}; + +struct OpInvalid { + template + void Transform(D d, T* r, T* g, T* b) const {} +}; + +template +class ToLinearStage : public RenderPipelineStage { + public: + explicit ToLinearStage(Op op) + : RenderPipelineStage(RenderPipelineStage::Settings()), + op_(std::move(op)) {} + + explicit ToLinearStage() + : RenderPipelineStage(RenderPipelineStage::Settings()), valid_(false) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + const HWY_FULL(float) d; + const size_t xsize_v = RoundUpTo(xsize, Lanes(d)); + float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0); + float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0); + float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0); + // All calculations are lane-wise, still some might require + // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last + // vector tail. + msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + auto r = LoadU(d, row0 + x); + auto g = LoadU(d, row1 + x); + auto b = LoadU(d, row2 + x); + op_.Transform(d, &r, &g, &b); + StoreU(r, d, row0 + x); + StoreU(g, d, row1 + x); + StoreU(b, d, row2 + x); + } + msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? 
RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "ToLinear"; } + + private: + Status IsInitialized() const override { return valid_; } + + Op op_; + bool valid_ = true; +}; + +template +std::unique_ptr> MakeToLinearStage(Op&& op) { + return jxl::make_unique>(std::forward(op)); +} + +std::unique_ptr GetToLinearStage( + const OutputEncodingInfo& output_encoding_info) { + if (output_encoding_info.color_encoding.tf.IsLinear()) { + return MakeToLinearStage(MakePerChannelOp(OpLinear())); + } else if (output_encoding_info.color_encoding.tf.IsSRGB()) { + return MakeToLinearStage(MakePerChannelOp(OpRgb())); + } else if (output_encoding_info.color_encoding.tf.IsPQ()) { + return MakeToLinearStage(MakePerChannelOp(OpPq())); + } else if (output_encoding_info.color_encoding.tf.IsHLG()) { + return MakeToLinearStage(OpHlg(output_encoding_info.luminances, + output_encoding_info.orig_intensity_target)); + } else if (output_encoding_info.color_encoding.tf.Is709()) { + return MakeToLinearStage(MakePerChannelOp(Op709())); + } else if (output_encoding_info.color_encoding.tf.IsGamma() || + output_encoding_info.color_encoding.tf.IsDCI()) { + return MakeToLinearStage( + MakePerChannelOp(OpGamma{1.f / output_encoding_info.inverse_gamma})); + } else { + return jxl::make_unique>(); + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetToLinearStage); + +std::unique_ptr GetToLinearStage( + const OutputEncodingInfo& output_encoding_info) { + return HWY_DYNAMIC_DISPATCH(GetToLinearStage)(output_encoding_info); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.h new file mode 100644 index 0000000000..ccee7b09f0 --- /dev/null +++ 
b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_to_linear.h @@ -0,0 +1,21 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_ + +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Converts the color channels from `output_encoding_info.color_encoding` to +// linear. +std::unique_ptr GetToLinearStage( + const OutputEncodingInfo& output_encoding_info); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_TO_LINEAR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.cc new file mode 100644 index 0000000000..a3b4a3e7cf --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.cc @@ -0,0 +1,149 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/render_pipeline/stage_tone_mapping.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_tone_mapping.cc" +#include +#include + +#include "lib/jxl/dec_tone_mapping-inl.h" +#include "lib/jxl/dec_xyb-inl.h" +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +class ToneMappingStage : public RenderPipelineStage { + public: + explicit ToneMappingStage(OutputEncodingInfo output_encoding_info) + : RenderPipelineStage(RenderPipelineStage::Settings()), + output_encoding_info_(std::move(output_encoding_info)) { + if (output_encoding_info_.desired_intensity_target == + output_encoding_info_.orig_intensity_target) { + // No tone mapping requested. + return; + } + if (output_encoding_info_.orig_color_encoding.tf.IsPQ() && + output_encoding_info_.desired_intensity_target < + output_encoding_info_.orig_intensity_target) { + tone_mapper_ = jxl::make_unique( + /*source_range=*/std::pair( + 0, output_encoding_info_.orig_intensity_target), + /*target_range=*/ + std::pair( + 0, output_encoding_info_.desired_intensity_target), + output_encoding_info_.luminances); + } else if (output_encoding_info_.orig_color_encoding.tf.IsHLG() && + !output_encoding_info_.color_encoding.tf.IsHLG()) { + hlg_ootf_ = jxl::make_unique( + /*source_luminance=*/output_encoding_info_.orig_intensity_target, + /*target_luminance=*/output_encoding_info_.desired_intensity_target, + output_encoding_info_.luminances); + } + + if (output_encoding_info_.color_encoding.tf.IsPQ() && + (tone_mapper_ || hlg_ootf_)) { + to_intensity_target_ = + 10000.f / output_encoding_info_.orig_intensity_target; + from_desired_intensity_target_ = + output_encoding_info_.desired_intensity_target / 10000.f; + } + } + + bool IsNeeded() const { return tone_mapper_ || hlg_ootf_; } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, 
size_t xpos, size_t ypos, + size_t thread_id) const final { + if (!(tone_mapper_ || hlg_ootf_)) return; + + const HWY_FULL(float) d; + const size_t xsize_v = RoundUpTo(xsize, Lanes(d)); + float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0); + float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0); + float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0); + // All calculations are lane-wise, still some might require + // value-dependent behaviour (e.g. NearestInt). Temporary unpoison last + // vector tail. + msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + auto r = LoadU(d, row0 + x); + auto g = LoadU(d, row1 + x); + auto b = LoadU(d, row2 + x); + if (tone_mapper_ || hlg_ootf_) { + r = Mul(r, Set(d, to_intensity_target_)); + g = Mul(g, Set(d, to_intensity_target_)); + b = Mul(b, Set(d, to_intensity_target_)); + if (tone_mapper_) { + tone_mapper_->ToneMap(&r, &g, &b); + } else { + JXL_ASSERT(hlg_ootf_); + hlg_ootf_->Apply(&r, &g, &b); + } + if (tone_mapper_ || hlg_ootf_->WarrantsGamutMapping()) { + GamutMap(&r, &g, &b, output_encoding_info_.luminances); + } + r = Mul(r, Set(d, from_desired_intensity_target_)); + g = Mul(g, Set(d, from_desired_intensity_target_)); + b = Mul(b, Set(d, from_desired_intensity_target_)); + } + StoreU(r, d, row0 + x); + StoreU(g, d, row1 + x); + StoreU(b, d, row2 + x); + } + msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? 
RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "ToneMapping"; } + + private: + using ToneMapper = Rec2408ToneMapper; + OutputEncodingInfo output_encoding_info_; + std::unique_ptr tone_mapper_; + std::unique_ptr hlg_ootf_; + // When the target colorspace is PQ, 1 represents 10000 nits instead of + // orig_intensity_target. This temporarily changes this if the tone mappers + // require it. + float to_intensity_target_ = 1.f; + float from_desired_intensity_target_ = 1.f; +}; + +std::unique_ptr GetToneMappingStage( + const OutputEncodingInfo& output_encoding_info) { + auto stage = jxl::make_unique(output_encoding_info); + if (!stage->IsNeeded()) return nullptr; + return stage; +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetToneMappingStage); + +std::unique_ptr GetToneMappingStage( + const OutputEncodingInfo& output_encoding_info) { + return HWY_DYNAMIC_DISPATCH(GetToneMappingStage)(output_encoding_info); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.h new file mode 100644 index 0000000000..99824f8511 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_tone_mapping.h @@ -0,0 +1,37 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Tone maps the image if appropriate. 
It must be in linear space and +// `output_encoding_info.luminances` must contain the luminance for the +// primaries of that space. It must also be encoded such that (1, 1, 1) +// represents `output_encoding_info.orig_intensity_target` nits, unless +// `output_encoding_info.color_encoding.tf.IsPQ()`, in which case (1, 1, 1) must +// represent 10000 nits. This corresponds to what XYBStage outputs. After this +// stage, (1, 1, 1) will represent +// `output_encoding_info.desired_intensity_target` nits, except in the PQ +// special case in which it remains 10000. +// +// If no tone mapping is necessary, this will return nullptr. +std::unique_ptr GetToneMappingStage( + const OutputEncodingInfo& output_encoding_info); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_TONE_MAPPING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.cc new file mode 100644 index 0000000000..bb8d9b2081 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.cc @@ -0,0 +1,186 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_upsampling.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_upsampling.cc" +#include +#include + +#include "lib/jxl/sanitizers.h" +#include "lib/jxl/simd_util-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Clamp; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Min; +using hwy::HWY_NAMESPACE::MulAdd; + +class UpsamplingStage : public RenderPipelineStage { + public: + explicit UpsamplingStage(const CustomTransformData& ups_factors, size_t c, + size_t shift) + : RenderPipelineStage(RenderPipelineStage::Settings::Symmetric( + /*shift=*/shift, /*border=*/2)), + c_(c) { + const float* weights = shift == 1 ? ups_factors.upsampling2_weights + : shift == 2 ? ups_factors.upsampling4_weights + : ups_factors.upsampling8_weights; + size_t N = 1 << (shift - 1); + for (size_t i = 0; i < 5 * N; i++) { + for (size_t j = 0; j < 5 * N; j++) { + size_t y = std::min(i, j); + size_t x = std::max(i, j); + kernel_[j / 5][i / 5][j % 5][i % 5] = + weights[5 * N * y - y * (y - 1) / 2 + x - y]; + } + } + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + static HWY_FULL(float) df; + size_t shift = settings_.shift_x; + size_t N = 1 << shift; + const size_t xsize_v = RoundUpTo(xsize, Lanes(df)); + for (ssize_t iy = -2; iy <= 2; iy++) { + msan::UnpoisonMemory(GetInputRow(input_rows, c_, iy) + xsize + 2, + sizeof(float) * (xsize_v - xsize)); + } + JXL_ASSERT(xextra == 0); + ssize_t x0 = 0; + ssize_t x1 = xsize; + if (N == 2) { + ProcessRowImpl<2>(input_rows, output_rows, x0, x1); + } + if (N == 4) { + ProcessRowImpl<4>(input_rows, output_rows, x0, x1); + } + if (N == 8) { + ProcessRowImpl<8>(input_rows, output_rows, x0, x1); + } + for (size_t oy = 0; oy < N; oy++) { + float* dst_row = GetOutputRow(output_rows, c_, oy); + msan::PoisonMemory(dst_row + xsize * N, + sizeof(float) * (xsize_v - xsize) * N); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c == c_ ? 
RenderPipelineChannelMode::kInOut + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "Upsample"; } + + private: + template + JXL_INLINE float Kernel(size_t x, size_t y, ssize_t ix, ssize_t iy) const { + ix += 2; + iy += 2; + if (N == 2) { + return kernel_[0][0][y % 2 ? 4 - iy : iy][x % 2 ? 4 - ix : ix]; + } + if (N == 4) { + return kernel_[y % 4 < 2 ? y % 2 : 1 - y % 2] + [x % 4 < 2 ? x % 2 : 1 - x % 2][y % 4 < 2 ? iy : 4 - iy] + [x % 4 < 2 ? ix : 4 - ix]; + } + if (N == 8) { + return kernel_[y % 8 < 4 ? y % 4 : 3 - y % 4] + [x % 8 < 4 ? x % 4 : 3 - x % 4][y % 8 < 4 ? iy : 4 - iy] + [x % 8 < 4 ? ix : 4 - ix]; + } + JXL_UNREACHABLE("Invalid upsample"); + } + + template + void ProcessRowImpl(const RowInfo& input_rows, const RowInfo& output_rows, + ssize_t x0, ssize_t x1) const { + static HWY_FULL(float) df; + using V = hwy::HWY_NAMESPACE::Vec; + V ups0, ups1, ups2, ups3, ups4, ups5, ups6, ups7; + (void)ups2, (void)ups3, (void)ups4, (void)ups5, (void)ups6, (void)ups7; + V* ups[N]; + if (N >= 2) { + ups[0] = &ups0; + ups[1] = &ups1; + } + if (N >= 4) { + ups[2] = &ups2; + ups[3] = &ups3; + } + if (N == 8) { + ups[4] = &ups4; + ups[5] = &ups5; + ups[6] = &ups6; + ups[7] = &ups7; + } + for (size_t oy = 0; oy < N; oy++) { + float* dst_row = GetOutputRow(output_rows, c_, oy); + for (ssize_t x = x0; x < x1; x += Lanes(df)) { + for (size_t ox = 0; ox < N; ox++) { + auto result = Zero(df); + auto min = LoadU(df, GetInputRow(input_rows, c_, 0) + x); + auto max = min; + for (ssize_t iy = -2; iy <= 2; iy++) { + for (ssize_t ix = -2; ix <= 2; ix++) { + auto v = LoadU(df, GetInputRow(input_rows, c_, iy) + x + ix); + result = MulAdd(Set(df, Kernel(ox, oy, ix, iy)), v, result); + min = Min(v, min); + max = Max(v, max); + } + } + // Avoid overshooting. 
+ *ups[ox] = Clamp(result, min, max); + } + if (N == 2) { + StoreInterleaved(df, ups0, ups1, dst_row + x * N); + } + if (N == 4) { + StoreInterleaved(df, ups0, ups1, ups2, ups3, dst_row + x * N); + } + if (N == 8) { + StoreInterleaved(df, ups0, ups1, ups2, ups3, ups4, ups5, ups6, ups7, + dst_row + x * N); + } + } + } + } + + size_t c_; + float kernel_[4][4][5][5]; +}; + +std::unique_ptr GetUpsamplingStage( + const CustomTransformData& ups_factors, size_t c, size_t shift) { + return jxl::make_unique(ups_factors, c, shift); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetUpsamplingStage); + +std::unique_ptr GetUpsamplingStage( + const CustomTransformData& ups_factors, size_t c, size_t shift) { + JXL_ASSERT(shift != 0); + JXL_ASSERT(shift <= 3); + return HWY_DYNAMIC_DISPATCH(GetUpsamplingStage)(ups_factors, c, shift); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.h new file mode 100644 index 0000000000..7d5defd23c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_upsampling.h @@ -0,0 +1,26 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/image_metadata.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Upsamples the given channel by the given factor. 
+std::unique_ptr GetUpsamplingStage( + const CustomTransformData& ups_factors, size_t c, size_t shift); +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_UPSAMPLING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.cc new file mode 100644 index 0000000000..902fc33b7e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.cc @@ -0,0 +1,601 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_write.h" + +#include "lib/jxl/alpha.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/sanitizers.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_write.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Clamp; +using hwy::HWY_NAMESPACE::Div; +using hwy::HWY_NAMESPACE::Max; +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::NearestInt; +using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::Rebind; +using hwy::HWY_NAMESPACE::ShiftLeftSame; +using hwy::HWY_NAMESPACE::ShiftRightSame; + +class WriteToOutputStage : public RenderPipelineStage { + public: + WriteToOutputStage(const ImageOutput& main_output, size_t width, + size_t height, bool has_alpha, bool unpremul_alpha, + size_t alpha_c, Orientation undo_orientation, + const std::vector& extra_output) + : RenderPipelineStage(RenderPipelineStage::Settings()), + width_(width), + height_(height), + main_(main_output), + num_color_(main_.num_channels_ < 3 ? 
1 : 3), + want_alpha_(main_.num_channels_ == 2 || main_.num_channels_ == 4), + has_alpha_(has_alpha), + unpremul_alpha_(unpremul_alpha), + alpha_c_(alpha_c), + flip_x_(ShouldFlipX(undo_orientation)), + flip_y_(ShouldFlipY(undo_orientation)), + transpose_(ShouldTranspose(undo_orientation)), + opaque_alpha_(kMaxPixelsPerCall, 1.0f) { + for (size_t ec = 0; ec < extra_output.size(); ++ec) { + if (extra_output[ec].callback.IsPresent() || extra_output[ec].buffer) { + Output extra(extra_output[ec]); + extra.channel_index_ = 3 + ec; + extra_channels_.push_back(extra); + } + } + } + + WriteToOutputStage(const WriteToOutputStage&) = delete; + WriteToOutputStage& operator=(const WriteToOutputStage&) = delete; + WriteToOutputStage(WriteToOutputStage&&) = delete; + WriteToOutputStage& operator=(WriteToOutputStage&&) = delete; + + ~WriteToOutputStage() override { + if (main_.run_opaque_) { + main_.pixel_callback_.destroy(main_.run_opaque_); + } + for (auto& extra : extra_channels_) { + if (extra.run_opaque_) { + extra.pixel_callback_.destroy(extra.run_opaque_); + } + } + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + JXL_DASSERT(xextra == 0); + JXL_DASSERT(main_.run_opaque_ || main_.buffer_); + if (ypos >= height_) return; + if (xpos >= width_) return; + if (flip_y_) { + ypos = height_ - 1u - ypos; + } + size_t limit = std::min(xsize, width_ - xpos); + for (size_t x0 = 0; x0 < limit; x0 += kMaxPixelsPerCall) { + size_t xstart = xpos + x0; + size_t len = std::min(kMaxPixelsPerCall, limit - x0); + + const float* line_buffers[4]; + for (size_t c = 0; c < num_color_; c++) { + line_buffers[c] = GetInputRow(input_rows, c, 0) + x0; + } + if (has_alpha_) { + line_buffers[num_color_] = GetInputRow(input_rows, alpha_c_, 0) + x0; + } else { + // opaque_alpha_ is a way to set all values to 1.0f. 
+ line_buffers[num_color_] = opaque_alpha_.data(); + } + if (has_alpha_ && want_alpha_ && unpremul_alpha_) { + UnpremulAlpha(thread_id, len, line_buffers); + } + OutputBuffers(main_, thread_id, ypos, xstart, len, line_buffers); + for (const auto& extra : extra_channels_) { + line_buffers[0] = GetInputRow(input_rows, extra.channel_index_, 0) + x0; + OutputBuffers(extra, thread_id, ypos, xstart, len, line_buffers); + } + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + if (c < num_color_ || (has_alpha_ && c == alpha_c_)) { + return RenderPipelineChannelMode::kInput; + } + for (const auto& extra : extra_channels_) { + if (c == extra.channel_index_) { + return RenderPipelineChannelMode::kInput; + } + } + return RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "WritePixelCB"; } + + private: + struct Output { + Output(const ImageOutput& image_out) + : pixel_callback_(image_out.callback), + buffer_(image_out.buffer), + buffer_size_(image_out.buffer_size), + stride_(image_out.stride), + num_channels_(image_out.format.num_channels), + swap_endianness_(SwapEndianness(image_out.format.endianness)), + data_type_(image_out.format.data_type), + bits_per_sample_(image_out.bits_per_sample) {} + + Status PrepareForThreads(size_t num_threads) { + if (pixel_callback_.IsPresent()) { + run_opaque_ = + pixel_callback_.Init(num_threads, /*num_pixels=*/kMaxPixelsPerCall); + JXL_RETURN_IF_ERROR(run_opaque_ != nullptr); + } else { + JXL_RETURN_IF_ERROR(buffer_ != nullptr); + } + return true; + } + + PixelCallback pixel_callback_; + void* run_opaque_ = nullptr; + void* buffer_ = nullptr; + size_t buffer_size_; + size_t stride_; + size_t num_channels_; + bool swap_endianness_; + JxlDataType data_type_; + size_t bits_per_sample_; + size_t channel_index_; // used for extra_channels + }; + + Status PrepareForThreads(size_t num_threads) override { + JXL_RETURN_IF_ERROR(main_.PrepareForThreads(num_threads)); + for (auto& extra : 
extra_channels_) { + JXL_RETURN_IF_ERROR(extra.PrepareForThreads(num_threads)); + } + temp_out_.resize(num_threads); + for (CacheAlignedUniquePtr& temp : temp_out_) { + temp = AllocateArray(sizeof(float) * kMaxPixelsPerCall * + main_.num_channels_); + } + if ((has_alpha_ && want_alpha_ && unpremul_alpha_) || flip_x_) { + temp_in_.resize(num_threads * main_.num_channels_); + for (CacheAlignedUniquePtr& temp : temp_in_) { + temp = AllocateArray(sizeof(float) * kMaxPixelsPerCall); + } + } + return true; + } + static bool ShouldFlipX(Orientation undo_orientation) { + return (undo_orientation == Orientation::kFlipHorizontal || + undo_orientation == Orientation::kRotate180 || + undo_orientation == Orientation::kRotate270 || + undo_orientation == Orientation::kAntiTranspose); + } + static bool ShouldFlipY(Orientation undo_orientation) { + return (undo_orientation == Orientation::kFlipVertical || + undo_orientation == Orientation::kRotate180 || + undo_orientation == Orientation::kRotate90 || + undo_orientation == Orientation::kAntiTranspose); + } + static bool ShouldTranspose(Orientation undo_orientation) { + return (undo_orientation == Orientation::kTranspose || + undo_orientation == Orientation::kRotate90 || + undo_orientation == Orientation::kRotate270 || + undo_orientation == Orientation::kAntiTranspose); + } + + void UnpremulAlpha(size_t thread_id, size_t len, + const float** line_buffers) const { + const HWY_FULL(float) d; + auto one = Set(d, 1.0f); + float* temp_in[4]; + for (size_t c = 0; c < main_.num_channels_; ++c) { + size_t tix = thread_id * main_.num_channels_ + c; + temp_in[c] = reinterpret_cast(temp_in_[tix].get()); + memcpy(temp_in[c], line_buffers[c], sizeof(float) * len); + } + auto small_alpha = Set(d, kSmallAlpha); + for (size_t ix = 0; ix < len; ix += Lanes(d)) { + auto alpha = LoadU(d, temp_in[num_color_] + ix); + auto mul = Div(one, Max(small_alpha, alpha)); + for (size_t c = 0; c < num_color_; ++c) { + auto val = LoadU(d, temp_in[c] + ix); + 
StoreU(Mul(val, mul), d, temp_in[c] + ix); + } + } + for (size_t c = 0; c < main_.num_channels_; ++c) { + line_buffers[c] = temp_in[c]; + } + } + + void OutputBuffers(const Output& out, size_t thread_id, size_t ypos, + size_t xstart, size_t len, const float* input[4]) const { + if (flip_x_) { + FlipX(out, thread_id, len, &xstart, input); + } + if (out.data_type_ == JXL_TYPE_UINT8) { + uint8_t* JXL_RESTRICT temp = + reinterpret_cast(temp_out_[thread_id].get()); + StoreUnsignedRow(out, input, len, temp); + WriteToOutput(out, thread_id, ypos, xstart, len, temp); + } else if (out.data_type_ == JXL_TYPE_UINT16 || + out.data_type_ == JXL_TYPE_FLOAT16) { + uint16_t* JXL_RESTRICT temp = + reinterpret_cast(temp_out_[thread_id].get()); + if (out.data_type_ == JXL_TYPE_UINT16) { + StoreUnsignedRow(out, input, len, temp); + } else { + StoreFloat16Row(out, input, len, temp); + } + if (out.swap_endianness_) { + const HWY_FULL(uint16_t) du; + size_t output_len = len * out.num_channels_; + for (size_t j = 0; j < output_len; j += Lanes(du)) { + auto v = LoadU(du, temp + j); + auto vswap = Or(ShiftRightSame(v, 8), ShiftLeftSame(v, 8)); + StoreU(vswap, du, temp + j); + } + } + WriteToOutput(out, thread_id, ypos, xstart, len, temp); + } else if (out.data_type_ == JXL_TYPE_FLOAT) { + float* JXL_RESTRICT temp = + reinterpret_cast(temp_out_[thread_id].get()); + StoreFloatRow(out, input, len, temp); + if (out.swap_endianness_) { + size_t output_len = len * out.num_channels_; + for (size_t j = 0; j < output_len; ++j) { + temp[j] = BSwapFloat(temp[j]); + } + } + WriteToOutput(out, thread_id, ypos, xstart, len, temp); + } + } + + void FlipX(const Output& out, size_t thread_id, size_t len, size_t* xstart, + const float** line_buffers) const { + float* temp_in[4]; + for (size_t c = 0; c < out.num_channels_; ++c) { + size_t tix = thread_id * main_.num_channels_ + c; + temp_in[c] = reinterpret_cast(temp_in_[tix].get()); + if (temp_in[c] != line_buffers[c]) { + memcpy(temp_in[c], line_buffers[c], 
sizeof(float) * len); + } + } + size_t last = (len - 1u); + size_t num = (len / 2); + for (size_t i = 0; i < num; ++i) { + for (size_t c = 0; c < out.num_channels_; ++c) { + std::swap(temp_in[c][i], temp_in[c][last - i]); + } + } + for (size_t c = 0; c < out.num_channels_; ++c) { + line_buffers[c] = temp_in[c]; + } + *xstart = width_ - *xstart - len; + } + + template + void StoreUnsignedRow(const Output& out, const float* input[4], size_t len, + T* output) const { + const HWY_FULL(float) d; + auto zero = Zero(d); + auto one = Set(d, 1.0f); + auto mul = Set(d, (1u << (out.bits_per_sample_)) - 1); + const Rebind du; + const size_t padding = RoundUpTo(len, Lanes(d)) - len; + for (size_t c = 0; c < out.num_channels_; ++c) { + msan::UnpoisonMemory(input[c] + len, sizeof(input[c][0]) * padding); + } + if (out.num_channels_ == 1) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul); + StoreU(DemoteTo(du, NearestInt(v0)), du, &output[i]); + } + } else if (out.num_channels_ == 2) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul); + auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul); + StoreInterleaved2(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), du, &output[2 * i]); + } + } else if (out.num_channels_ == 3) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul); + auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul); + auto v2 = Mul(Clamp(zero, LoadU(d, &input[2][i]), one), mul); + StoreInterleaved3(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), + DemoteTo(du, NearestInt(v2)), du, &output[3 * i]); + } + } else if (out.num_channels_ == 4) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = Mul(Clamp(zero, LoadU(d, &input[0][i]), one), mul); + auto v1 = Mul(Clamp(zero, LoadU(d, &input[1][i]), one), mul); + auto v2 = Mul(Clamp(zero, LoadU(d, 
&input[2][i]), one), mul); + auto v3 = Mul(Clamp(zero, LoadU(d, &input[3][i]), one), mul); + StoreInterleaved4(DemoteTo(du, NearestInt(v0)), + DemoteTo(du, NearestInt(v1)), + DemoteTo(du, NearestInt(v2)), + DemoteTo(du, NearestInt(v3)), du, &output[4 * i]); + } + } + msan::PoisonMemory(output + out.num_channels_ * len, + sizeof(output[0]) * out.num_channels_ * padding); + } + + void StoreFloat16Row(const Output& out, const float* input[4], size_t len, + uint16_t* output) const { + const HWY_FULL(float) d; + const Rebind du; + const Rebind df16; + const size_t padding = RoundUpTo(len, Lanes(d)) - len; + for (size_t c = 0; c < out.num_channels_; ++c) { + msan::UnpoisonMemory(input[c] + len, sizeof(input[c][0]) * padding); + } + if (out.num_channels_ == 1) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = LoadU(d, &input[0][i]); + StoreU(BitCast(du, DemoteTo(df16, v0)), du, &output[i]); + } + } else if (out.num_channels_ == 2) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = LoadU(d, &input[0][i]); + auto v1 = LoadU(d, &input[1][i]); + StoreInterleaved2(BitCast(du, DemoteTo(df16, v0)), + BitCast(du, DemoteTo(df16, v1)), du, &output[2 * i]); + } + } else if (out.num_channels_ == 3) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = LoadU(d, &input[0][i]); + auto v1 = LoadU(d, &input[1][i]); + auto v2 = LoadU(d, &input[2][i]); + StoreInterleaved3(BitCast(du, DemoteTo(df16, v0)), + BitCast(du, DemoteTo(df16, v1)), + BitCast(du, DemoteTo(df16, v2)), du, &output[3 * i]); + } + } else if (out.num_channels_ == 4) { + for (size_t i = 0; i < len; i += Lanes(d)) { + auto v0 = LoadU(d, &input[0][i]); + auto v1 = LoadU(d, &input[1][i]); + auto v2 = LoadU(d, &input[2][i]); + auto v3 = LoadU(d, &input[3][i]); + StoreInterleaved4(BitCast(du, DemoteTo(df16, v0)), + BitCast(du, DemoteTo(df16, v1)), + BitCast(du, DemoteTo(df16, v2)), + BitCast(du, DemoteTo(df16, v3)), du, &output[4 * i]); + } + } + msan::PoisonMemory(output + out.num_channels_ * len, + 
sizeof(output[0]) * out.num_channels_ * padding); + } + + void StoreFloatRow(const Output& out, const float* input[4], size_t len, + float* output) const { + const HWY_FULL(float) d; + if (out.num_channels_ == 1) { + memcpy(output, input[0], len * sizeof(output[0])); + } else if (out.num_channels_ == 2) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved2(LoadU(d, &input[0][i]), LoadU(d, &input[1][i]), d, + &output[2 * i]); + } + } else if (out.num_channels_ == 3) { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved3(LoadU(d, &input[0][i]), LoadU(d, &input[1][i]), + LoadU(d, &input[2][i]), d, &output[3 * i]); + } + } else { + for (size_t i = 0; i < len; i += Lanes(d)) { + StoreInterleaved4(LoadU(d, &input[0][i]), LoadU(d, &input[1][i]), + LoadU(d, &input[2][i]), LoadU(d, &input[3][i]), d, + &output[4 * i]); + } + } + } + + template + void WriteToOutput(const Output& out, size_t thread_id, size_t ypos, + size_t xstart, size_t len, T* output) const { + if (transpose_) { + // TODO(szabadka) Buffer 8x8 chunks and transpose with SIMD. 
+ if (out.run_opaque_) { + for (size_t i = 0, j = 0; i < len; ++i, j += out.num_channels_) { + out.pixel_callback_.run(out.run_opaque_, thread_id, ypos, xstart + i, + 1, output + j); + } + } else { + const size_t pixel_stride = out.num_channels_ * sizeof(T); + const size_t offset = xstart * out.stride_ + ypos * pixel_stride; + for (size_t i = 0, j = 0; i < len; ++i, j += out.num_channels_) { + const size_t ix = offset + i * out.stride_; + JXL_DASSERT(ix + pixel_stride <= out.buffer_size_); + memcpy(reinterpret_cast(out.buffer_) + ix, output + j, + pixel_stride); + } + } + } else { + if (out.run_opaque_) { + out.pixel_callback_.run(out.run_opaque_, thread_id, xstart, ypos, len, + output); + } else { + const size_t pixel_stride = out.num_channels_ * sizeof(T); + const size_t offset = ypos * out.stride_ + xstart * pixel_stride; + JXL_DASSERT(offset + len * pixel_stride <= out.buffer_size_); + memcpy(reinterpret_cast(out.buffer_) + offset, output, + len * pixel_stride); + } + } + } + + static constexpr size_t kMaxPixelsPerCall = 1024; + size_t width_; + size_t height_; + Output main_; // color + alpha + size_t num_color_; + bool want_alpha_; + bool has_alpha_; + bool unpremul_alpha_; + size_t alpha_c_; + bool flip_x_; + bool flip_y_; + bool transpose_; + std::vector extra_channels_; + std::vector opaque_alpha_; + std::vector temp_in_; + std::vector temp_out_; +}; + +constexpr size_t WriteToOutputStage::kMaxPixelsPerCall; + +std::unique_ptr GetWriteToOutputStage( + const ImageOutput& main_output, size_t width, size_t height, bool has_alpha, + bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation, + std::vector& extra_output) { + return jxl::make_unique( + main_output, width, height, has_alpha, unpremul_alpha, alpha_c, + undo_orientation, extra_output); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE + +namespace jxl { + +HWY_EXPORT(GetWriteToOutputStage); 
+ +namespace { +class WriteToImageBundleStage : public RenderPipelineStage { + public: + explicit WriteToImageBundleStage(ImageBundle* image_bundle, + ColorEncoding color_encoding) + : RenderPipelineStage(RenderPipelineStage::Settings()), + image_bundle_(image_bundle), + color_encoding_(std::move(color_encoding)) {} + + void SetInputSizes( + const std::vector>& input_sizes) override { +#if JXL_ENABLE_ASSERT + JXL_ASSERT(input_sizes.size() >= 3); + for (size_t c = 1; c < input_sizes.size(); c++) { + JXL_ASSERT(input_sizes[c].first == input_sizes[0].first); + JXL_ASSERT(input_sizes[c].second == input_sizes[0].second); + } +#endif + // TODO(eustas): what should we do in the case of "want only ECs"? + image_bundle_->SetFromImage( + Image3F(input_sizes[0].first, input_sizes[0].second), color_encoding_); + // TODO(veluca): consider not reallocating ECs if not needed. + image_bundle_->extra_channels().clear(); + for (size_t c = 3; c < input_sizes.size(); c++) { + image_bundle_->extra_channels().emplace_back(input_sizes[c].first, + input_sizes[c].second); + } + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + for (size_t c = 0; c < 3; c++) { + memcpy(image_bundle_->color()->PlaneRow(c, ypos) + xpos - xextra, + GetInputRow(input_rows, c, 0) - xextra, + sizeof(float) * (xsize + 2 * xextra)); + } + for (size_t ec = 0; ec < image_bundle_->extra_channels().size(); ec++) { + JXL_ASSERT(image_bundle_->extra_channels()[ec].xsize() >= + xpos + xsize + xextra); + memcpy(image_bundle_->extra_channels()[ec].Row(ypos) + xpos - xextra, + GetInputRow(input_rows, 3 + ec, 0) - xextra, + sizeof(float) * (xsize + 2 * xextra)); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return RenderPipelineChannelMode::kInput; + } + + const char* GetName() const override { return "WriteIB"; } + + private: + ImageBundle* image_bundle_; + ColorEncoding 
color_encoding_; +}; + +class WriteToImage3FStage : public RenderPipelineStage { + public: + explicit WriteToImage3FStage(Image3F* image) + : RenderPipelineStage(RenderPipelineStage::Settings()), image_(image) {} + + void SetInputSizes( + const std::vector>& input_sizes) override { +#if JXL_ENABLE_ASSERT + JXL_ASSERT(input_sizes.size() >= 3); + for (size_t c = 1; c < 3; ++c) { + JXL_ASSERT(input_sizes[c].first == input_sizes[0].first); + JXL_ASSERT(input_sizes[c].second == input_sizes[0].second); + } +#endif + *image_ = Image3F(input_sizes[0].first, input_sizes[0].second); + } + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + for (size_t c = 0; c < 3; c++) { + memcpy(image_->PlaneRow(c, ypos) + xpos - xextra, + GetInputRow(input_rows, c, 0) - xextra, + sizeof(float) * (xsize + 2 * xextra)); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? 
RenderPipelineChannelMode::kInput + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "WriteI3F"; } + + private: + Image3F* image_; +}; + +} // namespace + +std::unique_ptr GetWriteToImageBundleStage( + ImageBundle* image_bundle, ColorEncoding color_encoding) { + return jxl::make_unique(image_bundle, + std::move(color_encoding)); +} + +std::unique_ptr GetWriteToImage3FStage(Image3F* image) { + return jxl::make_unique(image); +} + +std::unique_ptr GetWriteToOutputStage( + const ImageOutput& main_output, size_t width, size_t height, bool has_alpha, + bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation, + std::vector& extra_output) { + return HWY_DYNAMIC_DISPATCH(GetWriteToOutputStage)( + main_output, width, height, has_alpha, unpremul_alpha, alpha_c, + undo_orientation, extra_output); +} + +} // namespace jxl + +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.h new file mode 100644 index 0000000000..c5f844ebe8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_write.h @@ -0,0 +1,31 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_ + +#include + +#include "lib/jxl/dec_cache.h" +#include "lib/jxl/image_bundle.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +std::unique_ptr GetWriteToImageBundleStage( + ImageBundle* image_bundle, ColorEncoding color_encoding); + +// Gets a stage to write color channels to an Image3F. +std::unique_ptr GetWriteToImage3FStage(Image3F* image); + +// Gets a stage to write to a pixel callback or image buffer. 
+std::unique_ptr GetWriteToOutputStage( + const ImageOutput& main_output, size_t width, size_t height, bool has_alpha, + bool unpremul_alpha, size_t alpha_c, Orientation undo_orientation, + std::vector& extra_output); + +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_WRITE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.cc new file mode 100644 index 0000000000..093f3d1b4d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.cc @@ -0,0 +1,176 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_xyb.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_xyb.cc" +#include +#include + +#include "lib/jxl/dec_xyb-inl.h" +#include "lib/jxl/opsin_params.h" +#include "lib/jxl/sanitizers.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +class XYBStage : public RenderPipelineStage { + public: + explicit XYBStage(const OutputEncodingInfo& output_encoding_info) + : RenderPipelineStage(RenderPipelineStage::Settings()), + opsin_params_(output_encoding_info.opsin_params), + output_is_xyb_(output_encoding_info.color_encoding.GetColorSpace() == + ColorSpace::kXYB) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + const HWY_FULL(float) d; + JXL_ASSERT(xextra == 0); + const size_t xsize_v = RoundUpTo(xsize, Lanes(d)); + float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0); + float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0); + float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0); + // All calculations are lane-wise, still some might require + // value-dependent behaviour (e.g. NearestInt). 
Temporary unpoison last + // vector tail. + msan::UnpoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::UnpoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + // TODO(eustas): when using frame origin, addresses might be unaligned; + // making them aligned will void performance penalty. + if (output_is_xyb_) { + const auto scale_x = Set(d, kScaledXYBScale[0]); + const auto scale_y = Set(d, kScaledXYBScale[1]); + const auto scale_bmy = Set(d, kScaledXYBScale[2]); + const auto offset_x = Set(d, kScaledXYBOffset[0]); + const auto offset_y = Set(d, kScaledXYBOffset[1]); + const auto offset_bmy = Set(d, kScaledXYBOffset[2]); + for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + const auto in_x = LoadU(d, row0 + x); + const auto in_y = LoadU(d, row1 + x); + const auto in_b = LoadU(d, row2 + x); + auto out_x = Mul(Add(in_x, offset_x), scale_x); + auto out_y = Mul(Add(in_y, offset_y), scale_y); + auto out_b = Mul(Add(Sub(in_b, in_y), offset_bmy), scale_bmy); + StoreU(out_x, d, row0 + x); + StoreU(out_y, d, row1 + x); + StoreU(out_b, d, row2 + x); + } + } else { + for (ssize_t x = -xextra; x < (ssize_t)(xsize + xextra); x += Lanes(d)) { + const auto in_opsin_x = LoadU(d, row0 + x); + const auto in_opsin_y = LoadU(d, row1 + x); + const auto in_opsin_b = LoadU(d, row2 + x); + auto r = Undefined(d); + auto g = Undefined(d); + auto b = Undefined(d); + XybToRgb(d, in_opsin_x, in_opsin_y, in_opsin_b, opsin_params_, &r, &g, + &b); + StoreU(r, d, row0 + x); + StoreU(g, d, row1 + x); + StoreU(b, d, row2 + x); + } + } + msan::PoisonMemory(row0 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row1 + xsize, sizeof(float) * (xsize_v - xsize)); + msan::PoisonMemory(row2 + xsize, sizeof(float) * (xsize_v - xsize)); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? 
RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "XYB"; } + + private: + const OpsinParams opsin_params_; + const bool output_is_xyb_; +}; + +std::unique_ptr GetXYBStage( + const OutputEncodingInfo& output_encoding_info) { + return jxl::make_unique(output_encoding_info); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetXYBStage); + +std::unique_ptr GetXYBStage( + const OutputEncodingInfo& output_encoding_info) { + return HWY_DYNAMIC_DISPATCH(GetXYBStage)(output_encoding_info); +} + +#if !JXL_HIGH_PRECISION +namespace { +class FastXYBStage : public RenderPipelineStage { + public: + FastXYBStage(uint8_t* rgb, size_t stride, size_t width, size_t height, + bool rgba, bool has_alpha, size_t alpha_c) + : RenderPipelineStage(RenderPipelineStage::Settings()), + rgb_(rgb), + stride_(stride), + width_(width), + height_(height), + rgba_(rgba), + has_alpha_(has_alpha), + alpha_c_(alpha_c) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + if (ypos >= height_) return; + JXL_ASSERT(xextra == 0); + const float* xyba[4] = { + GetInputRow(input_rows, 0, 0), GetInputRow(input_rows, 1, 0), + GetInputRow(input_rows, 2, 0), + has_alpha_ ? GetInputRow(input_rows, alpha_c_, 0) : nullptr}; + uint8_t* out_buf = rgb_ + stride_ * ypos + (rgba_ ? 4 : 3) * xpos; + FastXYBTosRGB8(xyba, out_buf, rgba_, + xsize + xpos <= width_ ? xsize : width_ - xpos); + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 || (has_alpha_ && c == alpha_c_) + ? 
RenderPipelineChannelMode::kInput + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "FastXYB"; } + + private: + uint8_t* rgb_; + size_t stride_; + size_t width_; + size_t height_; + bool rgba_; + bool has_alpha_; + size_t alpha_c_; + std::vector opaque_alpha_; +}; + +} // namespace + +std::unique_ptr GetFastXYBTosRGB8Stage( + uint8_t* rgb, size_t stride, size_t width, size_t height, bool rgba, + bool has_alpha, size_t alpha_c) { + JXL_ASSERT(HasFastXYBTosRGB8()); + return make_unique(rgb, stride, width, height, rgba, has_alpha, + alpha_c); +} +#endif + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.h new file mode 100644 index 0000000000..7b06345c36 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_xyb.h @@ -0,0 +1,26 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_ +#include + +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Converts the color channels from XYB to linear with appropriate primaries. +std::unique_ptr GetXYBStage( + const OutputEncodingInfo& output_encoding_info); + +// Gets a stage to convert with fixed point arithmetic from XYB to sRGB8 and +// write to a uint8 buffer. 
+std::unique_ptr GetFastXYBTosRGB8Stage( + uint8_t* rgb, size_t stride, size_t width, size_t height, bool rgba, + bool has_alpha, size_t alpha_c); +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_XYB_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.cc b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.cc new file mode 100644 index 0000000000..30ad327221 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.cc @@ -0,0 +1,83 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/render_pipeline/stage_ycbcr.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/render_pipeline/stage_ycbcr.cc" +#include +#include + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::MulAdd; + +class kYCbCrStage : public RenderPipelineStage { + public: + kYCbCrStage() : RenderPipelineStage(RenderPipelineStage::Settings()) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + const HWY_FULL(float) df; + + // Full-range BT.601 as defined by JFIF Clause 7: + // https://www.itu.int/rec/T-REC-T.871-201105-I/en + const auto c128 = Set(df, 128.0f / 255); + const auto crcr = Set(df, 1.402f); + const auto cgcb = Set(df, -0.114f * 1.772f / 0.587f); + const auto cgcr = Set(df, -0.299f * 1.402f / 0.587f); + const auto cbcb = Set(df, 1.772f); + + float* JXL_RESTRICT row0 = GetInputRow(input_rows, 0, 0); + float* JXL_RESTRICT row1 = GetInputRow(input_rows, 1, 0); + float* JXL_RESTRICT row2 = GetInputRow(input_rows, 2, 0); + // TODO(eustas): when using frame origin, addresses might be unaligned; + // making them aligned will 
void performance penalty. + for (size_t x = 0; x < xsize; x += Lanes(df)) { + const auto y_vec = Add(LoadU(df, row1 + x), c128); + const auto cb_vec = LoadU(df, row0 + x); + const auto cr_vec = LoadU(df, row2 + x); + const auto r_vec = MulAdd(crcr, cr_vec, y_vec); + const auto g_vec = MulAdd(cgcr, cr_vec, MulAdd(cgcb, cb_vec, y_vec)); + const auto b_vec = MulAdd(cbcb, cb_vec, y_vec); + StoreU(r_vec, df, row0 + x); + StoreU(g_vec, df, row1 + x); + StoreU(b_vec, df, row2 + x); + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return c < 3 ? RenderPipelineChannelMode::kInPlace + : RenderPipelineChannelMode::kIgnored; + } + + const char* GetName() const override { return "YCbCr"; } +}; + +std::unique_ptr GetYCbCrStage() { + return jxl::make_unique(); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +HWY_EXPORT(GetYCbCrStage); + +std::unique_ptr GetYCbCrStage() { + return HWY_DYNAMIC_DISPATCH(GetYCbCrStage)(); +} + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.h new file mode 100644 index 0000000000..9320c9723f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/stage_ycbcr.h @@ -0,0 +1,25 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_ +#define LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_ +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/dec_xyb.h" +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +// Converts the color channels from YCbCr to RGB. 
+std::unique_ptr GetYCbCrStage(); +} // namespace jxl + +#endif // LIB_JXL_RENDER_PIPELINE_STAGE_YCBCR_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/render_pipeline/test_render_pipeline_stages.h b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/test_render_pipeline_stages.h new file mode 100644 index 0000000000..789a52f8b2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/render_pipeline/test_render_pipeline_stages.h @@ -0,0 +1,101 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include + +#include +#include +#include + +#include "lib/jxl/render_pipeline/render_pipeline_stage.h" + +namespace jxl { + +class UpsampleXSlowStage : public RenderPipelineStage { + public: + UpsampleXSlowStage() + : RenderPipelineStage(RenderPipelineStage::Settings::ShiftX(1, 1)) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + for (size_t c = 0; c < input_rows.size(); c++) { + const float* row = GetInputRow(input_rows, c, 0); + float* row_out = GetOutputRow(output_rows, c, 0); + for (int64_t x = -xextra; x < (int64_t)(xsize + xextra); x++) { + float xp = *(row + x - 1); + float xc = *(row + x); + float xn = *(row + x + 1); + float xout0 = xp * 0.25f + xc * 0.75f; + float xout1 = xc * 0.75f + xn * 0.25f; + *(row_out + 2 * x + 0) = xout0; + *(row_out + 2 * x + 1) = xout1; + } + } + } + + const char* GetName() const override { return "TEST::UpsampleXSlowStage"; } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return RenderPipelineChannelMode::kInOut; + } +}; + +class UpsampleYSlowStage : public RenderPipelineStage { + public: + UpsampleYSlowStage() + : RenderPipelineStage(RenderPipelineStage::Settings::ShiftY(1, 1)) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& 
output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + for (size_t c = 0; c < input_rows.size(); c++) { + const float* rowp = GetInputRow(input_rows, c, -1); + const float* rowc = GetInputRow(input_rows, c, 0); + const float* rown = GetInputRow(input_rows, c, 1); + float* row_out0 = GetOutputRow(output_rows, c, 0); + float* row_out1 = GetOutputRow(output_rows, c, 1); + for (int64_t x = -xextra; x < (int64_t)(xsize + xextra); x++) { + float xp = *(rowp + x); + float xc = *(rowc + x); + float xn = *(rown + x); + float yout0 = xp * 0.25f + xc * 0.75f; + float yout1 = xc * 0.75f + xn * 0.25f; + *(row_out0 + x) = yout0; + *(row_out1 + x) = yout1; + } + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return RenderPipelineChannelMode::kInOut; + } + + const char* GetName() const override { return "TEST::UpsampleYSlowStage"; } +}; + +class Check0FinalStage : public RenderPipelineStage { + public: + Check0FinalStage() : RenderPipelineStage(RenderPipelineStage::Settings()) {} + + void ProcessRow(const RowInfo& input_rows, const RowInfo& output_rows, + size_t xextra, size_t xsize, size_t xpos, size_t ypos, + size_t thread_id) const final { + for (size_t c = 0; c < input_rows.size(); c++) { + for (size_t x = 0; x < xsize; x++) { + JXL_CHECK(fabsf(GetInputRow(input_rows, c, 0)[x]) < 1e-8); + } + } + } + + RenderPipelineChannelMode GetChannelMode(size_t c) const final { + return RenderPipelineChannelMode::kInput; + } + const char* GetName() const override { return "TEST::Check0FinalStage"; } +}; + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/roundtrip_test.cc b/third-party/libjxl/libjxl/lib/jxl/roundtrip_test.cc new file mode 100644 index 0000000000..7640ca7c5f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/roundtrip_test.cc @@ -0,0 +1,833 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. 
+// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include +#include +#include +#include + +#include // std::abs +#include +#include +#include +#include + +#include "lib/extras/codec.h" +#include "lib/jxl/dec_external_image.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_comparator.h" +#include "lib/jxl/enc_external_image.h" +#include "lib/jxl/encode_internal.h" +#include "lib/jxl/icc_codec.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace { + +// Converts a test image to a CodecInOut. +// icc_profile can be empty to automatically deduce profile from the pixel +// format, or filled in to force this ICC profile +jxl::CodecInOut ConvertTestImage(const std::vector& buf, + const size_t xsize, const size_t ysize, + const JxlPixelFormat& pixel_format, + const jxl::PaddedBytes& icc_profile) { + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + + bool is_gray = pixel_format.num_channels < 3; + bool has_alpha = + pixel_format.num_channels == 2 || pixel_format.num_channels == 4; + + io.metadata.m.color_encoding.SetColorSpace(is_gray ? 
jxl::ColorSpace::kGray + : jxl::ColorSpace::kRGB); + if (has_alpha) { + // Note: alpha > 16 not yet supported by the C++ codec + switch (pixel_format.data_type) { + case JXL_TYPE_UINT8: + io.metadata.m.SetAlphaBits(8); + break; + case JXL_TYPE_UINT16: + case JXL_TYPE_FLOAT: + case JXL_TYPE_FLOAT16: + io.metadata.m.SetAlphaBits(16); + break; + default: + ADD_FAILURE() << "Roundtrip tests for data type " + << pixel_format.data_type << " not yet implemented."; + } + } + size_t bitdepth = 0; + switch (pixel_format.data_type) { + case JXL_TYPE_FLOAT: + bitdepth = 32; + io.metadata.m.SetFloat32Samples(); + break; + case JXL_TYPE_FLOAT16: + bitdepth = 16; + io.metadata.m.SetFloat16Samples(); + break; + case JXL_TYPE_UINT8: + bitdepth = 8; + io.metadata.m.SetUintSamples(8); + break; + case JXL_TYPE_UINT16: + bitdepth = 16; + io.metadata.m.SetUintSamples(16); + break; + default: + ADD_FAILURE() << "Roundtrip tests for data type " + << pixel_format.data_type << " not yet implemented."; + } + jxl::ColorEncoding color_encoding; + if (!icc_profile.empty()) { + jxl::PaddedBytes icc_profile_copy(icc_profile); + EXPECT_TRUE( + color_encoding.SetICC(std::move(icc_profile_copy), &jxl::GetJxlCms())); + } else if (pixel_format.data_type == JXL_TYPE_FLOAT) { + color_encoding = jxl::ColorEncoding::LinearSRGB(is_gray); + } else { + color_encoding = jxl::ColorEncoding::SRGB(is_gray); + } + EXPECT_TRUE( + ConvertFromExternal(jxl::Span(buf.data(), buf.size()), + xsize, ysize, color_encoding, + /*bits_per_sample=*/bitdepth, pixel_format, + /*pool=*/nullptr, &io.Main())); + return io; +} + +template +T ConvertTestPixel(float val); + +template <> +float ConvertTestPixel(const float val) { + return val; +} + +template <> +uint16_t ConvertTestPixel(const float val) { + return (uint16_t)(val * UINT16_MAX); +} + +template <> +uint8_t ConvertTestPixel(const float val) { + return (uint8_t)(val * UINT8_MAX); +} + +// Returns a test image. 
+template +std::vector GetTestImage(const size_t xsize, const size_t ysize, + const JxlPixelFormat& pixel_format) { + std::vector pixels(xsize * ysize * pixel_format.num_channels); + for (size_t y = 0; y < ysize; y++) { + for (size_t x = 0; x < xsize; x++) { + for (size_t chan = 0; chan < pixel_format.num_channels; chan++) { + float val; + switch (chan % 4) { + case 0: + val = static_cast(y) / static_cast(ysize); + break; + case 1: + val = static_cast(x) / static_cast(xsize); + break; + case 2: + val = static_cast(x + y) / static_cast(xsize + ysize); + break; + case 3: + val = static_cast(x * y) / static_cast(xsize * ysize); + break; + } + pixels[(y * xsize + x) * pixel_format.num_channels + chan] = + ConvertTestPixel(val); + } + } + } + std::vector bytes(pixels.size() * sizeof(T)); + memcpy(bytes.data(), pixels.data(), sizeof(T) * pixels.size()); + return bytes; +} + +void EncodeWithEncoder(JxlEncoder* enc, std::vector* compressed) { + compressed->resize(64); + uint8_t* next_out = compressed->data(); + size_t avail_out = compressed->size() - (next_out - compressed->data()); + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + process_result = JxlEncoderProcessOutput(enc, &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed->data(); + compressed->resize(compressed->size() * 2); + next_out = compressed->data() + offset; + avail_out = compressed->size() - offset; + } + } + compressed->resize(next_out - compressed->data()); + EXPECT_EQ(JXL_ENC_SUCCESS, process_result); +} + +// Generates some pixels using some dimensions and pixel_format, +// compresses them, and verifies that the decoded version is similar to the +// original pixels. 
+// TODO(firsching): change this to be a parameterized test, like in +// decode_test.cc +template +void VerifyRoundtripCompression( + const size_t xsize, const size_t ysize, + const JxlPixelFormat& input_pixel_format, + const JxlPixelFormat& output_pixel_format, const bool lossless, + const bool use_container, const uint32_t resampling = 1, + const bool already_downsampled = false, + const std::vector>& + extra_channels = {}) { + size_t orig_xsize = xsize; + size_t orig_ysize = ysize; + if (already_downsampled) { + orig_xsize = jxl::DivCeil(xsize, resampling); + orig_ysize = jxl::DivCeil(ysize, resampling); + } + + JxlPixelFormat extra_channel_pixel_format = input_pixel_format; + extra_channel_pixel_format.num_channels = 1; + const std::vector extra_channel_bytes = + GetTestImage(xsize, ysize, extra_channel_pixel_format); + const std::vector original_bytes = + GetTestImage(orig_xsize, orig_ysize, input_pixel_format); + jxl::CodecInOut original_io = ConvertTestImage( + original_bytes, orig_xsize, orig_ysize, input_pixel_format, {}); + + JxlEncoder* enc = JxlEncoderCreate(nullptr); + EXPECT_NE(nullptr, enc); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, use_container)); + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &input_pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = lossless; + uint32_t num_channels = input_pixel_format.num_channels; + size_t has_interleaved_alpha = num_channels == 2 || num_channels == 4; + JxlPixelFormat output_pixel_format_with_extra_channel_alpha = + output_pixel_format; + + // In the case where we have an alpha channel, but it is provided as an extra + // channel and not interleaved, we do two things here: + // 1. modify the original_io to have the correct alpha channel + // 2. 
change the output_format_with_extra_alpha to have an alpha channel + bool alpha_in_extra_channels_vector = false; + for (const auto& extra_channel : extra_channels) { + if (extra_channel.first == JXL_CHANNEL_ALPHA) { + alpha_in_extra_channels_vector = true; + } + } + if (alpha_in_extra_channels_vector && !has_interleaved_alpha) { + jxl::ImageF alpha_channel(xsize, ysize); + EXPECT_TRUE(jxl::ConvertFromExternal( + jxl::Span(extra_channel_bytes.data(), + extra_channel_bytes.size()), + xsize, ysize, basic_info.bits_per_sample, extra_channel_pixel_format, 0, + /*pool=*/nullptr, &alpha_channel)); + + original_io.metadata.m.SetAlphaBits(basic_info.bits_per_sample); + original_io.Main().SetAlpha(std::move(alpha_channel)); + output_pixel_format_with_extra_channel_alpha.num_channels++; + } + // Those are the num_extra_channels including a potential alpha channel. + basic_info.num_extra_channels = extra_channels.size() + has_interleaved_alpha; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info)); + EXPECT_EQ(enc->metadata.m.num_extra_channels, + extra_channels.size() + has_interleaved_alpha); + JxlColorEncoding color_encoding; + if (input_pixel_format.data_type == JXL_TYPE_FLOAT) { + JxlColorEncodingSetToLinearSRGB( + &color_encoding, + /*is_gray=*/input_pixel_format.num_channels < 3); + } else { + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/input_pixel_format.num_channels < 3); + } + + std::vector channel_infos; + for (const auto& extra_channel : extra_channels) { + auto channel_type = extra_channel.first; + JxlExtraChannelInfo channel_info; + JxlEncoderInitExtraChannelInfo(channel_type, &channel_info); + channel_info.bits_per_sample = (lossless ? basic_info.bits_per_sample : 8); + channel_info.exponent_bits_per_sample = + (lossless ? 
basic_info.exponent_bits_per_sample : 0); + channel_infos.push_back(channel_info); + } + for (size_t index = 0; index < channel_infos.size(); index++) { + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetExtraChannelInfo(enc, index + has_interleaved_alpha, + &channel_infos[index])); + std::string name = extra_channels[index].second; + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetExtraChannelName(enc, index + has_interleaved_alpha, + name.c_str(), name.length())); + } + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding)); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc, nullptr); + JxlEncoderSetFrameLossless(frame_settings, lossless); + if (resampling > 1) { + EXPECT_EQ( + JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_RESAMPLING, resampling)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderFrameSettingsSetOption( + frame_settings, JXL_ENC_FRAME_SETTING_ALREADY_DOWNSAMPLED, + already_downsampled)); + } + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &input_pixel_format, + (void*)original_bytes.data(), + original_bytes.size())); + EXPECT_EQ(frame_settings->enc->input_queue.back() + .frame->frame.extra_channels() + .size(), + has_interleaved_alpha + extra_channels.size()); + EXPECT_EQ(frame_settings->enc->input_queue.empty(), false); + for (size_t index = 0; index < channel_infos.size(); index++) { + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetExtraChannelBuffer( + frame_settings, &extra_channel_pixel_format, + (void*)extra_channel_bytes.data(), extra_channel_bytes.size(), + index + has_interleaved_alpha)); + } + JxlEncoderCloseInput(enc); + EXPECT_EQ(frame_settings->enc->input_queue.back() + .frame->frame.extra_channels() + .size(), + has_interleaved_alpha + extra_channels.size()); + std::vector compressed; + EncodeWithEncoder(enc, &compressed); + JxlEncoderDestroy(enc); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_NE(nullptr, dec); + + const 
uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | + JXL_DEC_COLOR_ENCODING | + JXL_DEC_FULL_IMAGE)); + + JxlDecoderSetInput(dec, next_in, avail_in); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ( + JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize( + dec, &output_pixel_format_with_extra_channel_alpha, &buffer_size)); + if (&input_pixel_format == &output_pixel_format_with_extra_channel_alpha && + !already_downsampled) { + EXPECT_EQ(buffer_size, original_bytes.size()); + } + + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + EXPECT_EQ(extra_channels.size() + has_interleaved_alpha, + info.num_extra_channels); + + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + + size_t icc_profile_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA, + &icc_profile_size)); + jxl::PaddedBytes icc_profile(icc_profile_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, + icc_profile.data(), icc_profile.size())); + + std::vector decoded_bytes(buffer_size); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer( + dec, &output_pixel_format_with_extra_channel_alpha, + decoded_bytes.data(), decoded_bytes.size())); + std::vector> extra_channel_decoded_bytes( + info.num_extra_channels - has_interleaved_alpha); + + for (size_t index = has_interleaved_alpha; index < info.num_extra_channels; + index++) { + JxlExtraChannelInfo channel_info; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetExtraChannelInfo(dec, index, &channel_info)); + EXPECT_EQ(channel_info.type, + extra_channels[index - has_interleaved_alpha].first); + std::string input_name = + 
extra_channels[index - has_interleaved_alpha].second; + const size_t name_length = channel_info.name_length; + EXPECT_EQ(input_name.size(), name_length); + std::vector output_name(name_length + 1); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetExtraChannelName(dec, index, output_name.data(), + output_name.size())); + EXPECT_EQ(0, + memcmp(input_name.data(), output_name.data(), input_name.size())); + size_t extra_buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderExtraChannelBufferSize(dec, &output_pixel_format, + &extra_buffer_size, index)); + std::vector extra_decoded_bytes(extra_buffer_size); + extra_channel_decoded_bytes[index - has_interleaved_alpha] = + std::move(extra_decoded_bytes); + EXPECT_EQ( + JXL_DEC_SUCCESS, + JxlDecoderSetExtraChannelBuffer( + dec, &output_pixel_format, + extra_channel_decoded_bytes[index - has_interleaved_alpha].data(), + extra_channel_decoded_bytes[index - has_interleaved_alpha].size(), + index)); + } + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + // Check if there are no further errors after getting the full image, e.g. + // check that the final codestream box is actually marked as last. 
+ EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderProcessInput(dec)); + + JxlDecoderDestroy(dec); + + jxl::CodecInOut decoded_io = ConvertTestImage( + decoded_bytes, xsize, ysize, output_pixel_format_with_extra_channel_alpha, + icc_profile); + + if (already_downsampled) { + jxl::Image3F* color = decoded_io.Main().color(); + jxl::DownsampleImage(color, resampling); + if (decoded_io.Main().HasAlpha()) { + jxl::ImageF* alpha = decoded_io.Main().alpha(); + jxl::DownsampleImage(alpha, resampling); + } + decoded_io.SetSize(color->xsize(), color->ysize()); + } + + if (lossless && !already_downsampled) { + JXL_EXPECT_OK(jxl::SamePixels(*original_io.Main().color(), + *decoded_io.Main().color(), _)); + } else { + jxl::ButteraugliParams ba; + float butteraugli_score = ButteraugliDistance( + original_io.frames, decoded_io.frames, ba, jxl::GetJxlCms(), + /*distmap=*/nullptr, nullptr); + EXPECT_LE(butteraugli_score, 2.0f); + } + JxlPixelFormat extra_channel_output_pixel_format = output_pixel_format; + extra_channel_output_pixel_format.num_channels = 1; + for (auto& extra_channel : extra_channel_decoded_bytes) { + EXPECT_EQ(extra_channel.size(), extra_channel_bytes.size()); + if (lossless) { + EXPECT_EQ(jxl::test::ComparePixels(extra_channel.data(), + extra_channel_bytes.data(), xsize, + ysize, extra_channel_pixel_format, + extra_channel_output_pixel_format), + 0u); + EXPECT_EQ(extra_channel, extra_channel_bytes); + } + } +} + +} // namespace + +TEST(RoundtripTest, FloatFrameRoundtripTest) { + std::vector>> + extra_channels_cases = {{}, + {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}}, + {{JXL_CHANNEL_CFA, "my cfa channel"}}, + {{JXL_CHANNEL_DEPTH, "depth"}, + {JXL_CHANNEL_SELECTION_MASK, "mask"}, + {JXL_CHANNEL_BLACK, "black"}, + {JXL_CHANNEL_CFA, "my cfa channel"}, + {JXL_CHANNEL_OPTIONAL, "optional channel"}}, + {{JXL_CHANNEL_DEPTH, "very deep"}}}; + for (int use_container = 0; use_container < 2; use_container++) { + for (int lossless = 0; lossless < 2; lossless++) { + for (uint32_t 
num_channels = 1; num_channels < 5; num_channels++) { + for (auto& extra_channels : extra_channels_cases) { + uint32_t has_alpha = static_cast(num_channels % 2 == 0); + uint32_t total_extra_channels = has_alpha + extra_channels.size(); + // There's no support (yet) for lossless extra float + // channels, so we don't test it. + if (total_extra_channels == 0 || !lossless) { + JxlPixelFormat pixel_format = JxlPixelFormat{ + num_channels, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0}; + VerifyRoundtripCompression( + 63, 129, pixel_format, pixel_format, (bool)lossless, + (bool)use_container, 1, false, extra_channels); + } + } + } + } + } +} + +TEST(RoundtripTest, Uint16FrameRoundtripTest) { + std::vector>> + extra_channels_cases = {{}, + {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}}, + {{JXL_CHANNEL_CFA, "my cfa channel"}}, + {{JXL_CHANNEL_CFA, "my cfa channel"}, + {JXL_CHANNEL_BLACK, "k_channel"}}, + {{JXL_CHANNEL_DEPTH, "very deep"}}}; + for (int use_container = 0; use_container < 2; use_container++) { + for (int lossless = 0; lossless < 2; lossless++) { + for (uint32_t num_channels = 1; num_channels < 5; num_channels++) { + for (auto& extra_channels : extra_channels_cases) { + JxlPixelFormat pixel_format = JxlPixelFormat{ + num_channels, JXL_TYPE_UINT16, JXL_NATIVE_ENDIAN, 0}; + VerifyRoundtripCompression( + 63, 129, pixel_format, pixel_format, (bool)lossless, + (bool)use_container, 1, false, extra_channels); + } + } + } + } +} + +TEST(RoundtripTest, Uint8FrameRoundtripTest) { + std::vector>> + extra_channels_cases = {{}, + {{JXL_CHANNEL_THERMAL, "temperature"}}, + {{JXL_CHANNEL_ALPHA, "my extra alpha channel"}}, + {{JXL_CHANNEL_CFA, "my cfa channel"}}, + {{JXL_CHANNEL_CFA, "my cfa channel"}, + {JXL_CHANNEL_BLACK, "k_channel"}}, + {{JXL_CHANNEL_DEPTH, "very deep"}}}; + for (int use_container = 0; use_container < 2; use_container++) { + for (int lossless = 0; lossless < 2; lossless++) { + for (uint32_t num_channels = 1; num_channels < 5; num_channels++) { + for (auto& 
extra_channels : extra_channels_cases) { + JxlPixelFormat pixel_format = JxlPixelFormat{ + num_channels, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0}; + VerifyRoundtripCompression( + 63, 129, pixel_format, pixel_format, (bool)lossless, + (bool)use_container, 1, false, extra_channels); + } + } + } + } +} + +TEST(RoundtripTest, TestNonlinearSrgbAsXybEncoded) { + for (int use_container = 0; use_container < 2; use_container++) { + for (uint32_t num_channels = 1; num_channels < 5; num_channels++) { + JxlPixelFormat pixel_format_in = + JxlPixelFormat{num_channels, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0}; + JxlPixelFormat pixel_format_out = + JxlPixelFormat{num_channels, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0}; + VerifyRoundtripCompression( + 63, 129, pixel_format_in, pixel_format_out, + /*lossless=*/false, (bool)use_container, {}); + } + } +} + +TEST(RoundtripTest, Resampling) { + JxlPixelFormat pixel_format = + JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0}; + VerifyRoundtripCompression(63, 129, pixel_format, pixel_format, + /*lossless=*/false, + /*use_container=*/false, 2, + /*already_downsampled=*/false); + + // TODO(lode): also make this work for odd sizes. This requires a fix in + // enc_frame.cc to not set custom_size_or_origin to true due to even/odd + // mismatch. 
+ VerifyRoundtripCompression(64, 128, pixel_format, pixel_format, + /*lossless=*/true, + /*use_container=*/false, 2, + /*already_downsampled=*/true); +} + +TEST(RoundtripTest, ExtraBoxesTest) { + JxlPixelFormat pixel_format = + JxlPixelFormat{4, JXL_TYPE_FLOAT, JXL_NATIVE_ENDIAN, 0}; + const size_t xsize = 61; + const size_t ysize = 71; + + const std::vector original_bytes = + GetTestImage(xsize, ysize, pixel_format); + jxl::CodecInOut original_io = + ConvertTestImage(original_bytes, xsize, ysize, pixel_format, {}); + + JxlEncoder* enc = JxlEncoderCreate(nullptr); + EXPECT_NE(nullptr, enc); + + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc, true)); + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &pixel_format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = false; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetCodestreamLevel(enc, 10)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info)); + JxlColorEncoding color_encoding; + if (pixel_format.data_type == JXL_TYPE_FLOAT) { + JxlColorEncodingSetToLinearSRGB(&color_encoding, + /*is_gray=*/pixel_format.num_channels < 3); + } else { + JxlColorEncodingSetToSRGB(&color_encoding, + /*is_gray=*/pixel_format.num_channels < 3); + } + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetColorEncoding(enc, &color_encoding)); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc, nullptr); + JxlEncoderSetFrameLossless(frame_settings, false); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &pixel_format, + (void*)original_bytes.data(), + original_bytes.size())); + JxlEncoderCloseInput(enc); + + std::vector compressed; + EncodeWithEncoder(enc, &compressed); + JxlEncoderDestroy(enc); + + std::vector extra_data(1023); + jxl::AppendBoxHeader(jxl::MakeBoxType("crud"), extra_data.size(), false, + &compressed); + compressed.insert(compressed.end(), extra_data.begin(), extra_data.end()); + + 
JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_NE(nullptr, dec); + + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | + JXL_DEC_COLOR_ENCODING | + JXL_DEC_FULL_IMAGE)); + + JxlDecoderSetInput(dec, next_in, avail_in); + EXPECT_EQ(JXL_DEC_BASIC_INFO, JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &pixel_format, &buffer_size)); + EXPECT_EQ(buffer_size, original_bytes.size()); + + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + + size_t icc_profile_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_DATA, + &icc_profile_size)); + jxl::PaddedBytes icc_profile(icc_profile_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile( + dec, JXL_COLOR_PROFILE_TARGET_DATA, + icc_profile.data(), icc_profile.size())); + + std::vector decoded_bytes(buffer_size); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderSetImageOutBuffer(dec, &pixel_format, + decoded_bytes.data(), + decoded_bytes.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + + JxlDecoderDestroy(dec); + + jxl::CodecInOut decoded_io = + ConvertTestImage(decoded_bytes, xsize, ysize, pixel_format, icc_profile); + + jxl::ButteraugliParams ba; + float butteraugli_score = ButteraugliDistance( + original_io.frames, decoded_io.frames, ba, jxl::GetJxlCms(), + /*distmap=*/nullptr, nullptr); + EXPECT_LE(butteraugli_score, 2.0f); +} + +static const unsigned char kEncodedTestProfile[] = { + 0x1f, 0x8b, 0x1, 0x13, 0x10, 0x0, 0x0, 0x0, 0x20, 0x4c, 0xcc, 0x3, + 0xe7, 0xa0, 0xa5, 0xa2, 0x90, 0xa4, 0x27, 0xe8, 0x79, 0x1d, 0xe3, 0x26, + 
0x57, 0x54, 0xef, 0x0, 0xe8, 0x97, 0x2, 0xce, 0xa1, 0xd7, 0x85, 0x16, + 0xb4, 0x29, 0x94, 0x58, 0xf2, 0x56, 0xc0, 0x76, 0xea, 0x23, 0xec, 0x7c, + 0x73, 0x51, 0x41, 0x40, 0x23, 0x21, 0x95, 0x4, 0x75, 0x12, 0xc9, 0xcc, + 0x16, 0xbd, 0xb6, 0x99, 0xad, 0xf8, 0x75, 0x35, 0xb6, 0x42, 0xae, 0xae, + 0xae, 0x86, 0x56, 0xf8, 0xcc, 0x16, 0x30, 0xb3, 0x45, 0xad, 0xd, 0x40, + 0xd6, 0xd1, 0xd6, 0x99, 0x40, 0xbe, 0xe2, 0xdc, 0x31, 0x7, 0xa6, 0xb9, + 0x27, 0x92, 0x38, 0x0, 0x3, 0x5e, 0x2c, 0xbe, 0xe6, 0xfb, 0x19, 0xbf, + 0xf3, 0x6d, 0xbc, 0x4d, 0x64, 0xe5, 0xba, 0x76, 0xde, 0x31, 0x65, 0x66, + 0x14, 0xa6, 0x3a, 0xc5, 0x8f, 0xb1, 0xb4, 0xba, 0x1f, 0xb1, 0xb8, 0xd4, + 0x75, 0xba, 0x18, 0x86, 0x95, 0x3c, 0x26, 0xf6, 0x25, 0x62, 0x53, 0xfd, + 0x9c, 0x94, 0x76, 0xf6, 0x95, 0x2c, 0xb1, 0xfd, 0xdc, 0xc0, 0xe4, 0x3f, + 0xb3, 0xff, 0x67, 0xde, 0xd5, 0x94, 0xcc, 0xb0, 0x83, 0x2f, 0x28, 0x93, + 0x92, 0x3, 0xa1, 0x41, 0x64, 0x60, 0x62, 0x70, 0x80, 0x87, 0xaf, 0xe7, + 0x60, 0x4a, 0x20, 0x23, 0xb3, 0x11, 0x7, 0x38, 0x38, 0xd4, 0xa, 0x66, + 0xb5, 0x93, 0x41, 0x90, 0x19, 0x17, 0x18, 0x60, 0xa5, 0xb, 0x7a, 0x24, + 0xaa, 0x20, 0x81, 0xac, 0xa9, 0xa1, 0x70, 0xa6, 0x12, 0x8a, 0x4a, 0xa3, + 0xa0, 0xf9, 0x9a, 0x97, 0xe7, 0xa8, 0xac, 0x8, 0xa8, 0xc4, 0x2a, 0x86, + 0xa7, 0x69, 0x1e, 0x67, 0xe6, 0xbe, 0xa4, 0xd3, 0xff, 0x91, 0x61, 0xf6, + 0x8a, 0xe6, 0xb5, 0xb3, 0x61, 0x9f, 0x19, 0x17, 0x98, 0x27, 0x6b, 0xe9, + 0x8, 0x98, 0xe1, 0x21, 0x4a, 0x9, 0xb5, 0xd7, 0xca, 0xfa, 0x94, 0xd0, + 0x69, 0x1a, 0xeb, 0x52, 0x1, 0x4e, 0xf5, 0xf6, 0xdf, 0x7f, 0xe7, 0x29, + 0x70, 0xee, 0x4, 0xda, 0x2f, 0xa4, 0xff, 0xfe, 0xbb, 0x6f, 0xa8, 0xff, + 0xfe, 0xdb, 0xaf, 0x8, 0xf6, 0x72, 0xa1, 0x40, 0x5d, 0xf0, 0x2d, 0x8, + 0x82, 0x5b, 0x87, 0xbd, 0x10, 0x8, 0xe9, 0x7, 0xee, 0x4b, 0x80, 0xda, + 0x4a, 0x4, 0xc5, 0x5e, 0xa0, 0xb7, 0x1e, 0x60, 0xb0, 0x59, 0x76, 0x60, + 0xb, 0x2e, 0x19, 0x8a, 0x2e, 0x1c, 0xe6, 0x6, 0x20, 0xb8, 0x64, 0x18, + 0x2a, 0xcf, 0x51, 0x94, 0xd4, 0xee, 0xc3, 0xfe, 0x39, 0x74, 0xd4, 0x2b, + 0x48, 0xc9, 0x83, 0x4c, 
0x9b, 0xd0, 0x4c, 0x35, 0x10, 0xe3, 0x9, 0xf7, + 0x72, 0xf0, 0x7a, 0xe, 0xbf, 0x7d, 0x36, 0x2e, 0x19, 0x7e, 0x3f, 0xc, + 0xf7, 0x93, 0xe7, 0xf4, 0x1d, 0x32, 0xc6, 0xb0, 0x89, 0xad, 0xe0, 0x28, + 0xc1, 0xa7, 0x59, 0xe3, 0x0, +}; + +TEST(RoundtripTest, TestICCProfile) { + // JxlEncoderSetICCProfile parses the ICC profile, so a valid profile is + // needed. The profile should be passed correctly through the roundtrip. + jxl::BitReader reader(jxl::Span(kEncodedTestProfile, + sizeof(kEncodedTestProfile))); + jxl::PaddedBytes icc; + ASSERT_TRUE(ReadICC(&reader, &icc)); + ASSERT_TRUE(reader.Close()); + + JxlPixelFormat format = + JxlPixelFormat{3, JXL_TYPE_UINT8, JXL_NATIVE_ENDIAN, 0}; + + size_t xsize = 25; + size_t ysize = 37; + const std::vector original_bytes = + GetTestImage(xsize, ysize, format); + + JxlEncoder* enc = JxlEncoderCreate(nullptr); + EXPECT_NE(nullptr, enc); + + JxlBasicInfo basic_info; + jxl::test::JxlBasicInfoSetFromPixelFormat(&basic_info, &format); + basic_info.xsize = xsize; + basic_info.ysize = ysize; + basic_info.uses_original_profile = JXL_TRUE; + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderSetBasicInfo(enc, &basic_info)); + + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderSetICCProfile(enc, icc.data(), icc.size())); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc, nullptr); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddImageFrame(frame_settings, &format, + (void*)original_bytes.data(), + original_bytes.size())); + JxlEncoderCloseInput(enc); + + std::vector compressed; + EncodeWithEncoder(enc, &compressed); + JxlEncoderDestroy(enc); + + JxlDecoder* dec = JxlDecoderCreate(nullptr); + EXPECT_NE(nullptr, dec); + + const uint8_t* next_in = compressed.data(); + size_t avail_in = compressed.size(); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents(dec, JXL_DEC_BASIC_INFO | + JXL_DEC_COLOR_ENCODING | + JXL_DEC_FULL_IMAGE)); + + JxlDecoderSetInput(dec, next_in, avail_in); + EXPECT_EQ(JXL_DEC_BASIC_INFO, 
JxlDecoderProcessInput(dec)); + size_t buffer_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderImageOutBufferSize(dec, &format, &buffer_size)); + EXPECT_EQ(buffer_size, original_bytes.size()); + + JxlBasicInfo info; + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetBasicInfo(dec, &info)); + EXPECT_EQ(xsize, info.xsize); + EXPECT_EQ(ysize, info.ysize); + + EXPECT_EQ(JXL_DEC_COLOR_ENCODING, JxlDecoderProcessInput(dec)); + + size_t dec_icc_size; + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderGetICCProfileSize(dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + &dec_icc_size)); + EXPECT_EQ(icc.size(), dec_icc_size); + jxl::PaddedBytes dec_icc(dec_icc_size); + EXPECT_EQ(JXL_DEC_SUCCESS, JxlDecoderGetColorAsICCProfile( + dec, JXL_COLOR_PROFILE_TARGET_ORIGINAL, + dec_icc.data(), dec_icc.size())); + + std::vector decoded_bytes(buffer_size); + + EXPECT_EQ(JXL_DEC_NEED_IMAGE_OUT_BUFFER, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetImageOutBuffer(dec, &format, decoded_bytes.data(), + decoded_bytes.size())); + + EXPECT_EQ(JXL_DEC_FULL_IMAGE, JxlDecoderProcessInput(dec)); + + EXPECT_EQ(icc, dec_icc); + + JxlDecoderDestroy(dec); +} + +TEST(RoundtripTest, JXL_TRANSCODE_JPEG_TEST(TestJPEGReconstruction)) { + TEST_LIBJPEG_SUPPORT(); + const std::string jpeg_path = "jxl/flower/flower.png.im_q85_420.jpg"; + const jxl::PaddedBytes orig = jxl::test::ReadTestData(jpeg_path); + jxl::CodecInOut orig_io; + ASSERT_TRUE( + SetFromBytes(jxl::Span(orig), &orig_io, /*pool=*/nullptr)); + + JxlEncoderPtr enc = JxlEncoderMake(nullptr); + JxlEncoderFrameSettings* frame_settings = + JxlEncoderFrameSettingsCreate(enc.get(), NULL); + + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderUseContainer(enc.get(), JXL_TRUE)); + EXPECT_EQ(JXL_ENC_SUCCESS, JxlEncoderStoreJPEGMetadata(enc.get(), JXL_TRUE)); + EXPECT_EQ(JXL_ENC_SUCCESS, + JxlEncoderAddJPEGFrame(frame_settings, orig.data(), orig.size())); + JxlEncoderCloseInput(enc.get()); + + std::vector compressed; + EncodeWithEncoder(enc.get(), &compressed); + + 
JxlDecoderPtr dec = JxlDecoderMake(nullptr); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSubscribeEvents( + dec.get(), JXL_DEC_JPEG_RECONSTRUCTION | JXL_DEC_FULL_IMAGE)); + JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size()); + EXPECT_EQ(JXL_DEC_JPEG_RECONSTRUCTION, JxlDecoderProcessInput(dec.get())); + std::vector reconstructed_buffer(128); + EXPECT_EQ(JXL_DEC_SUCCESS, + JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data(), + reconstructed_buffer.size())); + size_t used = 0; + JxlDecoderStatus dec_process_result = JXL_DEC_JPEG_NEED_MORE_OUTPUT; + while (dec_process_result == JXL_DEC_JPEG_NEED_MORE_OUTPUT) { + used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get()); + reconstructed_buffer.resize(reconstructed_buffer.size() * 2); + EXPECT_EQ( + JXL_DEC_SUCCESS, + JxlDecoderSetJPEGBuffer(dec.get(), reconstructed_buffer.data() + used, + reconstructed_buffer.size() - used)); + dec_process_result = JxlDecoderProcessInput(dec.get()); + } + ASSERT_EQ(JXL_DEC_FULL_IMAGE, dec_process_result); + used = reconstructed_buffer.size() - JxlDecoderReleaseJPEGBuffer(dec.get()); + ASSERT_EQ(used, orig.size()); + EXPECT_EQ(0, memcmp(reconstructed_buffer.data(), orig.data(), used)); +} diff --git a/third-party/libjxl/libjxl/lib/jxl/sanitizers.h b/third-party/libjxl/libjxl/lib/jxl/sanitizers.h new file mode 100644 index 0000000000..ce0bd8dc63 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/sanitizers.h @@ -0,0 +1,242 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_SANITIZERS_H_ +#define LIB_JXL_SANITIZERS_H_ + +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/sanitizer_definitions.h" +#include "lib/jxl/image.h" + +#if JXL_MEMORY_SANITIZER +#include + +#include +#include +#include + +#include "lib/jxl/base/status.h" +#include "sanitizer/msan_interface.h" +#endif + +namespace jxl { +namespace msan { + +#if JXL_MEMORY_SANITIZER + +// Chosen so that kSanitizerSentinel is four copies of kSanitizerSentinelByte. +constexpr uint8_t kSanitizerSentinelByte = 0x48; +constexpr float kSanitizerSentinel = 205089.125f; + +static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const volatile void* m, + size_t size) { + __msan_poison(m, size); +} + +static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const volatile void* m, + size_t size) { + __msan_unpoison(m, size); +} + +static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonCStr(const char* c) { + do { + UnpoisonMemory(c, 1); + } while (*c++); +} + +static JXL_INLINE JXL_MAYBE_UNUSED void MemoryIsInitialized( + const volatile void* m, size_t size) { + __msan_check_mem_is_initialized(m, size); +} + +// Mark all the bytes of an image (including padding) as poisoned bytes. +static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase& im) { + PoisonMemory(im.bytes(), im.bytes_per_row() * im.ysize()); +} + +template +static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Image3& im) { + PoisonImage(im.Plane(0)); + PoisonImage(im.Plane(1)); + PoisonImage(im.Plane(2)); +} + +// Print the uninitialized regions of an image. +template +static JXL_INLINE JXL_MAYBE_UNUSED void PrintImageUninitialized( + const Plane& im) { + fprintf(stderr, + "Uninitialized regions for image of size %" PRIu64 "x%" PRIu64 ":\n", + static_cast(im.xsize()), static_cast(im.ysize())); + + // A segment of uninitialized pixels in a row, in the format [first, second). 
+ typedef std::pair PixelSegment; + + // Helper class to merge and print a list of rows of PixelSegment that may be + // the same over big ranges of rows. This compacts the output to ranges of + // rows like "[y0, y1): [x0, x1) [x2, x3)". + class RowsMerger { + public: + // Add a new row the list of rows. If the row is the same as the previous + // one it will be merged showing a range of rows [y0, y1), but if the new + // row is different the current range of rows (if any) will be printed and a + // new one will be started. + void AddRow(size_t y, std::vector&& new_row) { + if (start_y_ != -1 && new_row != segments_) { + PrintRow(y); + } + if (new_row.empty()) { + // Skip ranges with no uninitialized pixels. + start_y_ = -1; + segments_.clear(); + return; + } + if (start_y_ == -1) { + start_y_ = y; + segments_ = std::move(new_row); + } + } + + // Print the contents of the range of rows [start_y_, end_y) if any. + void PrintRow(size_t end_y) { + if (start_y_ == -1) return; + if (segments_.empty()) { + start_y_ = -1; + return; + } + if (end_y - start_y_ > 1) { + fprintf(stderr, " y=[%" PRId64 ", %" PRIu64 "):", + static_cast(start_y_), static_cast(end_y)); + } else { + fprintf(stderr, " y=[%" PRId64 "]:", static_cast(start_y_)); + } + for (const auto& seg : segments_) { + if (seg.first + 1 == seg.second) { + fprintf(stderr, " [%" PRId64 "]", static_cast(seg.first)); + } else { + fprintf(stderr, " [%" PRId64 ", %" PRIu64 ")", + static_cast(seg.first), + static_cast(seg.second)); + } + } + fprintf(stderr, "\n"); + start_y_ = -1; + } + + private: + std::vector segments_; + // Row number of the first row in the range of rows that have |segments| as + // the undefined segments. 
+ ssize_t start_y_ = -1; + } rows_merger; + + class SegmentsMerger { + public: + void AddValue(size_t x) { + if (row.empty() || row.back().second != x) { + row.emplace_back(x, x + 1); + } else { + row.back().second = x + 1; + } + } + + std::vector row; + }; + + for (size_t y = 0; y < im.ysize(); y++) { + auto* row = im.Row(y); + SegmentsMerger seg_merger; + size_t x = 0; + while (x < im.xsize()) { + intptr_t ret = + __msan_test_shadow(row + x, (im.xsize() - x) * sizeof(row[0])); + if (ret < 0) break; + size_t next_x = x + ret / sizeof(row[0]); + seg_merger.AddValue(next_x); + x = next_x + 1; + } + rows_merger.AddRow(y, std::move(seg_merger.row)); + } + rows_merger.PrintRow(im.ysize()); +} + +// Check that all the pixels in the provided rect of the image are initialized +// (not poisoned). If any of the values is poisoned it will abort. +template +static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized( + const Plane& im, const Rect& r, size_t c, const char* message) { + JXL_ASSERT(r.x0() <= im.xsize()); + JXL_ASSERT(r.x0() + r.xsize() <= im.xsize()); + JXL_ASSERT(r.y0() <= im.ysize()); + JXL_ASSERT(r.y0() + r.ysize() <= im.ysize()); + for (size_t y = r.y0(); y < r.y0() + r.ysize(); y++) { + const auto* row = im.Row(y); + intptr_t ret = __msan_test_shadow(row + r.x0(), sizeof(*row) * r.xsize()); + if (ret != -1) { + JXL_DEBUG( + 1, + "Checking an image of %" PRIu64 " x %" PRIu64 ", rect x0=%" PRIu64 + ", y0=%" PRIu64 + ", " + "xsize=%" PRIu64 ", ysize=%" PRIu64, + static_cast(im.xsize()), static_cast(im.ysize()), + static_cast(r.x0()), static_cast(r.y0()), + static_cast(r.xsize()), static_cast(r.ysize())); + size_t x = ret / sizeof(*row); + JXL_DEBUG(1, + "CheckImageInitialized failed at x=%" PRIu64 ", y=%" PRIu64 + ", c=%" PRIu64 ": %s", + static_cast(r.x0() + x), static_cast(y), + static_cast(c), message ? message : ""); + PrintImageUninitialized(im); + } + // This will report an error if memory is not initialized. 
+ __msan_check_mem_is_initialized(row + r.x0(), sizeof(*row) * r.xsize()); + } +} + +template +static JXL_INLINE JXL_MAYBE_UNUSED void CheckImageInitialized( + const Image3& im, const Rect& r, const char* message) { + for (size_t c = 0; c < 3; c++) { + std::string str_message(message); + str_message += " c=" + std::to_string(c); + CheckImageInitialized(im.Plane(c), r, c, str_message.c_str()); + } +} + +#define JXL_CHECK_IMAGE_INITIALIZED(im, r) \ + ::jxl::msan::CheckImageInitialized(im, r, "im=" #im ", r=" #r); + +#define JXL_CHECK_PLANE_INITIALIZED(im, r, c) \ + ::jxl::msan::CheckImageInitialized(im, r, c, "im=" #im ", r=" #r ", c=" #c); + +#else // JXL_MEMORY_SANITIZER + +// In non-msan mode these functions don't use volatile since it is not needed +// for the empty functions. + +static JXL_INLINE JXL_MAYBE_UNUSED void PoisonMemory(const void*, size_t) {} +static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonMemory(const void*, size_t) {} +static JXL_INLINE JXL_MAYBE_UNUSED void UnpoisonCStr(const char*) {} +static JXL_INLINE JXL_MAYBE_UNUSED void MemoryIsInitialized(const void*, + size_t) {} + +static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const PlaneBase& im) {} +template +static JXL_INLINE JXL_MAYBE_UNUSED void PoisonImage(const Plane& im) {} + +#define JXL_CHECK_IMAGE_INITIALIZED(im, r) +#define JXL_CHECK_PLANE_INITIALIZED(im, r, c) + +#endif + +} // namespace msan +} // namespace jxl + +#endif // LIB_JXL_SANITIZERS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/simd_util-inl.h b/third-party/libjxl/libjxl/lib/jxl/simd_util-inl.h new file mode 100644 index 0000000000..77b207ffe8 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/simd_util-inl.h @@ -0,0 +1,349 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// Misc utilities for SIMD operations + +#if defined(LIB_JXL_SIMD_UTIL_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_SIMD_UTIL_INL_H_ +#undef LIB_JXL_SIMD_UTIL_INL_H_ +#else +#define LIB_JXL_SIMD_UTIL_INL_H_ +#endif + +#include + +#include "lib/jxl/base/compiler_specific.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +#if HWY_CAP_GE512 +using hwy::HWY_NAMESPACE::Half; +using hwy::HWY_NAMESPACE::Vec; +template +HWY_INLINE Vec>> Quarter(const DF df, V v) { + using HF = Half; + using HHF = Half; + auto half = i >= 2 ? UpperHalf(HF(), v) : LowerHalf(HF(), v); + return i & 1 ? UpperHalf(HHF(), half) : LowerHalf(HHF(), half); +} + +template +HWY_INLINE Vec Concat4(const DF df, V v0, V v1, V v2, V v3) { + using HF = Half; + return Combine(DF(), Combine(HF(), v3, v2), Combine(HF(), v1, v0)); +} + +#endif + +// Stores v0[0], v1[0], v0[1], v1[1], ... to mem, in this order. Mem must be +// aligned. +template +void StoreInterleaved(const DF df, V v0, V v1, T* mem) { + static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types"); +#if HWY_TARGET == HWY_SCALAR + Store(v0, df, mem); + Store(v1, df, mem + 1); +#elif !HWY_CAP_GE256 + Store(InterleaveLower(df, v0, v1), df, mem); + Store(InterleaveUpper(df, v0, v1), df, mem + Lanes(df)); +#else + if (!HWY_CAP_GE512 || Lanes(df) == 8) { + auto t0 = InterleaveLower(df, v0, v1); + auto t1 = InterleaveUpper(df, v0, v1); + Store(ConcatLowerLower(df, t1, t0), df, mem); + Store(ConcatUpperUpper(df, t1, t0), df, mem + Lanes(df)); + } else { +#if HWY_CAP_GE512 + auto t0 = InterleaveLower(df, v0, v1); + auto t1 = InterleaveUpper(df, v0, v1); + Store(Concat4(df, Quarter<0>(df, t0), Quarter<0>(df, t1), + Quarter<1>(df, t0), Quarter<1>(df, t1)), + df, mem); + Store(Concat4(df, Quarter<2>(df, t0), Quarter<2>(df, t1), + Quarter<3>(df, t0), Quarter<3>(df, t1)), + df, mem + Lanes(df)); +#endif + } +#endif +} + +// Stores v0[0], v1[0], v2[0], v3[0], v0[1] ... to mem, in this order. 
Mem must +// be aligned. +template +void StoreInterleaved(const DF df, V v0, V v1, V v2, V v3, T* mem) { + static_assert(sizeof(T) == 4, "only use StoreInterleaved for 4-byte types"); +#if HWY_TARGET == HWY_SCALAR + Store(v0, df, mem); + Store(v1, df, mem + 1); + Store(v2, df, mem + 2); + Store(v3, df, mem + 3); +#elif !HWY_CAP_GE256 + auto t0 = InterleaveLower(df, v0, v2); + auto t1 = InterleaveLower(df, v1, v3); + auto t2 = InterleaveUpper(df, v0, v2); + auto t3 = InterleaveUpper(df, v1, v3); + Store(InterleaveLower(df, t0, t1), df, mem); + Store(InterleaveUpper(df, t0, t1), df, mem + Lanes(df)); + Store(InterleaveLower(df, t2, t3), df, mem + 2 * Lanes(df)); + Store(InterleaveUpper(df, t2, t3), df, mem + 3 * Lanes(df)); +#elif !HWY_CAP_GE512 + auto t0 = InterleaveLower(df, v0, v2); + auto t1 = InterleaveLower(df, v1, v3); + auto t2 = InterleaveUpper(df, v0, v2); + auto t3 = InterleaveUpper(df, v1, v3); + + auto m0 = InterleaveLower(df, t0, t1); + auto m1 = InterleaveUpper(df, t0, t1); + auto m2 = InterleaveLower(df, t2, t3); + auto m3 = InterleaveUpper(df, t2, t3); + + Store(ConcatLowerLower(df, m1, m0), df, mem); + Store(ConcatLowerLower(df, m3, m2), df, mem + Lanes(df)); + Store(ConcatUpperUpper(df, m1, m0), df, mem + 2 * Lanes(df)); + Store(ConcatUpperUpper(df, m3, m2), df, mem + 3 * Lanes(df)); +#else + auto t0 = InterleaveLower(df, v0, v2); + auto t1 = InterleaveLower(df, v1, v3); + auto t2 = InterleaveUpper(df, v0, v2); + auto t3 = InterleaveUpper(df, v1, v3); + + auto m0 = InterleaveLower(df, t0, t1); + auto m1 = InterleaveUpper(df, t0, t1); + auto m2 = InterleaveLower(df, t2, t3); + auto m3 = InterleaveUpper(df, t2, t3); + + Store(Concat4(df, Quarter<0>(df, m0), Quarter<0>(df, m1), Quarter<0>(df, m2), + Quarter<0>(df, m3)), + df, mem); + Store(Concat4(df, Quarter<1>(df, m0), Quarter<1>(df, m1), Quarter<1>(df, m2), + Quarter<1>(df, m3)), + df, mem + Lanes(df)); + Store(Concat4(df, Quarter<2>(df, m0), Quarter<2>(df, m1), Quarter<2>(df, m2), + Quarter<2>(df, 
m3)), + df, mem + 2 * Lanes(df)); + Store(Concat4(df, Quarter<3>(df, m0), Quarter<3>(df, m1), Quarter<3>(df, m2), + Quarter<3>(df, m3)), + df, mem + 3 * Lanes(df)); +#endif +} + +// Stores v0[0], v1[0], v2[0], v3[0], v4[0], v5[0], v6[0], v7[0], v0[1] ... to +// mem, in this order. Mem must be aligned. +template +void StoreInterleaved(const DF df, V v0, V v1, V v2, V v3, V v4, V v5, V v6, + V v7, float* mem) { +#if HWY_TARGET == HWY_SCALAR + Store(v0, df, mem); + Store(v1, df, mem + 1); + Store(v2, df, mem + 2); + Store(v3, df, mem + 3); + Store(v4, df, mem + 4); + Store(v5, df, mem + 5); + Store(v6, df, mem + 6); + Store(v7, df, mem + 7); +#elif !HWY_CAP_GE256 + auto t0 = InterleaveLower(df, v0, v4); + auto t1 = InterleaveLower(df, v1, v5); + auto t2 = InterleaveLower(df, v2, v6); + auto t3 = InterleaveLower(df, v3, v7); + auto t4 = InterleaveUpper(df, v0, v4); + auto t5 = InterleaveUpper(df, v1, v5); + auto t6 = InterleaveUpper(df, v2, v6); + auto t7 = InterleaveUpper(df, v3, v7); + + auto w0 = InterleaveLower(df, t0, t2); + auto w1 = InterleaveLower(df, t1, t3); + auto w2 = InterleaveUpper(df, t0, t2); + auto w3 = InterleaveUpper(df, t1, t3); + auto w4 = InterleaveLower(df, t4, t6); + auto w5 = InterleaveLower(df, t5, t7); + auto w6 = InterleaveUpper(df, t4, t6); + auto w7 = InterleaveUpper(df, t5, t7); + + Store(InterleaveLower(df, w0, w1), df, mem); + Store(InterleaveUpper(df, w0, w1), df, mem + Lanes(df)); + Store(InterleaveLower(df, w2, w3), df, mem + 2 * Lanes(df)); + Store(InterleaveUpper(df, w2, w3), df, mem + 3 * Lanes(df)); + Store(InterleaveLower(df, w4, w5), df, mem + 4 * Lanes(df)); + Store(InterleaveUpper(df, w4, w5), df, mem + 5 * Lanes(df)); + Store(InterleaveLower(df, w6, w7), df, mem + 6 * Lanes(df)); + Store(InterleaveUpper(df, w6, w7), df, mem + 7 * Lanes(df)); +#elif !HWY_CAP_GE512 + auto t0 = InterleaveLower(df, v0, v4); + auto t1 = InterleaveLower(df, v1, v5); + auto t2 = InterleaveLower(df, v2, v6); + auto t3 = InterleaveLower(df, v3, v7); 
+ auto t4 = InterleaveUpper(df, v0, v4); + auto t5 = InterleaveUpper(df, v1, v5); + auto t6 = InterleaveUpper(df, v2, v6); + auto t7 = InterleaveUpper(df, v3, v7); + + auto w0 = InterleaveLower(df, t0, t2); + auto w1 = InterleaveLower(df, t1, t3); + auto w2 = InterleaveUpper(df, t0, t2); + auto w3 = InterleaveUpper(df, t1, t3); + auto w4 = InterleaveLower(df, t4, t6); + auto w5 = InterleaveLower(df, t5, t7); + auto w6 = InterleaveUpper(df, t4, t6); + auto w7 = InterleaveUpper(df, t5, t7); + + auto m0 = InterleaveLower(df, w0, w1); + auto m1 = InterleaveUpper(df, w0, w1); + auto m2 = InterleaveLower(df, w2, w3); + auto m3 = InterleaveUpper(df, w2, w3); + auto m4 = InterleaveLower(df, w4, w5); + auto m5 = InterleaveUpper(df, w4, w5); + auto m6 = InterleaveLower(df, w6, w7); + auto m7 = InterleaveUpper(df, w6, w7); + + Store(ConcatLowerLower(df, m1, m0), df, mem); + Store(ConcatLowerLower(df, m3, m2), df, mem + Lanes(df)); + Store(ConcatLowerLower(df, m5, m4), df, mem + 2 * Lanes(df)); + Store(ConcatLowerLower(df, m7, m6), df, mem + 3 * Lanes(df)); + Store(ConcatUpperUpper(df, m1, m0), df, mem + 4 * Lanes(df)); + Store(ConcatUpperUpper(df, m3, m2), df, mem + 5 * Lanes(df)); + Store(ConcatUpperUpper(df, m5, m4), df, mem + 6 * Lanes(df)); + Store(ConcatUpperUpper(df, m7, m6), df, mem + 7 * Lanes(df)); +#else + auto t0 = InterleaveLower(df, v0, v4); + auto t1 = InterleaveLower(df, v1, v5); + auto t2 = InterleaveLower(df, v2, v6); + auto t3 = InterleaveLower(df, v3, v7); + auto t4 = InterleaveUpper(df, v0, v4); + auto t5 = InterleaveUpper(df, v1, v5); + auto t6 = InterleaveUpper(df, v2, v6); + auto t7 = InterleaveUpper(df, v3, v7); + + auto w0 = InterleaveLower(df, t0, t2); + auto w1 = InterleaveLower(df, t1, t3); + auto w2 = InterleaveUpper(df, t0, t2); + auto w3 = InterleaveUpper(df, t1, t3); + auto w4 = InterleaveLower(df, t4, t6); + auto w5 = InterleaveLower(df, t5, t7); + auto w6 = InterleaveUpper(df, t4, t6); + auto w7 = InterleaveUpper(df, t5, t7); + + auto m0 = 
InterleaveLower(df, w0, w1); + auto m1 = InterleaveUpper(df, w0, w1); + auto m2 = InterleaveLower(df, w2, w3); + auto m3 = InterleaveUpper(df, w2, w3); + auto m4 = InterleaveLower(df, w4, w5); + auto m5 = InterleaveUpper(df, w4, w5); + auto m6 = InterleaveLower(df, w6, w7); + auto m7 = InterleaveUpper(df, w6, w7); + + Store(Concat4(df, Quarter<0>(df, m0), Quarter<0>(df, m1), Quarter<0>(df, m2), + Quarter<0>(df, m3)), + df, mem); + Store(Concat4(df, Quarter<0>(df, m4), Quarter<0>(df, m5), Quarter<0>(df, m6), + Quarter<0>(df, m7)), + df, mem + Lanes(df)); + Store(Concat4(df, Quarter<1>(df, m0), Quarter<1>(df, m1), Quarter<1>(df, m2), + Quarter<1>(df, m3)), + df, mem + 2 * Lanes(df)); + Store(Concat4(df, Quarter<1>(df, m4), Quarter<1>(df, m5), Quarter<1>(df, m6), + Quarter<1>(df, m7)), + df, mem + 3 * Lanes(df)); + Store(Concat4(df, Quarter<2>(df, m0), Quarter<2>(df, m1), Quarter<2>(df, m2), + Quarter<2>(df, m3)), + df, mem + 4 * Lanes(df)); + Store(Concat4(df, Quarter<2>(df, m4), Quarter<2>(df, m5), Quarter<2>(df, m6), + Quarter<2>(df, m7)), + df, mem + 5 * Lanes(df)); + Store(Concat4(df, Quarter<3>(df, m0), Quarter<3>(df, m1), Quarter<3>(df, m2), + Quarter<3>(df, m3)), + df, mem + 6 * Lanes(df)); + Store(Concat4(df, Quarter<3>(df, m4), Quarter<3>(df, m5), Quarter<3>(df, m6), + Quarter<3>(df, m7)), + df, mem + 7 * Lanes(df)); +#endif +} + +#if HWY_CAP_GE256 +JXL_INLINE void Transpose8x8Block(const int32_t* JXL_RESTRICT from, + int32_t* JXL_RESTRICT to, size_t fromstride) { + const HWY_CAPPED(int32_t, 8) d; + auto i0 = Load(d, from); + auto i1 = Load(d, from + 1 * fromstride); + auto i2 = Load(d, from + 2 * fromstride); + auto i3 = Load(d, from + 3 * fromstride); + auto i4 = Load(d, from + 4 * fromstride); + auto i5 = Load(d, from + 5 * fromstride); + auto i6 = Load(d, from + 6 * fromstride); + auto i7 = Load(d, from + 7 * fromstride); + + const auto q0 = InterleaveLower(d, i0, i2); + const auto q1 = InterleaveLower(d, i1, i3); + const auto q2 = InterleaveUpper(d, i0, 
i2); + const auto q3 = InterleaveUpper(d, i1, i3); + const auto q4 = InterleaveLower(d, i4, i6); + const auto q5 = InterleaveLower(d, i5, i7); + const auto q6 = InterleaveUpper(d, i4, i6); + const auto q7 = InterleaveUpper(d, i5, i7); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + const auto r4 = InterleaveLower(d, q4, q5); + const auto r5 = InterleaveUpper(d, q4, q5); + const auto r6 = InterleaveLower(d, q6, q7); + const auto r7 = InterleaveUpper(d, q6, q7); + + i0 = ConcatLowerLower(d, r4, r0); + i1 = ConcatLowerLower(d, r5, r1); + i2 = ConcatLowerLower(d, r6, r2); + i3 = ConcatLowerLower(d, r7, r3); + i4 = ConcatUpperUpper(d, r4, r0); + i5 = ConcatUpperUpper(d, r5, r1); + i6 = ConcatUpperUpper(d, r6, r2); + i7 = ConcatUpperUpper(d, r7, r3); + + Store(i0, d, to); + Store(i1, d, to + 1 * 8); + Store(i2, d, to + 2 * 8); + Store(i3, d, to + 3 * 8); + Store(i4, d, to + 4 * 8); + Store(i5, d, to + 5 * 8); + Store(i6, d, to + 6 * 8); + Store(i7, d, to + 7 * 8); +} +#elif HWY_TARGET != HWY_SCALAR +JXL_INLINE void Transpose8x8Block(const int32_t* JXL_RESTRICT from, + int32_t* JXL_RESTRICT to, size_t fromstride) { + const HWY_CAPPED(int32_t, 4) d; + for (size_t n = 0; n < 8; n += 4) { + for (size_t m = 0; m < 8; m += 4) { + auto p0 = Load(d, from + n * fromstride + m); + auto p1 = Load(d, from + (n + 1) * fromstride + m); + auto p2 = Load(d, from + (n + 2) * fromstride + m); + auto p3 = Load(d, from + (n + 3) * fromstride + m); + const auto q0 = InterleaveLower(d, p0, p2); + const auto q1 = InterleaveLower(d, p1, p3); + const auto q2 = InterleaveUpper(d, p0, p2); + const auto q3 = InterleaveUpper(d, p1, p3); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + Store(r0, d, to + m * 8 + n); + Store(r1, 
d, to + (1 + m) * 8 + n); + Store(r2, d, to + (2 + m) * 8 + n); + Store(r3, d, to + (3 + m) * 8 + n); + } + } +} + +#endif + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_SIMD_UTIL_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/simd_util_test.cc b/third-party/libjxl/libjxl/lib/jxl/simd_util_test.cc new file mode 100644 index 0000000000..b81f5d1279 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/simd_util_test.cc @@ -0,0 +1,84 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/simd_util_test.cc" +#include + +#include "lib/jxl/simd_util-inl.h" + +// Test utils +#include +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +HWY_NOINLINE void TestInterleave2() { + HWY_FULL(float) d; + auto vec1 = Iota(d, 0 * 128.0); + auto vec2 = Iota(d, 1 * 128.0); + HWY_ALIGN float mem[MaxLanes(d) * 2]; + StoreInterleaved(d, vec1, vec2, mem); + for (size_t i = 0; i < Lanes(d); i++) { + for (size_t j = 0; j < 2; j++) { + EXPECT_EQ(mem[2 * i + j], j * 128 + i) << "i: " << i << " j: " << j; + } + } +} +HWY_NOINLINE void TestInterleave4() { + HWY_FULL(float) d; + auto vec1 = Iota(d, 0 * 128.0); + auto vec2 = Iota(d, 1 * 128.0); + auto vec3 = Iota(d, 2 * 128.0); + auto vec4 = Iota(d, 3 * 128.0); + HWY_ALIGN float mem[MaxLanes(d) * 4]; + StoreInterleaved(d, vec1, vec2, vec3, vec4, mem); + for (size_t i = 0; i < Lanes(d); i++) { + for (size_t j = 0; j < 4; j++) { + EXPECT_EQ(mem[4 * i + j], j * 128 + i) << "i: " << i << " j: " << j; + } + } +} +HWY_NOINLINE void TestInterleave8() { + HWY_FULL(float) d; + auto vec1 = Iota(d, 0 * 128.0); + auto vec2 = Iota(d, 1 * 128.0); + auto vec3 = Iota(d, 2 * 128.0); + auto vec4 = Iota(d, 3 * 
128.0); + auto vec5 = Iota(d, 4 * 128.0); + auto vec6 = Iota(d, 5 * 128.0); + auto vec7 = Iota(d, 6 * 128.0); + auto vec8 = Iota(d, 7 * 128.0); + HWY_ALIGN float mem[MaxLanes(d) * 8]; + StoreInterleaved(d, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, mem); + for (size_t i = 0; i < Lanes(d); i++) { + for (size_t j = 0; j < 8; j++) { + EXPECT_EQ(mem[8 * i + j], j * 128 + i) << "i: " << i << " j: " << j; + } + } +} + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +class SimdUtilTargetTest : public hwy::TestWithParamTarget {}; +HWY_TARGET_INSTANTIATE_TEST_SUITE_P(SimdUtilTargetTest); + +HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave2); +HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave4); +HWY_EXPORT_AND_TEST_P(SimdUtilTargetTest, TestInterleave8); + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/speed_tier_test.cc b/third-party/libjxl/libjxl/lib/jxl/speed_tier_test.cc new file mode 100644 index 0000000000..61d580d2cb --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/speed_tier_test.cc @@ -0,0 +1,109 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include "lib/extras/codec.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_file.h" +#include "lib/jxl/enc_params.h" +#include "lib/jxl/image.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +struct SpeedTierTestParams { + explicit SpeedTierTestParams(const SpeedTier speed_tier, + const bool shrink8 = false) + : speed_tier(speed_tier), shrink8(shrink8) {} + SpeedTier speed_tier; + bool shrink8; +}; + +std::ostream& operator<<(std::ostream& os, SpeedTierTestParams params) { + auto previous_flags = os.flags(); + os << std::boolalpha; + os << "SpeedTierTestParams{" << static_cast(params.speed_tier) + << ", /*shrink8=*/" << params.shrink8 << "}"; + os.flags(previous_flags); + return os; +} + +class SpeedTierTest : public testing::TestWithParam {}; + +JXL_GTEST_INSTANTIATE_TEST_SUITE_P( + SpeedTierTestInstantiation, SpeedTierTest, + testing::Values(SpeedTierTestParams{SpeedTier::kCheetah, + /*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kCheetah, + /*shrink8=*/false}, + SpeedTierTestParams{SpeedTier::kThunder, + /*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kThunder, + /*shrink8=*/false}, + SpeedTierTestParams{SpeedTier::kLightning, + /*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kLightning, + /*shrink8=*/false}, + SpeedTierTestParams{SpeedTier::kFalcon, + /*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kFalcon, + /*shrink8=*/false}, + SpeedTierTestParams{SpeedTier::kHare, + /*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kHare, + /*shrink8=*/false}, + SpeedTierTestParams{SpeedTier::kWombat, + /*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kWombat, + /*shrink8=*/false}, + SpeedTierTestParams{SpeedTier::kSquirrel, + 
/*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kSquirrel, + /*shrink8=*/false}, + SpeedTierTestParams{SpeedTier::kKitten, + /*shrink8=*/true}, + SpeedTierTestParams{SpeedTier::kKitten, + /*shrink8=*/false}, + // Only downscaled image for Tortoise mode. + SpeedTierTestParams{SpeedTier::kTortoise, + /*shrink8=*/true})); + +TEST_P(SpeedTierTest, Roundtrip) { + const PaddedBytes orig = jxl::test::ReadTestData( + "external/wesaturate/500px/u76c0g_bliznaca_srgb8.png"); + CodecInOut io; + test::ThreadPoolForTests pool(8); + ASSERT_TRUE(SetFromBytes(Span(orig), &io, &pool)); + + const SpeedTierTestParams& params = GetParam(); + + if (params.shrink8) { + io.ShrinkTo(io.xsize() / 8, io.ysize() / 8); + } + + CompressParams cparams; + cparams.speed_tier = params.speed_tier; + cparams.SetCms(GetJxlCms()); + + CodecInOut io2; + JXL_EXPECT_OK(test::Roundtrip(&io, cparams, {}, &io2, _)); + + // Can be 2.2 in non-hare mode. + EXPECT_LE(ButteraugliDistance(io.frames, io2.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr, /*pool=*/nullptr), + 2.8); +} +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/splines.cc b/third-party/libjxl/libjxl/lib/jxl/splines.cc new file mode 100644 index 0000000000..15fd6ce5b5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/splines.cc @@ -0,0 +1,711 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/splines.h" + +#include +#include +#include + +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/common.h" +#include "lib/jxl/dct_scales.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/splines.cc" +#include +#include + +#include "lib/jxl/fast_math-inl.h" +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Mul; +using hwy::HWY_NAMESPACE::MulAdd; +using hwy::HWY_NAMESPACE::MulSub; +using hwy::HWY_NAMESPACE::Sqrt; +using hwy::HWY_NAMESPACE::Sub; + +// Given a set of DCT coefficients, this returns the result of performing cosine +// interpolation on the original samples. +float ContinuousIDCT(const float dct[32], const float t) { + // We compute here the DCT-3 of the `dct` vector, rescaled by a factor of + // sqrt(32). This is such that an input vector vector {x, 0, ..., 0} produces + // a constant result of x. dct[0] was scaled in Dequantize() to allow uniform + // treatment of all the coefficients. 
+ constexpr float kMultipliers[32] = { + kPi / 32 * 0, kPi / 32 * 1, kPi / 32 * 2, kPi / 32 * 3, kPi / 32 * 4, + kPi / 32 * 5, kPi / 32 * 6, kPi / 32 * 7, kPi / 32 * 8, kPi / 32 * 9, + kPi / 32 * 10, kPi / 32 * 11, kPi / 32 * 12, kPi / 32 * 13, kPi / 32 * 14, + kPi / 32 * 15, kPi / 32 * 16, kPi / 32 * 17, kPi / 32 * 18, kPi / 32 * 19, + kPi / 32 * 20, kPi / 32 * 21, kPi / 32 * 22, kPi / 32 * 23, kPi / 32 * 24, + kPi / 32 * 25, kPi / 32 * 26, kPi / 32 * 27, kPi / 32 * 28, kPi / 32 * 29, + kPi / 32 * 30, kPi / 32 * 31, + }; + HWY_CAPPED(float, 32) df; + auto result = Zero(df); + const auto tandhalf = Set(df, t + 0.5f); + for (int i = 0; i < 32; i += Lanes(df)) { + auto cos_arg = Mul(LoadU(df, kMultipliers + i), tandhalf); + auto cos = FastCosf(df, cos_arg); + auto local_res = Mul(LoadU(df, dct + i), cos); + result = MulAdd(Set(df, kSqrt2), local_res, result); + } + return GetLane(SumOfLanes(df, result)); +} + +template +void DrawSegment(DF df, const SplineSegment& segment, const bool add, + const size_t y, const size_t x, float* JXL_RESTRICT rows[3]) { + Rebind di; + const auto inv_sigma = Set(df, segment.inv_sigma); + const auto half = Set(df, 0.5f); + const auto one_over_2s2 = Set(df, 0.353553391f); + const auto sigma_over_4_times_intensity = + Set(df, segment.sigma_over_4_times_intensity); + const auto dx = Sub(ConvertTo(df, Iota(di, x)), Set(df, segment.center_x)); + const auto dy = Set(df, y - segment.center_y); + const auto sqd = MulAdd(dx, dx, Mul(dy, dy)); + const auto distance = Sqrt(sqd); + const auto one_dimensional_factor = + Sub(FastErff(df, Mul(MulAdd(distance, half, one_over_2s2), inv_sigma)), + FastErff(df, Mul(MulSub(distance, half, one_over_2s2), inv_sigma))); + auto local_intensity = + Mul(sigma_over_4_times_intensity, + Mul(one_dimensional_factor, one_dimensional_factor)); + for (size_t c = 0; c < 3; ++c) { + const auto cm = Set(df, add ? 
segment.color[c] : -segment.color[c]); + const auto in = LoadU(df, rows[c] + x); + StoreU(MulAdd(cm, local_intensity, in), df, rows[c] + x); + } +} + +void DrawSegment(const SplineSegment& segment, const bool add, const size_t y, + const ssize_t x0, ssize_t x1, float* JXL_RESTRICT rows[3]) { + ssize_t x = + std::max(x0, segment.center_x - segment.maximum_distance + 0.5f); + // one-past-the-end + x1 = + std::min(x1, segment.center_x + segment.maximum_distance + 1.5f); + HWY_FULL(float) df; + for (; x + static_cast(Lanes(df)) <= x1; x += Lanes(df)) { + DrawSegment(df, segment, add, y, x, rows); + } + for (; x < x1; ++x) { + DrawSegment(HWY_CAPPED(float, 1)(), segment, add, y, x, rows); + } +} + +void ComputeSegments(const Spline::Point& center, const float intensity, + const float color[3], const float sigma, + std::vector& segments, + std::vector>& segments_by_y) { + // Sanity check sigma, inverse sigma and intensity + if (!(std::isfinite(sigma) && sigma != 0.0f && std::isfinite(1.0f / sigma) && + std::isfinite(intensity))) { + return; + } +#if JXL_HIGH_PRECISION + constexpr float kDistanceExp = 5; +#else + // About 30% faster. + constexpr float kDistanceExp = 3; +#endif + // We cap from below colors to at least 0.01. + float max_color = 0.01f; + for (size_t c = 0; c < 3; c++) { + max_color = std::max(max_color, std::abs(color[c] * intensity)); + } + // Distance beyond which max_color*intensity*exp(-d^2 / (2 * sigma^2)) drops + // below 10^-kDistanceExp. 
+ const float maximum_distance = + std::sqrt(-2 * sigma * sigma * + (std::log(0.1) * kDistanceExp - std::log(max_color))); + SplineSegment segment; + segment.center_y = center.y; + segment.center_x = center.x; + memcpy(segment.color, color, sizeof(segment.color)); + segment.inv_sigma = 1.0f / sigma; + segment.sigma_over_4_times_intensity = .25f * sigma * intensity; + segment.maximum_distance = maximum_distance; + ssize_t y0 = center.y - maximum_distance + .5f; + ssize_t y1 = center.y + maximum_distance + 1.5f; // one-past-the-end + for (ssize_t y = std::max(y0, 0); y < y1; y++) { + segments_by_y.emplace_back(y, segments.size()); + } + segments.push_back(segment); +} + +void DrawSegments(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, + float* JXL_RESTRICT row_b, const Rect& image_rect, + const bool add, const SplineSegment* segments, + const size_t* segment_indices, + const size_t* segment_y_start) { + JXL_ASSERT(image_rect.ysize() == 1); + float* JXL_RESTRICT rows[3] = {row_x - image_rect.x0(), + row_y - image_rect.x0(), + row_b - image_rect.x0()}; + size_t y = image_rect.y0(); + for (size_t i = segment_y_start[y]; i < segment_y_start[y + 1]; i++) { + DrawSegment(segments[segment_indices[i]], add, y, image_rect.x0(), + image_rect.x0() + image_rect.xsize(), rows); + } +} + +void SegmentsFromPoints( + const Spline& spline, + const std::vector>& points_to_draw, + const float arc_length, std::vector& segments, + std::vector>& segments_by_y) { + const float inv_arc_length = 1.0f / arc_length; + int k = 0; + for (const auto& point_to_draw : points_to_draw) { + const Spline::Point& point = point_to_draw.first; + const float multiplier = point_to_draw.second; + const float progress_along_arc = + std::min(1.f, (k * kDesiredRenderingDistance) * inv_arc_length); + ++k; + float color[3]; + for (size_t c = 0; c < 3; ++c) { + color[c] = + ContinuousIDCT(spline.color_dct[c], (32 - 1) * progress_along_arc); + } + const float sigma = + ContinuousIDCT(spline.sigma_dct, (32 - 
1) * progress_along_arc); + ComputeSegments(point, multiplier, color, sigma, segments, segments_by_y); + } +} +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +HWY_EXPORT(SegmentsFromPoints); +HWY_EXPORT(DrawSegments); + +namespace { + +// It is not in spec, but reasonable limit to avoid overflows. +template +Status ValidateSplinePointPos(const T& x, const T& y) { + constexpr T kSplinePosLimit = 1u << 23; + if ((x >= kSplinePosLimit) || (x <= -kSplinePosLimit) || + (y >= kSplinePosLimit) || (y <= -kSplinePosLimit)) { + return JXL_FAILURE("Spline coordinates out of bounds"); + } + return true; +} + +// Maximum number of spline control points per frame is +// std::min(kMaxNumControlPoints, xsize * ysize / 2) +constexpr size_t kMaxNumControlPoints = 1u << 20u; +constexpr size_t kMaxNumControlPointsPerPixelRatio = 2; + +float AdjustedQuant(const int32_t adjustment) { + return (adjustment >= 0) ? (1.f + .125f * adjustment) + : 1.f / (1.f - .125f * adjustment); +} + +float InvAdjustedQuant(const int32_t adjustment) { + return (adjustment >= 0) ? 1.f / (1.f + .125f * adjustment) + : (1.f - .125f * adjustment); +} + +// X, Y, B, sigma. 
+static constexpr float kChannelWeight[] = {0.0042f, 0.075f, 0.07f, .3333f}; + +Status DecodeAllStartingPoints(std::vector* const points, + BitReader* const br, ANSSymbolReader* reader, + const std::vector& context_map, + const size_t num_splines) { + points->clear(); + points->reserve(num_splines); + int64_t last_x = 0; + int64_t last_y = 0; + for (size_t i = 0; i < num_splines; i++) { + int64_t x = + reader->ReadHybridUint(kStartingPositionContext, br, context_map); + int64_t y = + reader->ReadHybridUint(kStartingPositionContext, br, context_map); + if (i != 0) { + x = UnpackSigned(x) + last_x; + y = UnpackSigned(y) + last_y; + } + JXL_RETURN_IF_ERROR(ValidateSplinePointPos(x, y)); + points->emplace_back(static_cast(x), static_cast(y)); + last_x = x; + last_y = y; + } + return true; +} + +struct Vector { + float x, y; + Vector operator-() const { return {-x, -y}; } + Vector operator+(const Vector& other) const { + return {x + other.x, y + other.y}; + } + float SquaredNorm() const { return x * x + y * y; } +}; +Vector operator*(const float k, const Vector& vec) { + return {k * vec.x, k * vec.y}; +} + +Spline::Point operator+(const Spline::Point& p, const Vector& vec) { + return {p.x + vec.x, p.y + vec.y}; +} +Vector operator-(const Spline::Point& a, const Spline::Point& b) { + return {a.x - b.x, a.y - b.y}; +} + +// TODO(eustas): avoid making a copy of "points". +void DrawCentripetalCatmullRomSpline(std::vector points, + std::vector& result) { + if (points.empty()) return; + if (points.size() == 1) { + result.push_back(points[0]); + return; + } + // Number of points to compute between each control point. + static constexpr int kNumPoints = 16; + result.reserve((points.size() - 1) * kNumPoints + 1); + points.insert(points.begin(), points[0] + (points[0] - points[1])); + points.push_back(points[points.size() - 1] + + (points[points.size() - 1] - points[points.size() - 2])); + // points has at least 4 elements at this point. 
+ for (size_t start = 0; start < points.size() - 3; ++start) { + // 4 of them are used, and we draw from p[1] to p[2]. + const Spline::Point* const p = &points[start]; + result.push_back(p[1]); + float d[3]; + float t[4]; + t[0] = 0; + for (int k = 0; k < 3; ++k) { + // TODO(eustas): for each segment delta is calculated 3 times... + // TODO(eustas): restrict d[k] with reasonable limit and spec it. + d[k] = std::sqrt(hypotf(p[k + 1].x - p[k].x, p[k + 1].y - p[k].y)); + t[k + 1] = t[k] + d[k]; + } + for (int i = 1; i < kNumPoints; ++i) { + const float tt = d[0] + (static_cast(i) / kNumPoints) * d[1]; + Spline::Point a[3]; + for (int k = 0; k < 3; ++k) { + // TODO(eustas): reciprocal multiplication would be faster. + a[k] = p[k] + ((tt - t[k]) / d[k]) * (p[k + 1] - p[k]); + } + Spline::Point b[2]; + for (int k = 0; k < 2; ++k) { + b[k] = a[k] + ((tt - t[k]) / (d[k] + d[k + 1])) * (a[k + 1] - a[k]); + } + result.push_back(b[0] + ((tt - t[1]) / d[1]) * (b[1] - b[0])); + } + } + result.push_back(points[points.size() - 2]); +} + +// Move along the line segments defined by `points`, `kDesiredRenderingDistance` +// pixels at a time, and call `functor` with each point and the actual distance +// to the previous point (which will always be kDesiredRenderingDistance except +// possibly for the very last point). +// TODO(eustas): this method always adds the last point, but never the first +// (unless those are one); I believe both ends matter. 
+template +void ForEachEquallySpacedPoint(const Points& points, const Functor& functor) { + JXL_ASSERT(!points.empty()); + Spline::Point current = points.front(); + functor(current, kDesiredRenderingDistance); + auto next = points.begin(); + while (next != points.end()) { + const Spline::Point* previous = ¤t; + float arclength_from_previous = 0.f; + for (;;) { + if (next == points.end()) { + functor(*previous, arclength_from_previous); + return; + } + const float arclength_to_next = + std::sqrt((*next - *previous).SquaredNorm()); + if (arclength_from_previous + arclength_to_next >= + kDesiredRenderingDistance) { + current = + *previous + ((kDesiredRenderingDistance - arclength_from_previous) / + arclength_to_next) * + (*next - *previous); + functor(current, kDesiredRenderingDistance); + break; + } + arclength_from_previous += arclength_to_next; + previous = &*next; + ++next; + } + } +} + +} // namespace + +QuantizedSpline::QuantizedSpline(const Spline& original, + const int32_t quantization_adjustment, + const float y_to_x, const float y_to_b) { + JXL_ASSERT(!original.control_points.empty()); + control_points_.reserve(original.control_points.size() - 1); + const Spline::Point& starting_point = original.control_points.front(); + int previous_x = static_cast(std::roundf(starting_point.x)); + int previous_y = static_cast(std::roundf(starting_point.y)); + int previous_delta_x = 0, previous_delta_y = 0; + for (auto it = original.control_points.begin() + 1; + it != original.control_points.end(); ++it) { + const int new_x = static_cast(std::roundf(it->x)); + const int new_y = static_cast(std::roundf(it->y)); + const int new_delta_x = new_x - previous_x; + const int new_delta_y = new_y - previous_y; + control_points_.emplace_back(new_delta_x - previous_delta_x, + new_delta_y - previous_delta_y); + previous_delta_x = new_delta_x; + previous_delta_y = new_delta_y; + previous_x = new_x; + previous_y = new_y; + } + + const auto to_int = [](float v) -> int { + // Maximal int 
representable with float. + constexpr float kMax = std::numeric_limits::max() - 127; + constexpr float kMin = -kMax; + return static_cast(std::roundf(Clamp1(v, kMin, kMax))); + }; + + const auto quant = AdjustedQuant(quantization_adjustment); + const auto inv_quant = InvAdjustedQuant(quantization_adjustment); + for (int c : {1, 0, 2}) { + float factor = (c == 0) ? y_to_x : (c == 1) ? 0 : y_to_b; + for (int i = 0; i < 32; ++i) { + const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; + const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; + auto restored_y = + color_dct_[1][i] * inv_dct_factor * kChannelWeight[1] * inv_quant; + auto decorellated = original.color_dct[c][i] - factor * restored_y; + color_dct_[c][i] = + to_int(decorellated * dct_factor * quant / kChannelWeight[c]); + } + } + for (int i = 0; i < 32; ++i) { + const float dct_factor = (i == 0) ? kSqrt2 : 1.0f; + sigma_dct_[i] = + to_int(original.sigma_dct[i] * dct_factor * quant / kChannelWeight[3]); + } +} + +Status QuantizedSpline::Dequantize(const Spline::Point& starting_point, + const int32_t quantization_adjustment, + const float y_to_x, const float y_to_b, + const uint64_t image_size, + uint64_t* total_estimated_area_reached, + Spline& result) const { + constexpr uint64_t kOne = static_cast(1); + const uint64_t area_limit = + std::min(1024 * image_size + (kOne << 32), kOne << 42); + + result.control_points.clear(); + result.control_points.reserve(control_points_.size() + 1); + float px = std::roundf(starting_point.x); + float py = std::roundf(starting_point.y); + JXL_RETURN_IF_ERROR(ValidateSplinePointPos(px, py)); + int current_x = static_cast(px); + int current_y = static_cast(py); + result.control_points.push_back(Spline::Point{static_cast(current_x), + static_cast(current_y)}); + int current_delta_x = 0, current_delta_y = 0; + uint64_t manhattan_distance = 0; + for (const auto& point : control_points_) { + current_delta_x += point.first; + current_delta_y += point.second; + manhattan_distance += 
std::abs(current_delta_x) + std::abs(current_delta_y); + if (manhattan_distance > area_limit) { + return JXL_FAILURE("Too large manhattan_distance reached: %" PRIu64, + manhattan_distance); + } + JXL_RETURN_IF_ERROR( + ValidateSplinePointPos(current_delta_x, current_delta_y)); + current_x += current_delta_x; + current_y += current_delta_y; + JXL_RETURN_IF_ERROR(ValidateSplinePointPos(current_x, current_y)); + result.control_points.push_back(Spline::Point{ + static_cast(current_x), static_cast(current_y)}); + } + + const auto inv_quant = InvAdjustedQuant(quantization_adjustment); + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < 32; ++i) { + const float inv_dct_factor = (i == 0) ? kSqrt0_5 : 1.0f; + result.color_dct[c][i] = + color_dct_[c][i] * inv_dct_factor * kChannelWeight[c] * inv_quant; + } + } + for (int i = 0; i < 32; ++i) { + result.color_dct[0][i] += y_to_x * result.color_dct[1][i]; + result.color_dct[2][i] += y_to_b * result.color_dct[1][i]; + } + uint64_t width_estimate = 0; + + uint64_t color[3] = {}; + for (int c = 0; c < 3; ++c) { + for (int i = 0; i < 32; ++i) { + color[c] += static_cast( + std::ceil(inv_quant * std::abs(color_dct_[c][i]))); + } + } + color[0] += static_cast(std::ceil(std::abs(y_to_x))) * color[1]; + color[2] += static_cast(std::ceil(std::abs(y_to_b))) * color[1]; + // This is not taking kChannelWeight into account, but up to constant factors + // it gives an indication of the influence of the color values on the area + // that will need to be rendered. + const uint64_t max_color = std::max({color[1], color[0], color[2]}); + uint64_t logcolor = + std::max(kOne, static_cast(CeilLog2Nonzero(kOne + max_color))); + + const float weight_limit = + std::ceil(std::sqrt((static_cast(area_limit) / logcolor) / + std::max(1, manhattan_distance))); + + for (int i = 0; i < 32; ++i) { + const float inv_dct_factor = (i == 0) ? 
kSqrt0_5 : 1.0f; + result.sigma_dct[i] = + sigma_dct_[i] * inv_dct_factor * kChannelWeight[3] * inv_quant; + // If we include the factor kChannelWeight[3]=.3333f here, we get a + // realistic area estimate. We leave it out to simplify the calculations, + // and understand that this way we underestimate the area by a factor of + // 1/(0.3333*0.3333). This is taken into account in the limits below. + float weight_f = std::ceil(inv_quant * std::abs(sigma_dct_[i])); + uint64_t weight = + static_cast(std::min(weight_limit, std::max(1.0f, weight_f))); + width_estimate += weight * weight * logcolor; + } + *total_estimated_area_reached += (width_estimate * manhattan_distance); + if (*total_estimated_area_reached > area_limit) { + return JXL_FAILURE("Too large total_estimated_area eached: %" PRIu64, + *total_estimated_area_reached); + } + + return true; +} + +Status QuantizedSpline::Decode(const std::vector& context_map, + ANSSymbolReader* const decoder, + BitReader* const br, + const size_t max_control_points, + size_t* total_num_control_points) { + const size_t num_control_points = + decoder->ReadHybridUint(kNumControlPointsContext, br, context_map); + *total_num_control_points += num_control_points; + if (*total_num_control_points > max_control_points) { + return JXL_FAILURE("Too many control points: %" PRIuS, + *total_num_control_points); + } + control_points_.resize(num_control_points); + // Maximal image dimension. + constexpr int64_t kDeltaLimit = 1u << 30; + for (std::pair& control_point : control_points_) { + control_point.first = UnpackSigned( + decoder->ReadHybridUint(kControlPointsContext, br, context_map)); + control_point.second = UnpackSigned( + decoder->ReadHybridUint(kControlPointsContext, br, context_map)); + // Check delta-deltas are not outrageous; it is not in spec, but there is + // no reason to allow larger values. 
+ if ((control_point.first >= kDeltaLimit) || + (control_point.first <= -kDeltaLimit) || + (control_point.second >= kDeltaLimit) || + (control_point.second <= -kDeltaLimit)) { + return JXL_FAILURE("Spline delta-delta is out of bounds"); + } + } + + const auto decode_dct = [decoder, br, &context_map](int dct[32]) -> Status { + constexpr int kWeirdNumber = std::numeric_limits::min(); + for (int i = 0; i < 32; ++i) { + dct[i] = + UnpackSigned(decoder->ReadHybridUint(kDCTContext, br, context_map)); + if (dct[i] == kWeirdNumber) { + return JXL_FAILURE("The weird number in spline DCT"); + } + } + return true; + }; + for (int c = 0; c < 3; ++c) { + JXL_RETURN_IF_ERROR(decode_dct(color_dct_[c])); + } + JXL_RETURN_IF_ERROR(decode_dct(sigma_dct_)); + return true; +} + +void Splines::Clear() { + quantization_adjustment_ = 0; + splines_.clear(); + starting_points_.clear(); + segments_.clear(); + segment_indices_.clear(); + segment_y_start_.clear(); +} + +Status Splines::Decode(jxl::BitReader* br, const size_t num_pixels) { + std::vector context_map; + ANSCode code; + JXL_RETURN_IF_ERROR( + DecodeHistograms(br, kNumSplineContexts, &code, &context_map)); + ANSSymbolReader decoder(&code, br); + const size_t num_splines = + 1 + decoder.ReadHybridUint(kNumSplinesContext, br, context_map); + size_t max_control_points = std::min( + kMaxNumControlPoints, num_pixels / kMaxNumControlPointsPerPixelRatio); + if (num_splines > max_control_points) { + return JXL_FAILURE("Too many splines: %" PRIuS, num_splines); + } + JXL_RETURN_IF_ERROR(DecodeAllStartingPoints(&starting_points_, br, &decoder, + context_map, num_splines)); + + quantization_adjustment_ = UnpackSigned( + decoder.ReadHybridUint(kQuantizationAdjustmentContext, br, context_map)); + + splines_.clear(); + splines_.reserve(num_splines); + size_t num_control_points = num_splines; + for (size_t i = 0; i < num_splines; ++i) { + QuantizedSpline spline; + JXL_RETURN_IF_ERROR(spline.Decode(context_map, &decoder, br, + max_control_points, 
&num_control_points)); + splines_.push_back(std::move(spline)); + } + + JXL_RETURN_IF_ERROR(decoder.CheckANSFinalState()); + + if (!HasAny()) { + return JXL_FAILURE("Decoded splines but got none"); + } + + return true; +} + +void Splines::AddTo(Image3F* const opsin, const Rect& opsin_rect, + const Rect& image_rect) const { + return Apply(opsin, opsin_rect, image_rect); +} +void Splines::AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, + float* JXL_RESTRICT row_b, const Rect& image_row) const { + return ApplyToRow(row_x, row_y, row_b, image_row); +} + +void Splines::SubtractFrom(Image3F* const opsin) const { + return Apply(opsin, Rect(*opsin), Rect(*opsin)); +} + +Status Splines::InitializeDrawCache(const size_t image_xsize, + const size_t image_ysize, + const ColorCorrelationMap& cmap) { + // TODO(veluca): avoid storing segments that are entirely outside image + // boundaries. + segments_.clear(); + segment_indices_.clear(); + segment_y_start_.clear(); + std::vector> segments_by_y; + std::vector intermediate_points; + uint64_t total_estimated_area_reached = 0; + std::vector splines; + for (size_t i = 0; i < splines_.size(); ++i) { + Spline spline; + JXL_RETURN_IF_ERROR(splines_[i].Dequantize( + starting_points_[i], quantization_adjustment_, cmap.YtoXRatio(0), + cmap.YtoBRatio(0), image_xsize * image_ysize, + &total_estimated_area_reached, spline)); + if (std::adjacent_find(spline.control_points.begin(), + spline.control_points.end()) != + spline.control_points.end()) { + // Otherwise division by zero might occur. Once control points coincide, + // the direction of curve is undefined... + return JXL_FAILURE( + "identical successive control points in spline %" PRIuS, i); + } + splines.push_back(spline); + } + // TODO(firsching) Change this into a JXL_FAILURE for level 5 codestreams. 
+ if (total_estimated_area_reached > + std::min((8 * image_xsize * image_ysize + (uint64_t(1) << 25)), + (uint64_t(1) << 30))) { + JXL_WARNING( + "Large total_estimated_area_reached, expect slower decoding: %" PRIu64, + total_estimated_area_reached); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + return JXL_FAILURE("Total spline area is too large"); +#endif + } + + for (Spline& spline : splines) { + std::vector> points_to_draw; + auto add_point = [&](const Spline::Point& point, const float multiplier) { + points_to_draw.emplace_back(point, multiplier); + }; + intermediate_points.clear(); + DrawCentripetalCatmullRomSpline(spline.control_points, intermediate_points); + ForEachEquallySpacedPoint(intermediate_points, add_point); + const float arc_length = + (points_to_draw.size() - 2) * kDesiredRenderingDistance + + points_to_draw.back().second; + if (arc_length <= 0.f) { + // This spline wouldn't have any effect. + continue; + } + HWY_DYNAMIC_DISPATCH(SegmentsFromPoints) + (spline, points_to_draw, arc_length, segments_, segments_by_y); + } + + // TODO(eustas): consider linear sorting here. 
+ std::sort(segments_by_y.begin(), segments_by_y.end()); + segment_indices_.resize(segments_by_y.size()); + segment_y_start_.resize(image_ysize + 1); + for (size_t i = 0; i < segments_by_y.size(); i++) { + segment_indices_[i] = segments_by_y[i].second; + size_t y = segments_by_y[i].first; + if (y < image_ysize) { + segment_y_start_[y + 1]++; + } + } + for (size_t y = 0; y < image_ysize; y++) { + segment_y_start_[y + 1] += segment_y_start_[y]; + } + return true; +} + +template +void Splines::ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, + float* JXL_RESTRICT row_b, + const Rect& image_row) const { + if (segments_.empty()) return; + JXL_ASSERT(image_row.ysize() == 1); + for (size_t iy = 0; iy < image_row.ysize(); iy++) { + HWY_DYNAMIC_DISPATCH(DrawSegments) + (row_x, row_y, row_b, image_row.Line(iy), add, segments_.data(), + segment_indices_.data(), segment_y_start_.data()); + } +} + +template +void Splines::Apply(Image3F* const opsin, const Rect& opsin_rect, + const Rect& image_rect) const { + if (segments_.empty()) return; + for (size_t iy = 0; iy < image_rect.ysize(); iy++) { + const size_t y0 = opsin_rect.Line(iy).y0(); + const size_t x0 = opsin_rect.x0(); + ApplyToRow(opsin->PlaneRow(0, y0) + x0, opsin->PlaneRow(1, y0) + x0, + opsin->PlaneRow(2, y0) + x0, image_rect.Line(iy)); + } +} + +} // namespace jxl +#endif // HWY_ONCE diff --git a/third-party/libjxl/libjxl/lib/jxl/splines.h b/third-party/libjxl/libjxl/lib/jxl/splines.h new file mode 100644 index 0000000000..c8dad3417c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/splines.h @@ -0,0 +1,148 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#ifndef LIB_JXL_SPLINES_H_ +#define LIB_JXL_SPLINES_H_ + +#include +#include + +#include +#include + +#include "lib/jxl/ans_params.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/chroma_from_luma.h" +#include "lib/jxl/dec_ans.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/entropy_coder.h" +#include "lib/jxl/image.h" + +namespace jxl { + +static constexpr float kDesiredRenderingDistance = 1.f; + +enum SplineEntropyContexts : size_t { + kQuantizationAdjustmentContext = 0, + kStartingPositionContext, + kNumSplinesContext, + kNumControlPointsContext, + kControlPointsContext, + kDCTContext, + kNumSplineContexts +}; + +struct Spline { + struct Point { + Point() : x(0.0f), y(0.0f) {} + Point(float x, float y) : x(x), y(y) {} + float x, y; + bool operator==(const Point& other) const { + return std::fabs(x - other.x) < 1e-3f && std::fabs(y - other.y) < 1e-3f; + } + }; + std::vector control_points; + // X, Y, B. + float color_dct[3][32]; + // Splines are draws by normalized Gaussian splatting. This controls the + // Gaussian's parameter along the spline. + float sigma_dct[32]; +}; + +class QuantizedSplineEncoder; + +class QuantizedSpline { + public: + QuantizedSpline() = default; + explicit QuantizedSpline(const Spline& original, + int32_t quantization_adjustment, float y_to_x, + float y_to_b); + + Status Dequantize(const Spline::Point& starting_point, + int32_t quantization_adjustment, float y_to_x, float y_to_b, + uint64_t image_size, uint64_t* total_estimated_area_reached, + Spline& result) const; + + Status Decode(const std::vector& context_map, + ANSSymbolReader* decoder, BitReader* br, + size_t max_control_points, size_t* total_num_control_points); + + private: + friend class QuantizedSplineEncoder; + + std::vector> + control_points_; // Double delta-encoded. + int color_dct_[3][32] = {}; + int sigma_dct_[32] = {}; +}; + +// A single "drawable unit" of a spline, i.e. a line of the region in which we +// render each Gaussian. 
The structure doesn't actually depend on the exact +// row, which allows reuse for different y values (which are tracked +// separately). +struct SplineSegment { + float center_x, center_y; + float maximum_distance; + float inv_sigma; + float sigma_over_4_times_intensity; + float color[3]; +}; + +class Splines { + public: + Splines() = default; + explicit Splines(const int32_t quantization_adjustment, + std::vector splines, + std::vector starting_points) + : quantization_adjustment_(quantization_adjustment), + splines_(std::move(splines)), + starting_points_(std::move(starting_points)) {} + + bool HasAny() const { return !splines_.empty(); } + + void Clear(); + + Status Decode(BitReader* br, size_t num_pixels); + + void AddTo(Image3F* opsin, const Rect& opsin_rect, + const Rect& image_rect) const; + void AddToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, + float* JXL_RESTRICT row_b, const Rect& image_row) const; + void SubtractFrom(Image3F* opsin) const; + + const std::vector& QuantizedSplines() const { + return splines_; + } + const std::vector& StartingPoints() const { + return starting_points_; + } + + int32_t GetQuantizationAdjustment() const { return quantization_adjustment_; } + + Status InitializeDrawCache(size_t image_xsize, size_t image_ysize, + const ColorCorrelationMap& cmap); + + private: + template + void ApplyToRow(float* JXL_RESTRICT row_x, float* JXL_RESTRICT row_y, + float* JXL_RESTRICT row_b, const Rect& image_row) const; + template + void Apply(Image3F* opsin, const Rect& opsin_rect, + const Rect& image_rect) const; + + // If positive, quantization weights are multiplied by 1 + this/8, which + // increases precision. If negative, they are divided by 1 - this/8. If 0, + // they are unchanged. 
+ int32_t quantization_adjustment_ = 0; + std::vector splines_; + std::vector starting_points_; + std::vector segments_; + std::vector segment_indices_; + std::vector segment_y_start_; +}; + +} // namespace jxl + +#endif // LIB_JXL_SPLINES_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/splines_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/splines_gbench.cc new file mode 100644 index 0000000000..78ff6d41c0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/splines_gbench.cc @@ -0,0 +1,52 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "benchmark/benchmark.h" +#include "lib/jxl/splines.h" + +namespace jxl { +namespace { + +constexpr int kQuantizationAdjustment = 0; +const ColorCorrelationMap* const cmap = new ColorCorrelationMap; +const float kYToX = cmap->YtoXRatio(0); +const float kYToB = cmap->YtoBRatio(0); + +void BM_Splines(benchmark::State& state) { + const size_t n = state.range(); + + std::vector spline_data = { + {/*control_points=*/{ + {9, 54}, {118, 159}, {97, 3}, {10, 40}, {150, 25}, {120, 300}}, + /*color_dct=*/ + {{0.03125f, 0.00625f, 0.003125f}, {1.f, 0.321875f}, {1.f, 0.24375f}}, + /*sigma_dct=*/{0.3125f, 0.f, 0.f, 0.0625f}}}; + std::vector quantized_splines; + std::vector starting_points; + for (const Spline& spline : spline_data) { + quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX, + kYToB); + starting_points.push_back(spline.control_points.front()); + } + Splines splines(kQuantizationAdjustment, std::move(quantized_splines), + std::move(starting_points)); + + Image3F drawing_area(320, 320); + ZeroFillImage(&drawing_area); + for (auto _ : state) { + for (size_t i = 0; i < n; ++i) { + JXL_CHECK(splines.InitializeDrawCache(drawing_area.xsize(), + drawing_area.ysize(), *cmap)); + splines.AddTo(&drawing_area, Rect(drawing_area), Rect(drawing_area)); + } + } + + 
state.SetItemsProcessed(n * state.iterations()); +} + +BENCHMARK(BM_Splines)->Range(1, 1 << 10); + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/splines_test.cc b/third-party/libjxl/libjxl/lib/jxl/splines_test.cc new file mode 100644 index 0000000000..358ba3f632 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/splines_test.cc @@ -0,0 +1,350 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/splines.h" + +#include "lib/extras/codec.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_splines.h" +#include "lib/jxl/image_test_utils.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +namespace jxl { + +std::ostream& operator<<(std::ostream& os, const Spline::Point& p) { + return os << "(" << p.x << ", " << p.y << ")"; +} + +std::ostream& operator<<(std::ostream& os, const Spline& spline) { + return os << "(spline with " << spline.control_points.size() + << " control points)"; +} + +namespace { + +using ::testing::AllOf; +using ::testing::Field; +using ::testing::FloatNear; +using ::testing::Pointwise; + +constexpr int kQuantizationAdjustment = 0; +const ColorCorrelationMap* const cmap = new ColorCorrelationMap; +const float kYToX = cmap->YtoXRatio(0); +const float kYToB = cmap->YtoBRatio(0); + +constexpr float kTolerance = 0.003125; + +std::vector DequantizeSplines(const Splines& splines) { + const auto& quantized_splines = splines.QuantizedSplines(); + const auto& starting_points = splines.StartingPoints(); + JXL_CHECK(quantized_splines.size() == starting_points.size()); + + std::vector dequantized; + uint64_t total = 0; + for (size_t i = 0; i < quantized_splines.size(); ++i) { + dequantized.emplace_back(); + 
JXL_CHECK(quantized_splines[i].Dequantize( + starting_points[i], kQuantizationAdjustment, kYToX, kYToB, 2u << 30u, + &total, dequantized.back())); + } + return dequantized; +} + +MATCHER(ControlPointIs, "") { + const Spline::Point& actual = std::get<0>(arg); + const Spline::Point& expected = std::get<1>(arg); + return testing::ExplainMatchResult( + AllOf(Field(&Spline::Point::x, FloatNear(expected.x, kTolerance)), + Field(&Spline::Point::y, FloatNear(expected.y, kTolerance))), + actual, result_listener); +} + +MATCHER(ControlPointsMatch, "") { + const Spline& actual = std::get<0>(arg); + const Spline& expected = std::get<1>(arg); + return testing::ExplainMatchResult( + Field(&Spline::control_points, + Pointwise(ControlPointIs(), expected.control_points)), + actual, result_listener); +} + +MATCHER(SplinesMatch, "") { + const Spline& actual = std::get<0>(arg); + const Spline& expected = std::get<1>(arg); + if (!testing::ExplainMatchResult(ControlPointsMatch(), arg, + result_listener)) { + return false; + } + for (int i = 0; i < 3; ++i) { + size_t color_dct_size = + sizeof(expected.color_dct[i]) / sizeof(expected.color_dct[i][0]); + for (size_t j = 0; j < color_dct_size; j++) { + testing::StringMatchResultListener color_dct_listener; + if (!testing::ExplainMatchResult( + FloatNear(expected.color_dct[i][j], kTolerance), + actual.color_dct[i][j], &color_dct_listener)) { + *result_listener << ", where color_dct[" << i << "][" << j + << "] don't match, " << color_dct_listener.str(); + return false; + } + } + } + size_t sigma_dct_size = + sizeof(expected.sigma_dct) / sizeof(expected.sigma_dct[0]); + for (size_t i = 0; i < sigma_dct_size; i++) { + testing::StringMatchResultListener sigma_listener; + if (!testing::ExplainMatchResult( + FloatNear(expected.sigma_dct[i], kTolerance), actual.sigma_dct[i], + &sigma_listener)) { + *result_listener << ", where sigma_dct[" << i << "] don't match, " + << sigma_listener.str(); + return false; + } + } + return true; +} + +} // 
namespace + +TEST(SplinesTest, Serialization) { + std::vector spline_data = { + {/*control_points=*/{ + {109, 54}, {218, 159}, {80, 3}, {110, 274}, {94, 185}, {17, 277}}, + /*color_dct=*/ + {{36.3, 39.7, 23.2, 67.5, 4.4, 71.5, 62.3, 32.3, 92.2, 10.1, 10.8, + 9.2, 6.1, 10.5, 79.1, 7, 24.6, 90.8, 5.5, 84, 43.8, 49, + 33.5, 78.9, 54.5, 77.9, 62.1, 51.4, 36.4, 14.3, 83.7, 35.4}, + {9.4, 53.4, 9.5, 74.9, 72.7, 26.7, 7.9, 0.9, 84.9, 23.2, 26.5, + 31.1, 91, 11.7, 74.1, 39.3, 23.7, 82.5, 4.8, 2.7, 61.2, 96.4, + 13.7, 66.7, 62.9, 82.4, 5.9, 98.7, 21.5, 7.9, 51.7, 63.1}, + {48, 39.3, 6.9, 26.3, 33.3, 6.2, 1.7, 98.9, 59.9, 59.6, 95, + 61.3, 82.7, 53, 6.1, 30.4, 34.7, 96.9, 93.4, 17, 38.8, 80.8, + 63, 18.6, 43.6, 32.3, 61, 20.2, 24.3, 28.3, 69.1, 62.4}}, + /*sigma_dct=*/{32.7, 21.5, 44.4, 1.8, 45.8, 90.6, 29.3, 59.2, + 23.7, 85.2, 84.8, 27.2, 42.1, 84.1, 50.6, 17.6, + 93.7, 4.9, 2.6, 69.8, 94.9, 52, 24.3, 18.8, + 12.1, 95.7, 28.5, 81.4, 89.9, 31.4, 74.8, 52}}, + {/*control_points=*/{{172, 309}, + {196, 277}, + {42, 238}, + {114, 350}, + {307, 290}, + {316, 269}, + {124, 66}, + {233, 267}}, + /*color_dct=*/ + {{15, 28.9, 22, 6.6, 41.8, 83, 8.6, 56.8, 68.9, 9.7, 5.4, + 19.8, 70.8, 90, 52.5, 65.2, 7.8, 23.5, 26.4, 72.2, 64.7, 87.1, + 1.3, 67.5, 46, 68.4, 65.4, 35.5, 29.1, 13, 41.6, 23.9}, + {47.7, 79.4, 62.7, 29.1, 96.8, 18.5, 17.6, 15.2, 80.5, 56, 96.2, + 59.9, 26.7, 96.1, 92.3, 42.1, 35.8, 54, 23.2, 55, 76, 35.8, + 58.4, 88.7, 2.4, 78.1, 95.6, 27.5, 6.6, 78.5, 24.1, 69.8}, + {43.8, 96.5, 0.9, 95.1, 49.1, 71.2, 25.1, 33.6, 75.2, 95, 82.1, + 19.7, 10.5, 44.9, 50, 93.3, 83.5, 99.5, 64.6, 54, 3.5, 99.7, + 45.3, 82.1, 22.4, 37.9, 60, 32.2, 12.6, 4.6, 65.5, 96.4}}, + /*sigma_dct=*/{72.5, 2.6, 41.7, 2.2, 39.7, 79.1, 69.6, 19.9, + 92.3, 71.5, 41.9, 62.1, 30, 49.4, 70.3, 45.3, + 62.5, 47.2, 46.7, 41.2, 90.8, 46.8, 91.2, 55, + 8.1, 69.6, 25.4, 84.7, 61.7, 27.6, 3.7, 46.9}}, + {/*control_points=*/{{100, 186}, + {257, 97}, + {170, 49}, + {25, 169}, + {309, 104}, + {232, 237}, + {385, 101}, 
+ {122, 168}, + {26, 300}, + {390, 88}}, + /*color_dct=*/ + {{16.9, 64.8, 4.2, 10.6, 23.5, 17, 79.3, 5.7, 60.4, 16.6, 94.9, + 63.7, 87.6, 10.5, 3.8, 61.1, 22.9, 81.9, 80.4, 40.5, 45.9, 25.4, + 39.8, 30, 50.2, 90.4, 27.9, 93.7, 65.1, 48.2, 22.3, 43.9}, + {24.9, 66, 3.5, 90.2, 97.1, 15.8, 35.6, 0.6, 68, 39.6, 24.4, + 85.9, 57.7, 77.6, 47.5, 67.9, 4.3, 5.4, 91.2, 58.5, 0.1, 52.2, + 3.5, 47.8, 63.2, 43.5, 85.8, 35.8, 50.2, 35.9, 19.2, 48.2}, + {82.8, 44.9, 76.4, 39.5, 94.1, 14.3, 89.8, 10, 10.5, 74.5, 56.3, + 65.8, 7.8, 23.3, 52.8, 99.3, 56.8, 46, 76.7, 13.5, 67, 22.4, + 29.9, 43.3, 70.3, 26, 74.3, 53.9, 62, 19.1, 49.3, 46.7}}, + /*sigma_dct=*/{83.5, 1.7, 25.1, 18.7, 46.5, 75.3, 28, 62.3, + 50.3, 23.3, 85.6, 96, 45.8, 33.1, 33.4, 52.9, + 26.3, 58.5, 19.6, 70, 92.6, 22.5, 57, 21.6, + 76.8, 87.5, 22.9, 66.3, 35.7, 35.6, 56.8, 67.2}}, + }; + + std::vector quantized_splines; + std::vector starting_points; + for (const Spline& spline : spline_data) { + quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX, + kYToB); + starting_points.push_back(spline.control_points.front()); + } + + Splines splines(kQuantizationAdjustment, std::move(quantized_splines), + std::move(starting_points)); + const std::vector quantized_spline_data = DequantizeSplines(splines); + EXPECT_THAT(quantized_spline_data, + Pointwise(ControlPointsMatch(), spline_data)); + + BitWriter writer; + EncodeSplines(splines, &writer, kLayerSplines, HistogramParams(), nullptr); + writer.ZeroPadToByte(); + const size_t bits_written = writer.BitsWritten(); + + printf("Wrote %" PRIuS " bits of splines.\n", bits_written); + + BitReader reader(writer.GetSpan()); + Splines decoded_splines; + ASSERT_TRUE(decoded_splines.Decode(&reader, /*num_pixels=*/1000)); + ASSERT_TRUE(reader.JumpToByteBoundary()); + EXPECT_EQ(reader.TotalBitsConsumed(), bits_written); + ASSERT_TRUE(reader.Close()); + + const std::vector decoded_spline_data = + DequantizeSplines(decoded_splines); + EXPECT_THAT(decoded_spline_data, + 
Pointwise(SplinesMatch(), quantized_spline_data)); +} + +#ifdef JXL_CRASH_ON_ERROR +TEST(SplinesTest, DISABLED_TooManySplinesTest) { +#else +TEST(SplinesTest, TooManySplinesTest) { +#endif + // This is more than the limit for 1000 pixels. + const size_t kNumSplines = 300; + + std::vector quantized_splines; + std::vector starting_points; + for (size_t i = 0; i < kNumSplines; i++) { + Spline spline = { + /*control_points=*/{{1.f + i, 2}, {10.f + i, 25}, {30.f + i, 300}}, + /*color_dct=*/ + {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}}, + /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}}; + quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX, + kYToB); + starting_points.push_back(spline.control_points.front()); + } + + Splines splines(kQuantizationAdjustment, std::move(quantized_splines), + std::move(starting_points)); + BitWriter writer; + EncodeSplines(splines, &writer, kLayerSplines, + HistogramParams(SpeedTier::kFalcon, 1), nullptr); + writer.ZeroPadToByte(); + // Re-read splines. + BitReader reader(writer.GetSpan()); + Splines decoded_splines; + EXPECT_FALSE(decoded_splines.Decode(&reader, /*num_pixels=*/1000)); + EXPECT_TRUE(reader.Close()); +} + +#ifdef JXL_CRASH_ON_ERROR +TEST(SplinesTest, DISABLED_DuplicatePoints) { +#else +TEST(SplinesTest, DuplicatePoints) { +#endif + std::vector control_points{ + {9, 54}, {118, 159}, {97, 3}, // Repeated. 
+ {97, 3}, {10, 40}, {150, 25}, {120, 300}}; + Spline spline{control_points, + /*color_dct=*/ + {{1.f, 0.2f, 0.1f}, {35.7f, 10.3f}, {35.7f, 7.8f}}, + /*sigma_dct=*/{10.f, 0.f, 0.f, 2.f}}; + std::vector spline_data{spline}; + std::vector quantized_splines; + std::vector starting_points; + for (const Spline& spline : spline_data) { + quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX, + kYToB); + starting_points.push_back(spline.control_points.front()); + } + Splines splines(kQuantizationAdjustment, std::move(quantized_splines), + std::move(starting_points)); + + Image3F image(320, 320); + ZeroFillImage(&image); + EXPECT_FALSE( + splines.InitializeDrawCache(image.xsize(), image.ysize(), *cmap)); +} + +TEST(SplinesTest, Drawing) { + CodecInOut io_expected; + const PaddedBytes orig = jxl::test::ReadTestData("jxl/splines.pfm"); + ASSERT_TRUE(SetFromBytes(Span(orig), &io_expected, + /*pool=*/nullptr)); + + std::vector control_points{{9, 54}, {118, 159}, {97, 3}, + {10, 40}, {150, 25}, {120, 300}}; + // Use values that survive quant/decorrelation roundtrip. 
+ const Spline spline{ + control_points, + /*color_dct=*/ + {{0.4989345073699951171875000f, 0.4997999966144561767578125f}, + {0.4772970676422119140625000f, 0.f, 0.5250000357627868652343750f}, + {-0.0176776945590972900390625f, 0.4900000095367431640625000f, + 0.5250000357627868652343750f}}, + /*sigma_dct=*/ + {0.9427147507667541503906250f, 0.f, 0.f, 0.f, 0.f, 0.f, 0.f, + 0.6665999889373779296875000f}}; + std::vector spline_data = {spline}; + std::vector quantized_splines; + std::vector starting_points; + for (const Spline& spline : spline_data) { + quantized_splines.emplace_back(spline, kQuantizationAdjustment, kYToX, + kYToB); + starting_points.push_back(spline.control_points.front()); + } + Splines splines(kQuantizationAdjustment, std::move(quantized_splines), + std::move(starting_points)); + + Image3F image(320, 320); + ZeroFillImage(&image); + ASSERT_TRUE(splines.InitializeDrawCache(image.xsize(), image.ysize(), *cmap)); + splines.AddTo(&image, Rect(image), Rect(image)); + + CodecInOut io_actual; + Image3F image2(320, 320); + CopyImageTo(image, &image2); + io_actual.SetFromImage(std::move(image2), ColorEncoding::SRGB()); + ASSERT_TRUE(io_actual.frames[0].TransformTo(io_expected.Main().c_current(), + GetJxlCms())); + + JXL_ASSERT_OK(VerifyRelativeError( + *io_expected.Main().color(), *io_actual.Main().color(), 1e-2f, 1e-1f, _)); +} + +TEST(SplinesTest, ClearedEveryFrame) { + CodecInOut io_expected; + const PaddedBytes bytes_expected = + jxl::test::ReadTestData("jxl/spline_on_first_frame.png"); + ASSERT_TRUE(SetFromBytes(Span(bytes_expected), &io_expected, + /*pool=*/nullptr)); + CodecInOut io_actual; + const PaddedBytes bytes_actual = + jxl::test::ReadTestData("jxl/spline_on_first_frame.jxl"); + ASSERT_TRUE( + test::DecodeFile({}, Span(bytes_actual), &io_actual)); + + ASSERT_TRUE( + io_actual.frames[0].TransformTo(ColorEncoding::SRGB(), GetJxlCms())); + for (size_t c = 0; c < 3; ++c) { + for (size_t y = 0; y < io_actual.ysize(); ++y) { + float* const JXL_RESTRICT 
row = io_actual.Main().color()->PlaneRow(c, y); + for (size_t x = 0; x < io_actual.xsize(); ++x) { + row[x] = Clamp1(row[x], 0.f, 1.f); + } + } + } + JXL_ASSERT_OK(VerifyRelativeError( + *io_expected.Main().color(), *io_actual.Main().color(), 1e-2f, 1e-1f, _)); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/test_image.cc b/third-party/libjxl/libjxl/lib/jxl/test_image.cc new file mode 100644 index 0000000000..af1d1293ef --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/test_image.cc @@ -0,0 +1,453 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/test_image.h" + +#include + +#include +#include +#include + +#include "lib/extras/dec/color_description.h" +#include "lib/extras/dec/color_hints.h" +#include "lib/extras/dec/decode.h" +#include "lib/jxl/base/byte_order.h" +#include "lib/jxl/base/random.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/color_encoding_internal.h" + +namespace jxl { +namespace test { + +namespace { + +void StoreValue(float val, size_t bits_per_sample, JxlPixelFormat format, + uint8_t** out) { + const float mul = (1u << bits_per_sample) - 1; + if (format.data_type == JXL_TYPE_UINT8) { + **out = val * mul; + } else if (format.data_type == JXL_TYPE_UINT16) { + uint16_t uval = val * mul; + if (SwapEndianness(format.endianness)) { + uval = JXL_BSWAP16(uval); + } + memcpy(*out, &uval, 2); + } else if (format.data_type == JXL_TYPE_FLOAT) { + // TODO(szabadka) Add support for custom bits / exponent bits floats. + if (SwapEndianness(format.endianness)) { + val = BSwapFloat(val); + } + memcpy(*out, &val, 4); + } else { + // TODO(szabadka) Add support for FLOAT16. 
+ } + *out += extras::PackedImage::BitsPerChannel(format.data_type) / 8; +} + +void FillPackedImage(size_t bits_per_sample, uint16_t seed, + extras::PackedImage* image) { + const size_t xsize = image->xsize; + const size_t ysize = image->ysize; + const JxlPixelFormat format = image->format; + + // Cause more significant image difference for successive seeds. + Rng generator(seed); + + // Returns random integer in interval [0, max_value) + auto rngu = [&generator](size_t max_value) -> size_t { + return generator.UniformU(0, max_value); + }; + + // Returns random float in interval [0.0, max_value) + auto rngf = [&generator](float max_value) { + return generator.UniformF(0.0f, max_value); + }; + + // Dark background gradient color + float r0 = rngf(0.5f); + float g0 = rngf(0.5f); + float b0 = rngf(0.5f); + float a0 = rngf(0.5f); + float r1 = rngf(0.5f); + float g1 = rngf(0.5f); + float b1 = rngf(0.5f); + float a1 = rngf(0.5f); + + // Circle with different color + size_t circle_x = rngu(xsize); + size_t circle_y = rngu(ysize); + size_t circle_r = rngu(std::min(xsize, ysize)); + + // Rectangle with random noise + size_t rect_x0 = rngu(xsize); + size_t rect_y0 = rngu(ysize); + size_t rect_x1 = rngu(xsize); + size_t rect_y1 = rngu(ysize); + if (rect_x1 < rect_x0) std::swap(rect_x0, rect_x1); + if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1); + + // Create pixel content to test, actual content does not matter as long as it + // can be compared after roundtrip. 
+ uint8_t* out = reinterpret_cast(image->pixels()); + const float imul16 = 1.0f / 65536.0f; + for (size_t y = 0; y < ysize; y++) { + for (size_t x = 0; x < xsize; x++) { + float r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize; + float g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize; + float b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize; + float a = a0 * (ysize - y - 1) / ysize + a1 * y / ysize; + // put some shape in there for visual debugging + if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) < + circle_r * circle_r) { + r = std::min(1.0f, ((65535 - x * y) ^ seed) * imul16); + g = std::min(1.0f, ((x << 8) + y + seed) * imul16); + b = std::min(1.0f, ((y << 8) + x * seed) * imul16); + a = std::min(1.0f, (32768 + x * 256 - y) * imul16); + } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) { + r = rngf(1.0f); + g = rngf(1.0f); + b = rngf(1.0f); + a = rngf(1.0f); + } + if (format.num_channels == 1) { + StoreValue(g, bits_per_sample, format, &out); + } else if (format.num_channels == 2) { + StoreValue(g, bits_per_sample, format, &out); + StoreValue(a, bits_per_sample, format, &out); + } else if (format.num_channels == 3) { + StoreValue(r, bits_per_sample, format, &out); + StoreValue(g, bits_per_sample, format, &out); + StoreValue(b, bits_per_sample, format, &out); + } else if (format.num_channels == 4) { + StoreValue(r, bits_per_sample, format, &out); + StoreValue(g, bits_per_sample, format, &out); + StoreValue(b, bits_per_sample, format, &out); + StoreValue(a, bits_per_sample, format, &out); + } + } + } +} + +} // namespace + +std::vector GetSomeTestImage(size_t xsize, size_t ysize, + size_t num_channels, uint16_t seed) { + // Cause more significant image difference for successive seeds. 
+ Rng generator(seed); + + // Returns random integer in interval [0, max_value) + auto rng = [&generator](size_t max_value) -> size_t { + return generator.UniformU(0, max_value); + }; + + // Dark background gradient color + uint16_t r0 = rng(32768); + uint16_t g0 = rng(32768); + uint16_t b0 = rng(32768); + uint16_t a0 = rng(32768); + uint16_t r1 = rng(32768); + uint16_t g1 = rng(32768); + uint16_t b1 = rng(32768); + uint16_t a1 = rng(32768); + + // Circle with different color + size_t circle_x = rng(xsize); + size_t circle_y = rng(ysize); + size_t circle_r = rng(std::min(xsize, ysize)); + + // Rectangle with random noise + size_t rect_x0 = rng(xsize); + size_t rect_y0 = rng(ysize); + size_t rect_x1 = rng(xsize); + size_t rect_y1 = rng(ysize); + if (rect_x1 < rect_x0) std::swap(rect_x0, rect_x1); + if (rect_y1 < rect_y0) std::swap(rect_y0, rect_y1); + + size_t num_pixels = xsize * ysize; + // 16 bits per channel, big endian, num_channels channels + std::vector pixels(num_pixels * num_channels * 2); + // Create pixel content to test, actual content does not matter as long as it + // can be compared after roundtrip. 
+ for (size_t y = 0; y < ysize; y++) { + for (size_t x = 0; x < xsize; x++) { + uint16_t r = r0 * (ysize - y - 1) / ysize + r1 * y / ysize; + uint16_t g = g0 * (ysize - y - 1) / ysize + g1 * y / ysize; + uint16_t b = b0 * (ysize - y - 1) / ysize + b1 * y / ysize; + uint16_t a = a0 * (ysize - y - 1) / ysize + a1 * y / ysize; + // put some shape in there for visual debugging + if ((x - circle_x) * (x - circle_x) + (y - circle_y) * (y - circle_y) < + circle_r * circle_r) { + r = (65535 - x * y) ^ seed; + g = (x << 8) + y + seed; + b = (y << 8) + x * seed; + a = 32768 + x * 256 - y; + } else if (x > rect_x0 && x < rect_x1 && y > rect_y0 && y < rect_y1) { + r = rng(65536); + g = rng(65536); + b = rng(65536); + a = rng(65536); + } + size_t i = (y * xsize + x) * 2 * num_channels; + pixels[i + 0] = (r >> 8); + pixels[i + 1] = (r & 255); + if (num_channels >= 2) { + // This may store what is called 'g' in the alpha channel of a 2-channel + // image, but that's ok since the content is arbitrary + pixels[i + 2] = (g >> 8); + pixels[i + 3] = (g & 255); + } + if (num_channels >= 3) { + pixels[i + 4] = (b >> 8); + pixels[i + 5] = (b & 255); + } + if (num_channels >= 4) { + pixels[i + 6] = (a >> 8); + pixels[i + 7] = (a & 255); + } + } + } + return pixels; +} + +TestImage::TestImage() { + SetChannels(3); + SetAllBitDepths(8); + SetColorEncoding("RGB_D65_SRG_Rel_SRG"); +} + +TestImage& TestImage::DecodeFromBytes(const PaddedBytes& bytes) { + ColorEncoding c_enc; + JXL_CHECK( + ConvertExternalToInternalColorEncoding(ppf_.color_encoding, &c_enc)); + extras::ColorHints color_hints; + color_hints.Add("color_space", Description(c_enc)); + JXL_CHECK( + extras::DecodeBytes(Span(bytes), color_hints, &ppf_)); + return *this; +} + +TestImage& TestImage::ClearMetadata() { + ppf_.metadata = extras::PackedMetadata(); + return *this; +} + +TestImage& TestImage::SetDimensions(size_t xsize, size_t ysize) { + if (xsize <= ppf_.info.xsize && ysize <= ppf_.info.ysize) { + for (auto& frame : 
ppf_.frames) { + CropLayerInfo(xsize, ysize, &frame.frame_info.layer_info); + CropImage(xsize, ysize, &frame.color); + for (auto& ec : frame.extra_channels) { + CropImage(xsize, ysize, &ec); + } + } + } else { + JXL_CHECK(ppf_.info.xsize == 0 && ppf_.info.ysize == 0); + } + ppf_.info.xsize = xsize; + ppf_.info.ysize = ysize; + return *this; +} + +TestImage& TestImage::SetChannels(size_t num_channels) { + JXL_CHECK(ppf_.frames.empty()); + JXL_CHECK(!ppf_.preview_frame); + ppf_.info.num_color_channels = num_channels < 3 ? 1 : 3; + ppf_.info.num_extra_channels = num_channels - ppf_.info.num_color_channels; + if (ppf_.info.num_extra_channels > 0 && ppf_.info.alpha_bits == 0) { + ppf_.info.alpha_bits = ppf_.info.bits_per_sample; + ppf_.info.alpha_exponent_bits = ppf_.info.exponent_bits_per_sample; + } + ppf_.extra_channels_info.clear(); + for (size_t i = 1; i < ppf_.info.num_extra_channels; ++i) { + extras::PackedExtraChannel ec; + ec.index = i; + JxlEncoderInitExtraChannelInfo(JXL_CHANNEL_ALPHA, &ec.ec_info); + if (ec.ec_info.bits_per_sample == 0) { + ec.ec_info.bits_per_sample = ppf_.info.bits_per_sample; + ec.ec_info.exponent_bits_per_sample = ppf_.info.exponent_bits_per_sample; + } + ppf_.extra_channels_info.emplace_back(std::move(ec)); + } + format_.num_channels = std::min(static_cast(4), num_channels); + if (ppf_.info.num_color_channels == 1 && + ppf_.color_encoding.color_space != JXL_COLOR_SPACE_GRAY) { + SetColorEncoding("Gra_D65_Rel_SRG"); + } + return *this; +} + +// Sets the same bit depth on color, alpha and all extra channels. 
+TestImage& TestImage::SetAllBitDepths(uint32_t bits_per_sample, + uint32_t exponent_bits_per_sample) { + ppf_.info.bits_per_sample = bits_per_sample; + ppf_.info.exponent_bits_per_sample = exponent_bits_per_sample; + if (ppf_.info.num_extra_channels > 0) { + ppf_.info.alpha_bits = bits_per_sample; + ppf_.info.alpha_exponent_bits = exponent_bits_per_sample; + } + for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) { + extras::PackedExtraChannel& ec = ppf_.extra_channels_info[i]; + ec.ec_info.bits_per_sample = bits_per_sample; + ec.ec_info.exponent_bits_per_sample = exponent_bits_per_sample; + } + format_.data_type = DefaultDataType(ppf_.info); + return *this; +} + +TestImage& TestImage::SetDataType(JxlDataType data_type) { + format_.data_type = data_type; + return *this; +} + +TestImage& TestImage::SetEndianness(JxlEndianness endianness) { + format_.endianness = endianness; + return *this; +} + +TestImage& TestImage::SetColorEncoding(const std::string& description) { + JXL_CHECK(ParseDescription(description, &ppf_.color_encoding)); + ColorEncoding c_enc; + JXL_CHECK( + ConvertExternalToInternalColorEncoding(ppf_.color_encoding, &c_enc)); + JXL_CHECK(c_enc.CreateICC()); + PaddedBytes icc = c_enc.ICC(); + ppf_.icc.assign(icc.begin(), icc.end()); + return *this; +} + +TestImage& TestImage::CoalesceGIFAnimationWithAlpha() { + extras::PackedFrame canvas = ppf_.frames[0].Copy(); + JXL_CHECK(canvas.color.format.num_channels == 3); + JXL_CHECK(canvas.color.format.data_type == JXL_TYPE_UINT8); + JXL_CHECK(canvas.extra_channels.size() == 1); + for (size_t i = 1; i < ppf_.frames.size(); i++) { + const extras::PackedFrame& frame = ppf_.frames[i]; + JXL_CHECK(frame.extra_channels.size() == 1); + const JxlLayerInfo& layer_info = frame.frame_info.layer_info; + extras::PackedFrame rendered = canvas.Copy(); + uint8_t* pixels_rendered = + reinterpret_cast(rendered.color.pixels()); + const uint8_t* pixels_frame = + reinterpret_cast(frame.color.pixels()); + uint8_t* 
alpha_rendered = + reinterpret_cast(rendered.extra_channels[0].pixels()); + const uint8_t* alpha_frame = + reinterpret_cast(frame.extra_channels[0].pixels()); + for (size_t y = 0; y < frame.color.ysize; y++) { + for (size_t x = 0; x < frame.color.xsize; x++) { + size_t idx_frame = y * frame.color.xsize + x; + size_t idx_rendered = ((layer_info.crop_y0 + y) * rendered.color.xsize + + (layer_info.crop_x0 + x)); + if (alpha_frame[idx_frame] != 0) { + memcpy(&pixels_rendered[idx_rendered * 3], + &pixels_frame[idx_frame * 3], 3); + alpha_rendered[idx_rendered] = alpha_frame[idx_frame]; + } + } + } + if (layer_info.save_as_reference != 0) { + canvas = rendered.Copy(); + } + ppf_.frames[i] = std::move(rendered); + } + return *this; +} + +TestImage::Frame::Frame(TestImage* parent, bool is_preview, size_t index) + : parent_(parent), is_preview_(is_preview), index_(index) {} + +void TestImage::Frame::ZeroFill() { + memset(frame().color.pixels(), 0, frame().color.pixels_size); + for (auto& ec : frame().extra_channels) { + memset(ec.pixels(), 0, ec.pixels_size); + } +} + +void TestImage::Frame::RandomFill(uint16_t seed) { + FillPackedImage(ppf().info.bits_per_sample, seed, &frame().color); + for (size_t i = 0; i < ppf().extra_channels_info.size(); ++i) { + FillPackedImage(ppf().extra_channels_info[i].ec_info.bits_per_sample, + seed + 1 + i, &frame().extra_channels[i]); + } +} + +void TestImage::Frame::SetValue(size_t y, size_t x, size_t c, float val) { + const extras::PackedImage& color = frame().color; + JxlPixelFormat format = color.format; + JXL_CHECK(y < ppf().info.ysize); + JXL_CHECK(x < ppf().info.xsize); + JXL_CHECK(c < format.num_channels); + size_t pwidth = extras::PackedImage::BitsPerChannel(format.data_type) / 8; + size_t idx = ((y * color.xsize + x) * format.num_channels + c) * pwidth; + uint8_t* pixels = reinterpret_cast(frame().color.pixels()); + uint8_t* p = pixels + idx; + StoreValue(val, ppf().info.bits_per_sample, frame().color.format, &p); +} + 
+TestImage::Frame TestImage::AddFrame() { + size_t index = ppf_.frames.size(); + extras::PackedFrame frame(ppf_.info.xsize, ppf_.info.ysize, format_); + for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) { + JxlPixelFormat ec_format = {1, format_.data_type, format_.endianness, 0}; + extras::PackedImage image(ppf_.info.xsize, ppf_.info.ysize, ec_format); + frame.extra_channels.emplace_back(std::move(image)); + } + ppf_.frames.emplace_back(std::move(frame)); + return Frame(this, false, index); +} + +TestImage::Frame TestImage::AddPreview(size_t xsize, size_t ysize) { + extras::PackedFrame frame(xsize, ysize, format_); + for (size_t i = 0; i < ppf_.extra_channels_info.size(); ++i) { + JxlPixelFormat ec_format = {1, format_.data_type, format_.endianness, 0}; + extras::PackedImage image(xsize, ysize, ec_format); + frame.extra_channels.emplace_back(std::move(image)); + } + ppf_.preview_frame = make_unique(std::move(frame)); + return Frame(this, true, 0); +} + +void TestImage::CropLayerInfo(size_t xsize, size_t ysize, JxlLayerInfo* info) { + if (info->crop_x0 < static_cast(xsize)) { + info->xsize = std::min(info->xsize, xsize - info->crop_x0); + } else { + info->xsize = 0; + } + if (info->crop_y0 < static_cast(ysize)) { + info->ysize = std::min(info->ysize, ysize - info->crop_y0); + } else { + info->ysize = 0; + } +} + +void TestImage::CropImage(size_t xsize, size_t ysize, + extras::PackedImage* image) { + size_t new_stride = (image->stride / image->xsize) * xsize; + uint8_t* buf = reinterpret_cast(image->pixels()); + for (size_t y = 0; y < ysize; ++y) { + memmove(&buf[y * new_stride], &buf[y * image->stride], new_stride); + } + image->xsize = xsize; + image->ysize = ysize; + image->stride = new_stride; + image->pixels_size = ysize * new_stride; +} + +JxlDataType TestImage::DefaultDataType(const JxlBasicInfo& info) { + if (info.bits_per_sample == 16 && info.exponent_bits_per_sample == 5) { + return JXL_TYPE_FLOAT16; + } else if (info.exponent_bits_per_sample > 0 
|| info.bits_per_sample > 16) { + return JXL_TYPE_FLOAT; + } else if (info.bits_per_sample > 8) { + return JXL_TYPE_UINT16; + } else { + return JXL_TYPE_UINT8; + } +} + +} // namespace test +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/test_image.h b/third-party/libjxl/libjxl/lib/jxl/test_image.h new file mode 100644 index 0000000000..0106a4b341 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/test_image.h @@ -0,0 +1,94 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_TEST_IMAGE_H_ +#define LIB_JXL_TEST_IMAGE_H_ + +#include +#include +#include +#include + +#include +#include + +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/padded_bytes.h" + +namespace jxl { +namespace test { + +// Returns a test image with some autogenerated pixel content, using 16 bits per +// channel, big endian order, 1 to 4 channels +// The seed parameter allows to create images with different pixel content. +std::vector GetSomeTestImage(size_t xsize, size_t ysize, + size_t num_channels, uint16_t seed); + +class TestImage { + public: + TestImage(); + + extras::PackedPixelFile& ppf() { return ppf_; } + + TestImage& DecodeFromBytes(const PaddedBytes& bytes); + + TestImage& ClearMetadata(); + + TestImage& SetDimensions(size_t xsize, size_t ysize); + + TestImage& SetChannels(size_t num_channels); + + // Sets the same bit depth on color, alpha and all extra channels. 
+ TestImage& SetAllBitDepths(uint32_t bits_per_sample, + uint32_t exponent_bits_per_sample = 0); + + TestImage& SetDataType(JxlDataType data_type); + + TestImage& SetEndianness(JxlEndianness endianness); + + TestImage& SetColorEncoding(const std::string& description); + + TestImage& CoalesceGIFAnimationWithAlpha(); + + class Frame { + public: + Frame(TestImage* parent, bool is_preview, size_t index); + + void ZeroFill(); + void RandomFill(uint16_t seed = 177); + + void SetValue(size_t y, size_t x, size_t c, float val); + + private: + extras::PackedPixelFile& ppf() const { return parent_->ppf(); } + + extras::PackedFrame& frame() { + return is_preview_ ? *ppf().preview_frame : ppf().frames[index_]; + } + + TestImage* parent_; + bool is_preview_; + size_t index_; + }; + + Frame AddFrame(); + + Frame AddPreview(size_t xsize, size_t ysize); + + private: + extras::PackedPixelFile ppf_; + JxlPixelFormat format_ = {3, JXL_TYPE_UINT8, JXL_LITTLE_ENDIAN, 0}; + + static void CropLayerInfo(size_t xsize, size_t ysize, JxlLayerInfo* info); + + static void CropImage(size_t xsize, size_t ysize, extras::PackedImage* image); + + static JxlDataType DefaultDataType(const JxlBasicInfo& info); +}; + +} // namespace test +} // namespace jxl + +#endif // LIB_JXL_TEST_IMAGE_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/test_utils.cc b/third-party/libjxl/libjxl/lib/jxl/test_utils.cc new file mode 100644 index 0000000000..eb2e3c4ce0 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/test_utils.cc @@ -0,0 +1,672 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/jxl/test_utils.h" + +#include +#include +#include +#include + +#include "lib/extras/metrics.h" +#include "lib/extras/packed_image_convert.h" +#include "lib/jxl/base/float.h" +#include "lib/jxl/base/printf_macros.h" +#include "lib/jxl/enc_butteraugli_comparator.h" +#include "lib/jxl/enc_cache.h" +#include "lib/jxl/enc_color_management.h" +#include "lib/jxl/enc_external_image.h" +#include "lib/jxl/enc_file.h" + +#if !defined(TEST_DATA_PATH) +#include "tools/cpp/runfiles/runfiles.h" +#endif + +namespace jxl { +namespace test { + +#if defined(TEST_DATA_PATH) +std::string GetTestDataPath(const std::string& filename) { + return std::string(TEST_DATA_PATH "/") + filename; +} +#else +using bazel::tools::cpp::runfiles::Runfiles; +const std::unique_ptr kRunfiles(Runfiles::Create("")); +std::string GetTestDataPath(const std::string& filename) { + std::string root(JPEGXL_ROOT_PACKAGE "/testdata/"); + return kRunfiles->Rlocation(root + filename); +} +#endif + +PaddedBytes ReadTestData(const std::string& filename) { + std::string full_path = GetTestDataPath(filename); + fprintf(stderr, "ReadTestData %s\n", full_path.c_str()); + std::ifstream file(full_path, std::ios::binary); + std::vector str((std::istreambuf_iterator(file)), + std::istreambuf_iterator()); + JXL_CHECK(file.good()); + const uint8_t* raw = reinterpret_cast(str.data()); + std::vector data(raw, raw + str.size()); + printf("Test data %s is %d bytes long.\n", filename.c_str(), + static_cast(data.size())); + PaddedBytes result; + result.append(data); + return result; +} + +void DefaultAcceptedFormats(extras::JXLDecompressParams& dparams) { + if (dparams.accepted_formats.empty()) { + for (const uint32_t num_channels : {1, 2, 3, 4}) { + dparams.accepted_formats.push_back( + {num_channels, JXL_TYPE_FLOAT, JXL_LITTLE_ENDIAN, /*align=*/0}); + } + } +} + +Status DecodeFile(extras::JXLDecompressParams dparams, + const Span file, CodecInOut* JXL_RESTRICT io, + ThreadPool* pool) { + 
DefaultAcceptedFormats(dparams); + SetThreadParallelRunner(dparams, pool); + extras::PackedPixelFile ppf; + JXL_RETURN_IF_ERROR(DecodeImageJXL(file.data(), file.size(), dparams, + /*decoded_bytes=*/nullptr, &ppf)); + JXL_RETURN_IF_ERROR(ConvertPackedPixelFileToCodecInOut(ppf, pool, io)); + return true; +} + +void JxlBasicInfoSetFromPixelFormat(JxlBasicInfo* basic_info, + const JxlPixelFormat* pixel_format) { + JxlEncoderInitBasicInfo(basic_info); + switch (pixel_format->data_type) { + case JXL_TYPE_FLOAT: + basic_info->bits_per_sample = 32; + basic_info->exponent_bits_per_sample = 8; + break; + case JXL_TYPE_FLOAT16: + basic_info->bits_per_sample = 16; + basic_info->exponent_bits_per_sample = 5; + break; + case JXL_TYPE_UINT8: + basic_info->bits_per_sample = 8; + basic_info->exponent_bits_per_sample = 0; + break; + case JXL_TYPE_UINT16: + basic_info->bits_per_sample = 16; + basic_info->exponent_bits_per_sample = 0; + break; + default: + JXL_ABORT("Unhandled JxlDataType"); + } + if (pixel_format->num_channels < 3) { + basic_info->num_color_channels = 1; + } else { + basic_info->num_color_channels = 3; + } + if (pixel_format->num_channels == 2 || pixel_format->num_channels == 4) { + basic_info->alpha_exponent_bits = basic_info->exponent_bits_per_sample; + basic_info->alpha_bits = basic_info->bits_per_sample; + basic_info->num_extra_channels = 1; + } else { + basic_info->alpha_exponent_bits = 0; + basic_info->alpha_bits = 0; + } +} + +ColorEncoding ColorEncodingFromDescriptor(const ColorEncodingDescriptor& desc) { + ColorEncoding c; + c.SetColorSpace(desc.color_space); + if (desc.color_space != ColorSpace::kXYB) { + c.white_point = desc.white_point; + c.primaries = desc.primaries; + c.tf.SetTransferFunction(desc.tf); + } + c.rendering_intent = desc.rendering_intent; + JXL_CHECK(c.CreateICC()); + return c; +} + +namespace { +void CheckSameEncodings(const std::vector& a, + const std::vector& b, + const std::string& check_name, + std::stringstream& failures) { + 
JXL_CHECK(a.size() == b.size()); + for (size_t i = 0; i < a.size(); ++i) { + if ((a[i].ICC() == b[i].ICC()) || + ((a[i].primaries == b[i].primaries) && a[i].tf.IsSame(b[i].tf))) { + continue; + } + failures << "CheckSameEncodings " << check_name << ": " << i + << "-th encoding mismatch\n"; + } +} +} // namespace + +bool Roundtrip(const CodecInOut* io, const CompressParams& cparams, + extras::JXLDecompressParams dparams, + CodecInOut* JXL_RESTRICT io2, std::stringstream& failures, + size_t* compressed_size, ThreadPool* pool, AuxOut* aux_out) { + DefaultAcceptedFormats(dparams); + if (compressed_size) { + *compressed_size = static_cast(-1); + } + PaddedBytes compressed; + + std::vector original_metadata_encodings; + std::vector original_current_encodings; + std::vector metadata_encodings_1; + std::vector metadata_encodings_2; + std::vector current_encodings_2; + original_metadata_encodings.reserve(io->frames.size()); + original_current_encodings.reserve(io->frames.size()); + metadata_encodings_1.reserve(io->frames.size()); + metadata_encodings_2.reserve(io->frames.size()); + current_encodings_2.reserve(io->frames.size()); + + for (const ImageBundle& ib : io->frames) { + // Remember original encoding, will be returned by decoder. + original_metadata_encodings.push_back(ib.metadata()->color_encoding); + // c_current should not change during encoding. + original_current_encodings.push_back(ib.c_current()); + } + + std::unique_ptr enc_state = + jxl::make_unique(); + JXL_CHECK(EncodeFile(cparams, io, enc_state.get(), &compressed, GetJxlCms(), + aux_out, pool)); + + for (const ImageBundle& ib1 : io->frames) { + metadata_encodings_1.push_back(ib1.metadata()->color_encoding); + } + + // Should still be in the same color space after encoding. 
+ CheckSameEncodings(metadata_encodings_1, original_metadata_encodings, + "original vs after encoding", failures); + + JXL_CHECK(DecodeFile(dparams, Span(compressed), io2, pool)); + JXL_CHECK(io2->frames.size() == io->frames.size()); + + for (const ImageBundle& ib2 : io2->frames) { + metadata_encodings_2.push_back(ib2.metadata()->color_encoding); + current_encodings_2.push_back(ib2.c_current()); + } + + // We always produce the original color encoding if a color transform hook is + // set. + CheckSameEncodings(current_encodings_2, original_current_encodings, + "current: original vs decoded", failures); + + // Decoder returns the originals passed to the encoder. + CheckSameEncodings(metadata_encodings_2, original_metadata_encodings, + "metadata: original vs decoded", failures); + + if (compressed_size) { + *compressed_size = compressed.size(); + } + + return failures.str().empty(); +} + +size_t Roundtrip(const extras::PackedPixelFile& ppf_in, + extras::JXLCompressParams cparams, + extras::JXLDecompressParams dparams, ThreadPool* pool, + extras::PackedPixelFile* ppf_out) { + DefaultAcceptedFormats(dparams); + SetThreadParallelRunner(cparams, pool); + SetThreadParallelRunner(dparams, pool); + std::vector compressed; + JXL_CHECK(extras::EncodeImageJXL(cparams, ppf_in, /*jpeg_bytes=*/nullptr, + &compressed)); + size_t decoded_bytes = 0; + JXL_CHECK(extras::DecodeImageJXL(compressed.data(), compressed.size(), + dparams, &decoded_bytes, ppf_out)); + JXL_CHECK(decoded_bytes == compressed.size()); + return compressed.size(); +} + +std::vector AllEncodings() { + std::vector all_encodings; + all_encodings.reserve(300); + ColorEncoding c; + + for (ColorSpace cs : Values()) { + if (cs == ColorSpace::kUnknown || cs == ColorSpace::kXYB) continue; + c.SetColorSpace(cs); + + for (WhitePoint wp : Values()) { + if (wp == WhitePoint::kCustom) continue; + if (c.ImplicitWhitePoint() && c.white_point != wp) continue; + c.white_point = wp; + + for (Primaries primaries : Values()) { + if 
(primaries == Primaries::kCustom) continue; + if (!c.HasPrimaries()) continue; + c.primaries = primaries; + + for (TransferFunction tf : Values()) { + if (tf == TransferFunction::kUnknown) continue; + if (c.tf.SetImplicit() && + (c.tf.IsGamma() || c.tf.GetTransferFunction() != tf)) { + continue; + } + c.tf.SetTransferFunction(tf); + + for (RenderingIntent ri : Values()) { + ColorEncodingDescriptor cdesc; + cdesc.color_space = cs; + cdesc.white_point = wp; + cdesc.primaries = primaries; + cdesc.tf = tf; + cdesc.rendering_intent = ri; + all_encodings.push_back(cdesc); + } + } + } + } + } + + return all_encodings; +} + +jxl::CodecInOut SomeTestImageToCodecInOut(const std::vector& buf, + size_t num_channels, size_t xsize, + size_t ysize) { + jxl::CodecInOut io; + io.SetSize(xsize, ysize); + io.metadata.m.SetAlphaBits(16); + io.metadata.m.color_encoding = jxl::ColorEncoding::SRGB( + /*is_gray=*/num_channels == 1 || num_channels == 2); + JxlPixelFormat format = {static_cast(num_channels), JXL_TYPE_UINT16, + JXL_BIG_ENDIAN, 0}; + JXL_CHECK(ConvertFromExternal( + jxl::Span(buf.data(), buf.size()), xsize, ysize, + jxl::ColorEncoding::SRGB(/*is_gray=*/num_channels < 3), + /*bits_per_sample=*/16, format, + /*pool=*/nullptr, + /*ib=*/&io.Main())); + return io; +} + +bool Near(double expected, double value, double max_dist) { + double dist = expected > value ? 
expected - value : value - expected; + return dist <= max_dist; +} + +float LoadLEFloat16(const uint8_t* p) { + uint16_t bits16 = LoadLE16(p); + return LoadFloat16(bits16); +} + +float LoadBEFloat16(const uint8_t* p) { + uint16_t bits16 = LoadBE16(p); + return LoadFloat16(bits16); +} + +size_t GetPrecision(JxlDataType data_type) { + switch (data_type) { + case JXL_TYPE_UINT8: + return 8; + case JXL_TYPE_UINT16: + return 16; + case JXL_TYPE_FLOAT: + // Floating point mantissa precision + return 24; + case JXL_TYPE_FLOAT16: + return 11; + default: + JXL_ABORT("Unhandled JxlDataType"); + } +} + +size_t GetDataBits(JxlDataType data_type) { + switch (data_type) { + case JXL_TYPE_UINT8: + return 8; + case JXL_TYPE_UINT16: + return 16; + case JXL_TYPE_FLOAT: + return 32; + case JXL_TYPE_FLOAT16: + return 16; + default: + JXL_ABORT("Unhandled JxlDataType"); + } +} + +std::vector ConvertToRGBA32(const uint8_t* pixels, size_t xsize, + size_t ysize, const JxlPixelFormat& format, + double factor) { + std::vector result(xsize * ysize * 4); + size_t num_channels = format.num_channels; + bool gray = num_channels == 1 || num_channels == 2; + bool alpha = num_channels == 2 || num_channels == 4; + JxlEndianness endianness = format.endianness; + // Compute actual type: + if (endianness == JXL_NATIVE_ENDIAN) { + endianness = IsLittleEndian() ? JXL_LITTLE_ENDIAN : JXL_BIG_ENDIAN; + } + + size_t stride = + xsize * jxl::DivCeil(GetDataBits(format.data_type) * num_channels, + jxl::kBitsPerByte); + if (format.align > 1) stride = jxl::RoundUpTo(stride, format.align); + + if (format.data_type == JXL_TYPE_UINT8) { + // Multiplier to bring to 0-1.0 range + double mul = factor > 0.0 ? factor : 1.0 / 255.0; + for (size_t y = 0; y < ysize; ++y) { + for (size_t x = 0; x < xsize; ++x) { + size_t j = (y * xsize + x) * 4; + size_t i = y * stride + x * num_channels; + double r = pixels[i]; + double g = gray ? r : pixels[i + 1]; + double b = gray ? r : pixels[i + 2]; + double a = alpha ? 
pixels[i + num_channels - 1] : 255; + result[j + 0] = r * mul; + result[j + 1] = g * mul; + result[j + 2] = b * mul; + result[j + 3] = a * mul; + } + } + } else if (format.data_type == JXL_TYPE_UINT16) { + JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN); + // Multiplier to bring to 0-1.0 range + double mul = factor > 0.0 ? factor : 1.0 / 65535.0; + for (size_t y = 0; y < ysize; ++y) { + for (size_t x = 0; x < xsize; ++x) { + size_t j = (y * xsize + x) * 4; + size_t i = y * stride + x * num_channels * 2; + double r, g, b, a; + if (endianness == JXL_BIG_ENDIAN) { + r = (pixels[i + 0] << 8) + pixels[i + 1]; + g = gray ? r : (pixels[i + 2] << 8) + pixels[i + 3]; + b = gray ? r : (pixels[i + 4] << 8) + pixels[i + 5]; + a = alpha ? (pixels[i + num_channels * 2 - 2] << 8) + + pixels[i + num_channels * 2 - 1] + : 65535; + } else { + r = (pixels[i + 1] << 8) + pixels[i + 0]; + g = gray ? r : (pixels[i + 3] << 8) + pixels[i + 2]; + b = gray ? r : (pixels[i + 5] << 8) + pixels[i + 4]; + a = alpha ? (pixels[i + num_channels * 2 - 1] << 8) + + pixels[i + num_channels * 2 - 2] + : 65535; + } + result[j + 0] = r * mul; + result[j + 1] = g * mul; + result[j + 2] = b * mul; + result[j + 3] = a * mul; + } + } + } else if (format.data_type == JXL_TYPE_FLOAT) { + JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN); + for (size_t y = 0; y < ysize; ++y) { + for (size_t x = 0; x < xsize; ++x) { + size_t j = (y * xsize + x) * 4; + size_t i = y * stride + x * num_channels * 4; + double r, g, b, a; + if (endianness == JXL_BIG_ENDIAN) { + r = LoadBEFloat(pixels + i); + g = gray ? r : LoadBEFloat(pixels + i + 4); + b = gray ? r : LoadBEFloat(pixels + i + 8); + a = alpha ? LoadBEFloat(pixels + i + num_channels * 4 - 4) : 1.0; + } else { + r = LoadLEFloat(pixels + i); + g = gray ? r : LoadLEFloat(pixels + i + 4); + b = gray ? r : LoadLEFloat(pixels + i + 8); + a = alpha ? 
LoadLEFloat(pixels + i + num_channels * 4 - 4) : 1.0; + } + result[j + 0] = r; + result[j + 1] = g; + result[j + 2] = b; + result[j + 3] = a; + } + } + } else if (format.data_type == JXL_TYPE_FLOAT16) { + JXL_ASSERT(endianness != JXL_NATIVE_ENDIAN); + for (size_t y = 0; y < ysize; ++y) { + for (size_t x = 0; x < xsize; ++x) { + size_t j = (y * xsize + x) * 4; + size_t i = y * stride + x * num_channels * 2; + double r, g, b, a; + if (endianness == JXL_BIG_ENDIAN) { + r = LoadBEFloat16(pixels + i); + g = gray ? r : LoadBEFloat16(pixels + i + 2); + b = gray ? r : LoadBEFloat16(pixels + i + 4); + a = alpha ? LoadBEFloat16(pixels + i + num_channels * 2 - 2) : 1.0; + } else { + r = LoadLEFloat16(pixels + i); + g = gray ? r : LoadLEFloat16(pixels + i + 2); + b = gray ? r : LoadLEFloat16(pixels + i + 4); + a = alpha ? LoadLEFloat16(pixels + i + num_channels * 2 - 2) : 1.0; + } + result[j + 0] = r; + result[j + 1] = g; + result[j + 2] = b; + result[j + 3] = a; + } + } + } else { + JXL_ASSERT(false); // Unsupported type + } + return result; +} + +size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize, + size_t ysize, const JxlPixelFormat& format_a, + const JxlPixelFormat& format_b, + double threshold_multiplier) { + // Convert both images to equal full precision for comparison. + std::vector a_full = ConvertToRGBA32(a, xsize, ysize, format_a); + std::vector b_full = ConvertToRGBA32(b, xsize, ysize, format_b); + bool gray_a = format_a.num_channels < 3; + bool gray_b = format_b.num_channels < 3; + bool alpha_a = !(format_a.num_channels & 1); + bool alpha_b = !(format_b.num_channels & 1); + size_t bits_a = GetPrecision(format_a.data_type); + size_t bits_b = GetPrecision(format_b.data_type); + size_t bits = std::min(bits_a, bits_b); + // How much distance is allowed in case of pixels with lower bit depths, given + // that the double precision float images use range 0-1.0. + // E.g. 
in case of 1-bit this is 0.5 since 0.499 must map to 0 and 0.501 must + // map to 1. + double precision = 0.5 * threshold_multiplier / ((1ull << bits) - 1ull); + if (format_a.data_type == JXL_TYPE_FLOAT16 || + format_b.data_type == JXL_TYPE_FLOAT16) { + // Lower the precision for float16, because it currently looks like the + // scalar and wasm implementations of hwy have 1 less bit of precision + // than the x86 implementations. + // TODO(lode): Set the required precision back to 11 bits when possible. + precision = 0.5 * threshold_multiplier / ((1ull << (bits - 1)) - 1ull); + } + size_t numdiff = 0; + for (size_t y = 0; y < ysize; y++) { + for (size_t x = 0; x < xsize; x++) { + size_t i = (y * xsize + x) * 4; + bool ok = true; + if (gray_a || gray_b) { + if (!Near(a_full[i + 0], b_full[i + 0], precision)) ok = false; + // If the input was grayscale and the output not, then the output must + // have all channels equal (any channel differing from R is a mismatch). + if (gray_a && (b_full[i + 0] != b_full[i + 1] || + b_full[i + 0] != b_full[i + 2])) { + ok = false; + } + } else { + if (!Near(a_full[i + 0], b_full[i + 0], precision) || + !Near(a_full[i + 1], b_full[i + 1], precision) || + !Near(a_full[i + 2], b_full[i + 2], precision)) { + ok = false; + } + } + if (alpha_a && alpha_b) { + if (!Near(a_full[i + 3], b_full[i + 3], precision)) ok = false; + } else { + // If the input had no alpha channel, the output should be opaque + // after roundtrip. + if (alpha_b && !Near(1.0, b_full[i + 3], precision)) ok = false; + } + if (!ok) numdiff++; + } + } + return numdiff; +} + +double DistanceRMS(const uint8_t* a, const uint8_t* b, size_t xsize, + size_t ysize, const JxlPixelFormat& format) { + // Convert both images to equal full precision for comparison.
+ std::vector a_full = ConvertToRGBA32(a, xsize, ysize, format); + std::vector b_full = ConvertToRGBA32(b, xsize, ysize, format); + double sum = 0.0; + for (size_t y = 0; y < ysize; y++) { + double row_sum = 0.0; + for (size_t x = 0; x < xsize; x++) { + size_t i = (y * xsize + x) * 4; + for (size_t c = 0; c < format.num_channels; ++c) { + double diff = a_full[i + c] - b_full[i + c]; + row_sum += diff * diff; + } + } + sum += row_sum; + } + sum /= (xsize * ysize); + return sqrt(sum); +} + +float ButteraugliDistance(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b, ThreadPool* pool) { + CodecInOut io0; + JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, pool, &io0)); + CodecInOut io1; + JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, pool, &io1)); + // TODO(eustas): simplify? + return ButteraugliDistance(io0.frames, io1.frames, ButteraugliParams(), + GetJxlCms(), + /*distmap=*/nullptr, pool); +} + +float Butteraugli3Norm(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b, ThreadPool* pool) { + CodecInOut io0; + JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, pool, &io0)); + CodecInOut io1; + JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, pool, &io1)); + ButteraugliParams ba; + ImageF distmap; + ButteraugliDistance(io0.frames, io1.frames, ba, GetJxlCms(), &distmap, pool); + return ComputeDistanceP(distmap, ba, 3); +} + +float ComputeDistance2(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b) { + CodecInOut io0; + JXL_CHECK(ConvertPackedPixelFileToCodecInOut(a, nullptr, &io0)); + CodecInOut io1; + JXL_CHECK(ConvertPackedPixelFileToCodecInOut(b, nullptr, &io1)); + return ComputeDistance2(io0.Main(), io1.Main(), GetJxlCms()); +} + +bool SameAlpha(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b) { + JXL_CHECK(a.info.xsize == b.info.xsize); + JXL_CHECK(a.info.ysize == b.info.ysize); + JXL_CHECK(a.info.alpha_bits == b.info.alpha_bits); + JXL_CHECK(a.info.alpha_exponent_bits == 
b.info.alpha_exponent_bits); + JXL_CHECK(a.info.alpha_bits > 0); + JXL_CHECK(a.frames.size() == b.frames.size()); + for (size_t i = 0; i < a.frames.size(); ++i) { + const extras::PackedImage& color_a = a.frames[i].color; + const extras::PackedImage& color_b = b.frames[i].color; + JXL_CHECK(color_a.format.num_channels == color_b.format.num_channels); + JXL_CHECK(color_a.format.data_type == color_b.format.data_type); + JXL_CHECK(color_a.format.endianness == color_b.format.endianness); + JXL_CHECK(color_a.pixels_size == color_b.pixels_size); + size_t pwidth = + extras::PackedImage::BitsPerChannel(color_a.format.data_type) / 8; + size_t num_color = color_a.format.num_channels < 3 ? 1 : 3; + const uint8_t* p_a = reinterpret_cast(color_a.pixels()); + const uint8_t* p_b = reinterpret_cast(color_b.pixels()); + for (size_t y = 0; y < a.info.ysize; ++y) { + for (size_t x = 0; x < a.info.xsize; ++x) { + size_t idx = + ((y * a.info.xsize + x) * color_a.format.num_channels + num_color) * + pwidth; + if (memcmp(&p_a[idx], &p_b[idx], pwidth) != 0) { + return false; + } + } + } + } + return true; +} + +bool SamePixels(const extras::PackedImage& a, const extras::PackedImage& b) { + JXL_CHECK(a.xsize == b.xsize); + JXL_CHECK(a.ysize == b.ysize); + JXL_CHECK(a.format.num_channels == b.format.num_channels); + JXL_CHECK(a.format.data_type == b.format.data_type); + JXL_CHECK(a.format.endianness == b.format.endianness); + JXL_CHECK(a.pixels_size == b.pixels_size); + const uint8_t* p_a = reinterpret_cast(a.pixels()); + const uint8_t* p_b = reinterpret_cast(b.pixels()); + for (size_t y = 0; y < a.ysize; ++y) { + for (size_t x = 0; x < a.xsize; ++x) { + size_t idx = (y * a.xsize + x) * a.pixel_stride(); + if (memcmp(&p_a[idx], &p_b[idx], a.pixel_stride()) != 0) { + printf("Mismatch at row %" PRIuS " col %" PRIuS "\n", y, x); + printf(" a: "); + for (size_t j = 0; j < a.pixel_stride(); ++j) { + printf(" %3u", p_a[idx + j]); + } + printf("\n b: "); + for (size_t j = 0; j < a.pixel_stride(); 
++j) { + printf(" %3u", p_b[idx + j]); + } + printf("\n"); + return false; + } + } + } + return true; +} + +bool SamePixels(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b) { + JXL_CHECK(a.info.xsize == b.info.xsize); + JXL_CHECK(a.info.ysize == b.info.ysize); + JXL_CHECK(a.info.bits_per_sample == b.info.bits_per_sample); + JXL_CHECK(a.info.exponent_bits_per_sample == b.info.exponent_bits_per_sample); + JXL_CHECK(a.frames.size() == b.frames.size()); + for (size_t i = 0; i < a.frames.size(); ++i) { + const auto& frame_a = a.frames[i]; + const auto& frame_b = b.frames[i]; + if (!SamePixels(frame_a.color, frame_b.color)) { + return false; + } + JXL_CHECK(frame_a.extra_channels.size() == frame_b.extra_channels.size()); + for (size_t j = 0; j < frame_a.extra_channels.size(); ++j) { + if (!SamePixels(frame_a.extra_channels[j], frame_b.extra_channels[j])) { + return false; + } + } + } + return true; +} + +} // namespace test + +bool operator==(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) { + if (a.size() != b.size()) return false; + if (memcmp(a.data(), b.data(), a.size()) != 0) return false; + return true; +} + +// Allow using EXPECT_EQ on jxl::PaddedBytes +bool operator!=(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b) { + return !(a == b); +} + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/test_utils.h b/third-party/libjxl/libjxl/lib/jxl/test_utils.h new file mode 100644 index 0000000000..c6fab66ddc --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/test_utils.h @@ -0,0 +1,183 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_TEST_UTILS_H_ +#define LIB_JXL_TEST_UTILS_H_ + +// TODO(eustas): reduce includes (move to .cc) + +// Macros and functions useful for tests. 
+ +#include +#include +#include +#include + +#include +#include + +#include "lib/extras/dec/decode.h" +#include "lib/extras/dec/jxl.h" +#include "lib/extras/enc/jxl.h" +#include "lib/extras/packed_image.h" +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/base/padded_bytes.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/codec_in_out.h" +#include "lib/jxl/color_encoding_internal.h" +#include "lib/jxl/enc_params.h" + +#define TEST_LIBJPEG_SUPPORT() \ + do { \ + if (!jxl::extras::CanDecode(jxl::extras::Codec::kJPG)) { \ + fprintf(stderr, "Skipping test because of missing libjpeg codec.\n"); \ + return; \ + } \ + } while (0) + +namespace jxl { + +struct AuxOut; + +namespace test { + +std::string GetTestDataPath(const std::string& filename); +PaddedBytes ReadTestData(const std::string& filename); + +void JxlBasicInfoSetFromPixelFormat(JxlBasicInfo* basic_info, + const JxlPixelFormat* pixel_format); + +void DefaultAcceptedFormats(extras::JXLDecompressParams& dparams); + +template <typename Params> +void SetThreadParallelRunner(Params& params, ThreadPool* pool) {  // By reference: updates the caller's params. + if (pool && !params.runner_opaque) { + params.runner = pool->runner(); + params.runner_opaque = pool->runner_opaque(); + } +} + +Status DecodeFile(extras::JXLDecompressParams dparams, + const Span file, CodecInOut* JXL_RESTRICT io, + ThreadPool* pool = nullptr); + +bool Roundtrip(const CodecInOut* io, const CompressParams& cparams, + extras::JXLDecompressParams dparams, + CodecInOut* JXL_RESTRICT io2, std::stringstream& failures, + size_t* compressed_size = nullptr, ThreadPool* pool = nullptr, + AuxOut* aux_out = nullptr); + +// Returns compressed size [bytes]. +size_t Roundtrip(const extras::PackedPixelFile& ppf_in, + extras::JXLCompressParams cparams, + extras::JXLDecompressParams dparams, ThreadPool* pool, + extras::PackedPixelFile* ppf_out); + +// A POD descriptor of a ColorEncoding. Only used in tests as the return value +// of AllEncodings(). 
+struct ColorEncodingDescriptor { + ColorSpace color_space; + WhitePoint white_point; + Primaries primaries; + TransferFunction tf; + RenderingIntent rendering_intent; +}; + +ColorEncoding ColorEncodingFromDescriptor(const ColorEncodingDescriptor& desc); + +// Defines operator<< so that descriptors print readably in test output. +static inline ::std::ostream& operator<<(::std::ostream& os, + const ColorEncodingDescriptor& c) { + return os << "ColorEncoding/" << Description(ColorEncodingFromDescriptor(c)); +} + +// Returns ColorEncodingDescriptors, which are only used in tests. To obtain a +// ColorEncoding object call ColorEncodingFromDescriptor, which also creates +// the ICC profile (it calls ColorEncoding::CreateICC() before returning). +std::vector AllEncodings(); + +// Returns a CodecInOut based on the buf, xsize, ysize, and the assumption +// that the buffer was created using `GetSomeTestImage`. +jxl::CodecInOut SomeTestImageToCodecInOut(const std::vector& buf, + size_t num_channels, size_t xsize, + size_t ysize); + +bool Near(double expected, double value, double max_dist); + +float LoadLEFloat16(const uint8_t* p); + +float LoadBEFloat16(const uint8_t* p); + +size_t GetPrecision(JxlDataType data_type); + +size_t GetDataBits(JxlDataType data_type); + +// Procedure to convert pixels to double precision RGBA, not efficient, but +// well-controlled for testing. It uses double, to be able to represent all +// precisions of the supported data types. Values are in range 0-1 for SDR; a +// positive `factor` replaces the default 1/255 or 1/65535 integer scaling. +std::vector ConvertToRGBA32(const uint8_t* pixels, size_t xsize, + size_t ysize, const JxlPixelFormat& format, + double factor = 0.0); + +// Returns amount of pixels which differ between the two pictures. Image b is +// the image after roundtrip, image a the image before roundtrip. There +// are more strict requirements for the alpha channel and grayscale values of +// the output image. 
+size_t ComparePixels(const uint8_t* a, const uint8_t* b, size_t xsize, + size_t ysize, const JxlPixelFormat& format_a, + const JxlPixelFormat& format_b, + double threshold_multiplier = 1.0); + +double DistanceRMS(const uint8_t* a, const uint8_t* b, size_t xsize, + size_t ysize, const JxlPixelFormat& format); + +float ButteraugliDistance(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b, + ThreadPool* pool = nullptr); + +float Butteraugli3Norm(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b, + ThreadPool* pool = nullptr); + +float ComputeDistance2(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b); + +bool SameAlpha(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b); + +bool SamePixels(const extras::PackedImage& a, const extras::PackedImage& b); + +bool SamePixels(const extras::PackedPixelFile& a, + const extras::PackedPixelFile& b); + +class ThreadPoolForTests { + public: + explicit ThreadPoolForTests(int num_threads) { + runner_ = + JxlThreadParallelRunnerMake(/* memory_manager */ nullptr, num_threads); + pool_ = + jxl::make_unique(JxlThreadParallelRunner, runner_.get()); + } + ThreadPoolForTests(const ThreadPoolForTests&) = delete; + ThreadPoolForTests& operator=(const ThreadPoolForTests&) = delete; + ThreadPool* operator&() { return pool_.get(); } + + private: + JxlThreadParallelRunnerPtr runner_; + std::unique_ptr pool_; +}; + +} // namespace test + +bool operator==(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b); + +// Allow using EXPECT_EQ on jxl::PaddedBytes +bool operator!=(const jxl::PaddedBytes& a, const jxl::PaddedBytes& b); + +} // namespace jxl + +#endif // LIB_JXL_TEST_UTILS_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/testing.h b/third-party/libjxl/libjxl/lib/jxl/testing.h new file mode 100644 index 0000000000..d10b0c3c54 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/testing.h @@ -0,0 +1,73 @@ +// Copyright (c) the JPEG XL Project Authors. 
All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_TESTING_H_ +#define LIB_JXL_TESTING_H_ + +// GTest/GMock specific macros / wrappers. + +// gmock unconditionally redefines those macros (to wrong values). +// Lets include it only here and mitigate the problem. +#pragma push_macro("PRIdS") +#pragma push_macro("PRIuS") +#include "gmock/gmock.h" +#pragma pop_macro("PRIuS") +#pragma pop_macro("PRIdS") + +#include + +#include "gtest/gtest.h" + +#ifdef JXL_DISABLE_SLOW_TESTS +#define JXL_SLOW_TEST(X) DISABLED_##X +#else +#define JXL_SLOW_TEST(X) X +#endif // JXL_DISABLE_SLOW_TESTS + +#if JPEGXL_ENABLE_TRANSCODE_JPEG +#define JXL_TRANSCODE_JPEG_TEST(X) X +#else +#define JXL_TRANSCODE_JPEG_TEST(X) DISABLED_##X +#endif // JPEGXL_ENABLE_TRANSCODE_JPEG + +#if JPEGXL_ENABLE_BOXES +#define JXL_BOXES_TEST(X) X +#else +#define JXL_BOXES_TEST(X) DISABLED_##X +#endif // JPEGXL_ENABLE_BOXES + +#ifdef THREAD_SANITIZER +#define JXL_TSAN_SLOW_TEST(X) DISABLED_##X +#else +#define JXL_TSAN_SLOW_TEST(X) X +#endif // THREAD_SANITIZER + +// googletest before 1.10 didn't define INSTANTIATE_TEST_SUITE_P() but instead +// used INSTANTIATE_TEST_CASE_P which is now deprecated. +#ifdef INSTANTIATE_TEST_SUITE_P +#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_SUITE_P +#else +#define JXL_GTEST_INSTANTIATE_TEST_SUITE_P INSTANTIATE_TEST_CASE_P +#endif + +// Ensures that we don't make our test bounds too lax, effectively disabling the +// tests. 
+MATCHER_P(IsSlightlyBelow, max, "") { + return max * 0.75 <= arg && arg <= max * 1.0; +} + +#define JXL_EXPECT_OK(F) \ + { \ + std::stringstream _; \ + EXPECT_TRUE(F) << _.str(); \ + } + +#define JXL_ASSERT_OK(F) \ + { \ + std::stringstream _; \ + ASSERT_TRUE(F) << _.str(); \ + } + +#endif // LIB_JXL_TESTING_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/tf_gbench.cc b/third-party/libjxl/libjxl/lib/jxl/tf_gbench.cc new file mode 100644 index 0000000000..9c010d460a --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/tf_gbench.cc @@ -0,0 +1,143 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "benchmark/benchmark.h" +#include "lib/jxl/image_ops.h" + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/tf_gbench.cc" +#include +#include + +#include "lib/jxl/transfer_functions-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +#define RUN_BENCHMARK(F) \ + constexpr size_t kNum = 1 << 12; \ + HWY_FULL(float) d; \ + /* Three parallel runs, as this will run on R, G and B. */ \ + auto sum1 = Zero(d); \ + auto sum2 = Zero(d); \ + auto sum3 = Zero(d); \ + for (auto _ : state) { \ + auto x = Set(d, 1e-5); \ + auto v1 = Set(d, 1e-5); \ + auto v2 = Set(d, 1.1e-5); \ + auto v3 = Set(d, 1.2e-5); \ + for (size_t i = 0; i < kNum; i++) { \ + sum1 += F(d, v1); \ + sum2 += F(d, v2); \ + sum3 += F(d, v3); \ + v1 += x; \ + v2 += x; \ + v3 += x; \ + } \ + } \ + /* floats per second */ \ + state.SetItemsProcessed(kNum* state.iterations() * Lanes(d) * 3); \ + benchmark::DoNotOptimize(sum1 + sum2 + sum3); + +#define RUN_BENCHMARK_SCALAR(F) \ + constexpr size_t kNum = 1 << 12; \ + /* Three parallel runs, as this will run on R, G and B. 
*/ \ + float sum1 = 0, sum2 = 0, sum3 = 0; \ + for (auto _ : state) { \ + float x = 1e-5; \ + float v1 = 1e-5; \ + float v2 = 1.1e-5; \ + float v3 = 1.2e-5; \ + for (size_t i = 0; i < kNum; i++) { \ + sum1 += F(v1); \ + sum2 += F(v2); \ + sum3 += F(v3); \ + v1 += x; \ + v2 += x; \ + v3 += x; \ + } \ + } \ + /* floats per second */ \ + state.SetItemsProcessed(kNum* state.iterations() * 3); \ + benchmark::DoNotOptimize(sum1 + sum2 + sum3); + +HWY_NOINLINE void BM_FastSRGB(benchmark::State& state) { + RUN_BENCHMARK(FastLinearToSRGB); +} + +HWY_NOINLINE void BM_TFSRGB(benchmark::State& state) { + RUN_BENCHMARK(TF_SRGB().EncodedFromDisplay); +} + +HWY_NOINLINE void BM_PQDFE(benchmark::State& state) { + RUN_BENCHMARK(TF_PQ().DisplayFromEncoded); +} + +HWY_NOINLINE void BM_PQEFD(benchmark::State& state) { + RUN_BENCHMARK(TF_PQ().EncodedFromDisplay); +} + +HWY_NOINLINE void BM_PQSlowDFE(benchmark::State& state) { + RUN_BENCHMARK_SCALAR(TF_PQ().DisplayFromEncoded); +} + +HWY_NOINLINE void BM_PQSlowEFD(benchmark::State& state) { + RUN_BENCHMARK_SCALAR(TF_PQ().EncodedFromDisplay); +} +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { +namespace { + +HWY_EXPORT(BM_FastSRGB); +HWY_EXPORT(BM_TFSRGB); +HWY_EXPORT(BM_PQDFE); +HWY_EXPORT(BM_PQEFD); +HWY_EXPORT(BM_PQSlowDFE); +HWY_EXPORT(BM_PQSlowEFD); + +float SRGB_pow(float x) { + return x < 0.0031308f ? 
12.92f * x : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f; +} + +void BM_FastSRGB(benchmark::State& state) { + HWY_DYNAMIC_DISPATCH(BM_FastSRGB)(state); +} +void BM_TFSRGB(benchmark::State& state) { + HWY_DYNAMIC_DISPATCH(BM_TFSRGB)(state); +} +void BM_PQDFE(benchmark::State& state) { + HWY_DYNAMIC_DISPATCH(BM_PQDFE)(state); +} +void BM_PQEFD(benchmark::State& state) { + HWY_DYNAMIC_DISPATCH(BM_PQEFD)(state); +} +void BM_PQSlowDFE(benchmark::State& state) { + HWY_DYNAMIC_DISPATCH(BM_PQSlowDFE)(state); +} +void BM_PQSlowEFD(benchmark::State& state) { + HWY_DYNAMIC_DISPATCH(BM_PQSlowEFD)(state); +} + +void BM_SRGB_pow(benchmark::State& state) { RUN_BENCHMARK_SCALAR(SRGB_pow); } + +BENCHMARK(BM_FastSRGB); +BENCHMARK(BM_TFSRGB); +BENCHMARK(BM_SRGB_pow); +BENCHMARK(BM_PQDFE); +BENCHMARK(BM_PQEFD); +BENCHMARK(BM_PQSlowDFE); +BENCHMARK(BM_PQSlowEFD); + +} // namespace +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl/toc.cc b/third-party/libjxl/libjxl/lib/jxl/toc.cc new file mode 100644 index 0000000000..fd7740c144 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/toc.cc @@ -0,0 +1,105 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/toc.h" + +#include + +#include "lib/jxl/coeff_order.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/common.h" +#include "lib/jxl/fields.h" + +namespace jxl { +size_t MaxBits(const size_t num_sizes) { + const size_t entry_bits = U32Coder::MaxEncodedBits(kTocDist) * num_sizes; + // permutation bit (not its tokens!), padding, entries, padding. 
+ return 1 + kBitsPerByte + entry_bits + kBitsPerByte; +} + +Status ReadToc(size_t toc_entries, BitReader* JXL_RESTRICT reader, + std::vector* JXL_RESTRICT sizes, + std::vector* JXL_RESTRICT permutation) { + if (toc_entries > 65536) { + // Prevent out of memory if invalid JXL codestream causes a bogus amount + // of toc_entries such as 2720436919446 to be computed. + // TODO(lode): verify whether 65536 is a reasonable upper bound + return JXL_FAILURE("too many toc entries"); + } + + sizes->clear(); + sizes->resize(toc_entries); + if (reader->TotalBitsConsumed() >= reader->TotalBytes() * kBitsPerByte) { + return JXL_STATUS(StatusCode::kNotEnoughBytes, "Not enough bytes for TOC"); + } + const auto check_bit_budget = [&](size_t num_entries) -> Status { + // U32Coder reads 2 bits to recognize variant and kTocDist cheapest variant + // is Bits(10), this way at least 12 bits are required per toc-entry. + size_t minimal_bit_cost = num_entries * (2 + 10); + size_t bit_budget = reader->TotalBytes() * 8; + size_t expenses = reader->TotalBitsConsumed(); + if ((expenses <= bit_budget) && + (minimal_bit_cost <= bit_budget - expenses)) { + return true; + } + return JXL_STATUS(StatusCode::kNotEnoughBytes, "Not enough bytes for TOC"); + }; + + JXL_DASSERT(toc_entries > 0); + if (reader->ReadFixedBits<1>() == 1) { + JXL_RETURN_IF_ERROR(check_bit_budget(toc_entries)); + permutation->resize(toc_entries); + JXL_RETURN_IF_ERROR(DecodePermutation(/*skip=*/0, toc_entries, + permutation->data(), reader)); + } + JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary()); + JXL_RETURN_IF_ERROR(check_bit_budget(toc_entries)); + for (size_t i = 0; i < toc_entries; ++i) { + (*sizes)[i] = U32Coder::Read(kTocDist, reader); + } + JXL_RETURN_IF_ERROR(reader->JumpToByteBoundary()); + JXL_RETURN_IF_ERROR(check_bit_budget(0)); + return true; +} + +Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader, + std::vector* JXL_RESTRICT offsets, + std::vector* JXL_RESTRICT sizes, + uint64_t* 
total_size) { + std::vector permutation; + JXL_RETURN_IF_ERROR(ReadToc(toc_entries, reader, sizes, &permutation)); + + offsets->clear(); + offsets->resize(toc_entries); + + // Prefix sum starting with 0 and ending with the offset of the last group + uint64_t offset = 0; + for (size_t i = 0; i < toc_entries; ++i) { + if (offset + (*sizes)[i] < offset) { + return JXL_FAILURE("group offset overflow"); + } + (*offsets)[i] = offset; + offset += (*sizes)[i]; + } + if (total_size) { + *total_size = offset; + } + + if (!permutation.empty()) { + std::vector permuted_offsets; + std::vector permuted_sizes; + permuted_offsets.reserve(toc_entries); + permuted_sizes.reserve(toc_entries); + for (coeff_order_t index : permutation) { + permuted_offsets.push_back((*offsets)[index]); + permuted_sizes.push_back((*sizes)[index]); + } + std::swap(*offsets, permuted_offsets); + std::swap(*sizes, permuted_sizes); + } + + return true; +} +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/toc.h b/third-party/libjxl/libjxl/lib/jxl/toc.h new file mode 100644 index 0000000000..a97197ad45 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/toc.h @@ -0,0 +1,55 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef LIB_JXL_TOC_H_ +#define LIB_JXL_TOC_H_ + +#include +#include + +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/coeff_order_fwd.h" +#include "lib/jxl/dec_bit_reader.h" +#include "lib/jxl/field_encodings.h" + +namespace jxl { + +// (2+bits) = 2,3,4 bytes so encoders can patch TOC after encoding. +// 30 is sufficient for 4K channels of uncompressed 16-bit samples. +constexpr U32Enc kTocDist(Bits(10), BitsOffset(14, 1024), BitsOffset(22, 17408), + BitsOffset(30, 4211712)); + +size_t MaxBits(const size_t num_sizes); + +// TODO(veluca): move these to FrameDimensions. 
+static JXL_INLINE size_t AcGroupIndex(size_t pass, size_t group, + size_t num_groups, size_t num_dc_groups, + bool has_ac_global) { + return 1 + num_dc_groups + static_cast(has_ac_global) + + pass * num_groups + group; +} + +static JXL_INLINE size_t NumTocEntries(size_t num_groups, size_t num_dc_groups, + size_t num_passes, bool has_ac_global) { + if (num_groups == 1 && num_passes == 1) return 1; + return AcGroupIndex(0, 0, num_groups, num_dc_groups, has_ac_global) + + num_groups * num_passes; +} + +Status ReadToc(size_t toc_entries, BitReader* JXL_RESTRICT reader, + std::vector* JXL_RESTRICT sizes, + std::vector* JXL_RESTRICT permutation); + +Status ReadGroupOffsets(size_t toc_entries, BitReader* JXL_RESTRICT reader, + std::vector* JXL_RESTRICT offsets, + std::vector* JXL_RESTRICT sizes, + uint64_t* total_size); + +} // namespace jxl + +#endif // LIB_JXL_TOC_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/toc_test.cc b/third-party/libjxl/libjxl/lib/jxl/toc_test.cc new file mode 100644 index 0000000000..a7f0f2c27b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/toc_test.cc @@ -0,0 +1,92 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include "lib/jxl/toc.h" + +#include "lib/jxl/base/random.h" +#include "lib/jxl/base/span.h" +#include "lib/jxl/common.h" +#include "lib/jxl/enc_aux_out.h" +#include "lib/jxl/enc_toc.h" +#include "lib/jxl/testing.h" + +namespace jxl { +namespace { + +void Roundtrip(size_t num_entries, bool permute, Rng* rng) { + // Generate a random permutation. 
+ std::vector permutation(num_entries); + std::vector inv_permutation(num_entries); + for (size_t i = 0; i < num_entries; i++) { + permutation[i] = i; + inv_permutation[i] = i; + } + if (permute) { + rng->Shuffle(permutation.data(), permutation.size()); + for (size_t i = 0; i < num_entries; i++) { + inv_permutation[permutation[i]] = i; + } + } + + // Generate num_entries groups of random (byte-aligned) length + std::vector group_codes(num_entries); + for (BitWriter& writer : group_codes) { + const size_t max_bits = (*rng)() & 0xFFF; + BitWriter::Allotment allotment(&writer, max_bits + kBitsPerByte); + size_t i = 0; + for (; i + BitWriter::kMaxBitsPerCall < max_bits; + i += BitWriter::kMaxBitsPerCall) { + writer.Write(BitWriter::kMaxBitsPerCall, 0); + } + for (; i < max_bits; i += 1) { + writer.Write(/*n_bits=*/1, 0); + } + writer.ZeroPadToByte(); + AuxOut aux_out; + allotment.ReclaimAndCharge(&writer, 0, &aux_out); + } + + BitWriter writer; + AuxOut aux_out; + ASSERT_TRUE(WriteGroupOffsets(group_codes, permute ? 
&permutation : nullptr, + &writer, &aux_out)); + + BitReader reader(writer.GetSpan()); + std::vector group_offsets; + std::vector group_sizes; + uint64_t total_size; + ASSERT_TRUE(ReadGroupOffsets(num_entries, &reader, &group_offsets, + &group_sizes, &total_size)); + ASSERT_EQ(num_entries, group_offsets.size()); + ASSERT_EQ(num_entries, group_sizes.size()); + EXPECT_TRUE(reader.Close()); + + uint64_t prefix_sum = 0; + for (size_t i = 0; i < num_entries; ++i) { + EXPECT_EQ(prefix_sum, group_offsets[inv_permutation[i]]); + + EXPECT_EQ(0u, group_codes[i].BitsWritten() % kBitsPerByte); + prefix_sum += group_codes[i].BitsWritten() / kBitsPerByte; + + if (i + 1 < num_entries) { + EXPECT_EQ( + group_offsets[inv_permutation[i]] + group_sizes[inv_permutation[i]], + group_offsets[inv_permutation[i + 1]]); + } + } + EXPECT_EQ(prefix_sum, total_size); +} + +TEST(TocTest, Test) { + Rng rng(0); + for (size_t num_entries = 1; num_entries < 10; ++num_entries) { + for (bool permute : std::vector{false, true}) { + Roundtrip(num_entries, permute, &rng); + } + } +} + +} // namespace +} // namespace jxl diff --git a/third-party/libjxl/libjxl/lib/jxl/transfer_functions-inl.h b/third-party/libjxl/libjxl/lib/jxl/transfer_functions-inl.h new file mode 100644 index 0000000000..9f4c10c76d --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/transfer_functions-inl.h @@ -0,0 +1,413 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Transfer functions for color encodings. 
+ +#if defined(LIB_JXL_TRANSFER_FUNCTIONS_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_ +#undef LIB_JXL_TRANSFER_FUNCTIONS_INL_H_ +#else +#define LIB_JXL_TRANSFER_FUNCTIONS_INL_H_ +#endif + +#include +#include +#include + +#include "lib/jxl/base/compiler_specific.h" +#include "lib/jxl/base/status.h" +#include "lib/jxl/fast_math-inl.h" +#include "lib/jxl/rational_polynomial-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::And; +using hwy::HWY_NAMESPACE::AndNot; +using hwy::HWY_NAMESPACE::Gt; +using hwy::HWY_NAMESPACE::IfThenElse; +using hwy::HWY_NAMESPACE::Lt; +using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::Sqrt; +using hwy::HWY_NAMESPACE::TableLookupBytes; + +// Definitions for BT.2100-2 transfer functions (used inside/outside SIMD): +// "display" is linear light (nits) normalized to [0, 1]. +// "encoded" is a nonlinear encoding (e.g. PQ) in [0, 1]. +// "scene" is a linear function of photon counts, normalized to [0, 1]. + +// Despite the stated ranges, we need unbounded transfer functions: see +// http://www.littlecms.com/CIC18_UnboundedCMM.pdf. Inputs can be negative or +// above 1 due to chromatic adaptation. To avoid severe round-trip errors caused +// by clamping, we mirror negative inputs via copysign (f(-x) = -f(x), see +// https://developer.apple.com/documentation/coregraphics/cgcolorspace/1644735-extendedsrgb) +// and extend the function domains above 1. + +// Hybrid Log-Gamma. +class TF_HLG { + public: + // EOTF. e = encoded. + JXL_INLINE double DisplayFromEncoded(const double e) const { + return OOTF(InvOETF(e)); + } + + // Inverse EOTF. d = display. + JXL_INLINE double EncodedFromDisplay(const double d) const { + return OETF(InvOOTF(d)); + } + + // Maximum error 5e-7. 
+ template + JXL_INLINE V EncodedFromDisplay(D d, V x) const { + const hwy::HWY_NAMESPACE::Rebind du; + const V kSign = BitCast(d, Set(du, 0x80000000u)); + const V original_sign = And(x, kSign); + x = AndNot(kSign, x); // abs + const V below_div12 = Sqrt(Mul(Set(d, 3.0f), x)); + const V e = + MulAdd(Set(d, kA * 0.693147181f), + FastLog2f(d, MulAdd(Set(d, 12), x, Set(d, -kB))), Set(d, kC)); + const V magnitude = IfThenElse(Le(x, Set(d, kDiv12)), below_div12, e); + return Or(AndNot(kSign, magnitude), original_sign); + } + + private: + // OETF (defines the HLG approach). s = scene, returns encoded. + JXL_INLINE double OETF(double s) const { + if (s == 0.0) return 0.0; + const double original_sign = s; + s = std::abs(s); + + if (s <= kDiv12) return copysignf(std::sqrt(3.0 * s), original_sign); + + const double e = kA * std::log(12 * s - kB) + kC; + JXL_ASSERT(e > 0.0); + return copysignf(e, original_sign); + } + + // e = encoded, returns scene. + JXL_INLINE double InvOETF(double e) const { + if (e == 0.0) return 0.0; + const double original_sign = e; + e = std::abs(e); + + if (e <= 0.5) return copysignf(e * e * (1.0 / 3), original_sign); + + const double s = (std::exp((e - kC) * kRA) + kB) * kDiv12; + JXL_ASSERT(s >= 0); + return copysignf(s, original_sign); + } + + // s = scene, returns display. + JXL_INLINE double OOTF(const double s) const { + // The actual (red channel) OOTF is RD = alpha * YS^(gamma-1) * RS, where + // YS = 0.2627 * RS + 0.6780 * GS + 0.0593 * BS. Let alpha = 1 so we return + // "display" (normalized [0, 1]) instead of nits. Our transfer function + // interface does not allow a dependency on YS. Fortunately, the system + // gamma at 334 nits is 1.0, so this reduces to RD = RS. + return s; + } + + // d = display, returns scene. + JXL_INLINE double InvOOTF(const double d) const { + return d; // see OOTF(). 
+ } + + static constexpr double kA = 0.17883277; + static constexpr double kRA = 1.0 / kA; + static constexpr double kB = 1 - 4 * kA; + static constexpr double kC = 0.5599107295; + static constexpr double kDiv12 = 1.0 / 12; +}; + +class TF_709 { + public: + JXL_INLINE double EncodedFromDisplay(const double d) const { + if (d < kThresh) return kMulLow * d; + return kMulHi * std::pow(d, kPowHi) + kSub; + } + + // Maximum error 1e-6. + template + JXL_INLINE V EncodedFromDisplay(D d, V x) const { + auto low = Mul(Set(d, kMulLow), x); + auto hi = + MulAdd(Set(d, kMulHi), FastPowf(d, x, Set(d, kPowHi)), Set(d, kSub)); + return IfThenElse(Le(x, Set(d, kThresh)), low, hi); + } + + template + JXL_INLINE V DisplayFromEncoded(D d, V x) const { + auto low = Mul(Set(d, kInvMulLow), x); + auto hi = FastPowf(d, MulAdd(x, Set(d, kInvMulHi), Set(d, kInvAdd)), + Set(d, kInvPowHi)); + return IfThenElse(Lt(x, Set(d, kInvThresh)), low, hi); + } + + private: + static constexpr double kThresh = 0.018; + static constexpr double kMulLow = 4.5; + static constexpr double kMulHi = 1.099; + static constexpr double kPowHi = 0.45; + static constexpr double kSub = -0.099; + + static constexpr double kInvThresh = 0.081; + static constexpr double kInvMulLow = 1 / 4.5; + static constexpr double kInvMulHi = 1 / 1.099; + static constexpr double kInvPowHi = 1 / 0.45; + static constexpr double kInvAdd = 0.099 * kInvMulHi; +}; + +// Perceptual Quantization +class TF_PQ { + public: + // EOTF (defines the PQ approach). e = encoded. 
+ JXL_INLINE double DisplayFromEncoded(double e) const { + if (e == 0.0) return 0.0; + const double original_sign = e; + e = std::abs(e); + + const double xp = std::pow(e, 1.0 / kM2); + const double num = std::max(xp - kC1, 0.0); + const double den = kC2 - kC3 * xp; + JXL_DASSERT(den != 0.0); + const double d = std::pow(num / den, 1.0 / kM1); + JXL_DASSERT(d >= 0.0); // Equal for e ~= 1E-9 + return copysignf(d, original_sign); + } + + // Maximum error 3e-6 + template + JXL_INLINE V DisplayFromEncoded(D d, V x) const { + const hwy::HWY_NAMESPACE::Rebind du; + const V kSign = BitCast(d, Set(du, 0x80000000u)); + const V original_sign = And(x, kSign); + x = AndNot(kSign, x); // abs + // 4-over-4-degree rational polynomial approximation on x+x*x. This improves + // the maximum error by about 5x over a rational polynomial for x. + auto xpxx = MulAdd(x, x, x); + HWY_ALIGN constexpr float p[(4 + 1) * 4] = { + HWY_REP4(2.62975656e-04f), HWY_REP4(-6.23553089e-03f), + HWY_REP4(7.38602301e-01f), HWY_REP4(2.64553172e+00f), + HWY_REP4(5.50034862e-01f), + }; + HWY_ALIGN constexpr float q[(4 + 1) * 4] = { + HWY_REP4(4.21350107e+02f), HWY_REP4(-4.28736818e+02f), + HWY_REP4(1.74364667e+02f), HWY_REP4(-3.39078883e+01f), + HWY_REP4(2.67718770e+00f), + }; + auto magnitude = EvalRationalPolynomial(d, xpxx, p, q); + return Or(AndNot(kSign, magnitude), original_sign); + } + + // Inverse EOTF. d = display. + JXL_INLINE double EncodedFromDisplay(double d) const { + if (d == 0.0) return 0.0; + const double original_sign = d; + d = std::abs(d); + + const double xp = std::pow(d, kM1); + const double num = kC1 + xp * kC2; + const double den = 1.0 + xp * kC3; + const double e = std::pow(num / den, kM2); + JXL_DASSERT(e > 0.0); + return copysignf(e, original_sign); + } + + // Maximum error 7e-7. 
+ template + JXL_INLINE V EncodedFromDisplay(D d, V x) const { + const hwy::HWY_NAMESPACE::Rebind du; + const V kSign = BitCast(d, Set(du, 0x80000000u)); + const V original_sign = And(x, kSign); + x = AndNot(kSign, x); // abs + // 4-over-4-degree rational polynomial approximation on x**0.25, with two + // different polynomials above and below 1e-4. + auto xto025 = Sqrt(Sqrt(x)); + HWY_ALIGN constexpr float p[(4 + 1) * 4] = { + HWY_REP4(1.351392e-02f), HWY_REP4(-1.095778e+00f), + HWY_REP4(5.522776e+01f), HWY_REP4(1.492516e+02f), + HWY_REP4(4.838434e+01f), + }; + HWY_ALIGN constexpr float q[(4 + 1) * 4] = { + HWY_REP4(1.012416e+00f), HWY_REP4(2.016708e+01f), + HWY_REP4(9.263710e+01f), HWY_REP4(1.120607e+02f), + HWY_REP4(2.590418e+01f), + }; + + HWY_ALIGN constexpr float plo[(4 + 1) * 4] = { + HWY_REP4(9.863406e-06f), HWY_REP4(3.881234e-01f), + HWY_REP4(1.352821e+02f), HWY_REP4(6.889862e+04f), + HWY_REP4(-2.864824e+05f), + }; + HWY_ALIGN constexpr float qlo[(4 + 1) * 4] = { + HWY_REP4(3.371868e+01f), HWY_REP4(1.477719e+03f), + HWY_REP4(1.608477e+04f), HWY_REP4(-4.389884e+04f), + HWY_REP4(-2.072546e+05f), + }; + + auto magnitude = IfThenElse(Lt(x, Set(d, 1e-4f)), + EvalRationalPolynomial(d, xto025, plo, qlo), + EvalRationalPolynomial(d, xto025, p, q)); + return Or(AndNot(kSign, magnitude), original_sign); + } + + private: + static constexpr double kM1 = 2610.0 / 16384; + static constexpr double kM2 = (2523.0 / 4096) * 128; + static constexpr double kC1 = 3424.0 / 4096; + static constexpr double kC2 = (2413.0 / 4096) * 32; + static constexpr double kC3 = (2392.0 / 4096) * 32; +}; + +// sRGB +class TF_SRGB { + public: + template + JXL_INLINE V DisplayFromEncoded(V x) const { + const HWY_FULL(float) d; + const HWY_FULL(uint32_t) du; + const V kSign = BitCast(d, Set(du, 0x80000000u)); + const V original_sign = And(x, kSign); + x = AndNot(kSign, x); // abs + + // TODO(janwas): range reduction + // Computed via af_cheb_rational (k=100); replicated 4x. 
+ HWY_ALIGN constexpr float p[(4 + 1) * 4] = { + 2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f, 2.200248328e-04f, + 1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f, 1.043637593e-02f, + 1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f, 1.624820318e-01f, + 7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f, 7.961564959e-01f, + 8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f, 8.210152774e-01f, + }; + HWY_ALIGN constexpr float q[(4 + 1) * 4] = { + 2.631846970e-01f, 2.631846970e-01f, 2.631846970e-01f, + 2.631846970e-01f, 1.076976492e+00f, 1.076976492e+00f, + 1.076976492e+00f, 1.076976492e+00f, 4.987528350e-01f, + 4.987528350e-01f, 4.987528350e-01f, 4.987528350e-01f, + -5.512498495e-02f, -5.512498495e-02f, -5.512498495e-02f, + -5.512498495e-02f, 6.521209011e-03f, 6.521209011e-03f, + 6.521209011e-03f, 6.521209011e-03f, + }; + const V linear = Mul(x, Set(d, kLowDivInv)); + const V poly = EvalRationalPolynomial(d, x, p, q); + const V magnitude = + IfThenElse(Gt(x, Set(d, kThreshSRGBToLinear)), poly, linear); + return Or(AndNot(kSign, magnitude), original_sign); + } + + // Error ~5e-07 + template + JXL_INLINE V EncodedFromDisplay(D d, V x) const { + const hwy::HWY_NAMESPACE::Rebind du; + const V kSign = BitCast(d, Set(du, 0x80000000u)); + const V original_sign = And(x, kSign); + x = AndNot(kSign, x); // abs + + // Computed via af_cheb_rational (k=100); replicated 4x. 
+ HWY_ALIGN constexpr float p[(4 + 1) * 4] = { + -5.135152395e-04f, -5.135152395e-04f, -5.135152395e-04f, + -5.135152395e-04f, 5.287254571e-03f, 5.287254571e-03f, + 5.287254571e-03f, 5.287254571e-03f, 3.903842876e-01f, + 3.903842876e-01f, 3.903842876e-01f, 3.903842876e-01f, + 1.474205315e+00f, 1.474205315e+00f, 1.474205315e+00f, + 1.474205315e+00f, 7.352629620e-01f, 7.352629620e-01f, + 7.352629620e-01f, 7.352629620e-01f, + }; + HWY_ALIGN constexpr float q[(4 + 1) * 4] = { + 1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f, 1.004519624e-02f, + 3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f, 3.036675394e-01f, + 1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f, 1.340816930e+00f, + 9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f, 9.258482155e-01f, + 2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f, 2.424867759e-02f, + }; + const V linear = Mul(x, Set(d, kLowDiv)); + const V poly = EvalRationalPolynomial(d, Sqrt(x), p, q); + const V magnitude = + IfThenElse(Gt(x, Set(d, kThreshLinearToSRGB)), poly, linear); + return Or(AndNot(kSign, magnitude), original_sign); + } + + private: + static constexpr float kThreshSRGBToLinear = 0.04045f; + static constexpr float kThreshLinearToSRGB = 0.0031308f; + static constexpr float kLowDiv = 12.92f; + static constexpr float kLowDivInv = 1.0f / kLowDiv; +}; + +// Linear to sRGB conversion with error of at most 1.2e-4. +template +V FastLinearToSRGB(D d, V v) { + const hwy::HWY_NAMESPACE::Rebind du; + const hwy::HWY_NAMESPACE::Rebind di; + // Convert to 0.25 - 0.5 range. + auto v025_05 = BitCast( + d, And(Or(BitCast(du, v), Set(du, 0x3e800000)), Set(du, 0x3effffff))); + // third degree polynomial approximation between 0.25 and 0.5 + // of 1.055/2^(7/2.4) * x^(1/2.4) * 0.5. A degree 4 polynomial only improves + // accuracy by about 3x. 
+ auto d1 = MulAdd(v025_05, Set(d, 0.059914046f), Set(d, -0.108894556f)); + auto d2 = MulAdd(d1, v025_05, Set(d, 0.107963754f)); + auto pow = MulAdd(d2, v025_05, Set(d, 0.018092343f)); + // Compute extra multiplier depending on exponent. Valid exponent range for + // [0.0031308f, 1.0) is 0...8 after subtracting 118. + // The next three constants contain a representation of the powers of + // 2**(1/2.4) = 2**(5/12) times two; in particular, bits from 26 to 31 are + // always the same and in k2to512powers_basebits, and the two arrays contain + // the next groups of 8 bits. This ends up being a 22-bit representation (with + // a mantissa of 13 bits). The choice of polynomial to approximate is such + // that the multiplication factor has the highest 5 bits constant, and that + // the factor for the lowest possible exponent is a power of two (thus making + // the additional bits 0, which is used to correctly merge back together the + // floats). + constexpr uint32_t k2to512powers_basebits = 0x40000000; + HWY_ALIGN constexpr uint8_t k2to512powers_25to18bits[16] = { + 0x0, 0xa, 0x19, 0x26, 0x32, 0x41, 0x4d, 0x5c, + 0x68, 0x75, 0x83, 0x8f, 0xa0, 0xaa, 0xb9, 0xc6, + }; + HWY_ALIGN constexpr uint8_t k2to512powers_17to10bits[16] = { + 0x0, 0xb7, 0x4, 0xd, 0xcb, 0xe7, 0x41, 0x68, + 0x51, 0xd1, 0xeb, 0xf2, 0x0, 0xb7, 0x4, 0xd, + }; + // Note that vld1q_s8_x2 on ARM seems to actually be slower. +#if HWY_TARGET != HWY_SCALAR + using hwy::HWY_NAMESPACE::ShiftLeft; + using hwy::HWY_NAMESPACE::ShiftRight; + // Every lane of exp is now (if cast to byte) {0, 0, 0, }. + auto exp = Sub(ShiftRight<23>(BitCast(di, v)), Set(di, 118)); + auto pow25to18bits = TableLookupBytes( + LoadDup128(di, + reinterpret_cast(k2to512powers_25to18bits)), + exp); + auto pow17to10bits = TableLookupBytes( + LoadDup128(di, + reinterpret_cast(k2to512powers_17to10bits)), + exp); + // Now, pow* contain {0, 0, 0, }. Here + // we take advantage of the fact that each table has its position 0 equal to + // 0. 
+ // We can now just reassemble the float. + auto mul = BitCast( + d, Or(Or(ShiftLeft<18>(pow25to18bits), ShiftLeft<10>(pow17to10bits)), + Set(di, k2to512powers_basebits))); +#else + // Fallback for scalar. + uint32_t exp = ((BitCast(di, v).raw >> 23) - 118) & 0xf; + auto mul = BitCast(d, Set(di, (k2to512powers_25to18bits[exp] << 18) | + (k2to512powers_17to10bits[exp] << 10) | + k2to512powers_basebits)); +#endif + return IfThenElse(Lt(v, Set(d, 0.0031308f)), Mul(v, Set(d, 12.92f)), + MulAdd(pow, mul, Set(d, -0.055))); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_TRANSFER_FUNCTIONS_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/transpose-inl.h b/third-party/libjxl/libjxl/lib/jxl/transpose-inl.h new file mode 100644 index 0000000000..4674420737 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/transpose-inl.h @@ -0,0 +1,203 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Block transpose for DCT/IDCT + +#if defined(LIB_JXL_TRANSPOSE_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_TRANSPOSE_INL_H_ +#undef LIB_JXL_TRANSPOSE_INL_H_ +#else +#define LIB_JXL_TRANSPOSE_INL_H_ +#endif + +#include + +#include +#include + +#include "lib/jxl/base/status.h" +#include "lib/jxl/dct_block-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +#ifndef JXL_INLINE_TRANSPOSE +// Workaround for issue #42 - (excessive?) inlining causes invalid codegen. +#if defined(__arm__) +#define JXL_INLINE_TRANSPOSE HWY_NOINLINE +#else +#define JXL_INLINE_TRANSPOSE HWY_INLINE +#endif +#endif // JXL_INLINE_TRANSPOSE + +// Simple wrapper that ensures that a function will not be inlined. +template +JXL_NOINLINE void NoInlineWrapper(const T& f, const Args&... 
args) { + return f(args...); +} + +template +struct TransposeSimdTag {}; + +// TODO(veluca): it's not super useful to have this in the SIMD namespace. +template +JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag, + const From& from, const To& to, + size_t ROWSp, size_t COLSp) { + size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0; + size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0; + for (size_t n = 0; n < ROWS; ++n) { + for (size_t m = 0; m < COLS; ++m) { + to.Write(from.Read(n, m), m, n); + } + } +} + +// TODO(veluca): AVX3? +#if HWY_CAP_GE256 +constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) { + return ROWS % 8 == 0 && COLS % 8 == 0; +} + +template +JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag, + const From& from, const To& to, + size_t ROWSp, size_t COLSp) { + size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0; + size_t COLS = COLS_or_0 == 0 ? COLSp : COLS_or_0; + static_assert(MaxLanes(BlockDesc<8>()) == 8, "Invalid descriptor size"); + static_assert(ROWS_or_0 % 8 == 0, "Invalid number of rows"); + static_assert(COLS_or_0 % 8 == 0, "Invalid number of columns"); + for (size_t n = 0; n < ROWS; n += 8) { + for (size_t m = 0; m < COLS; m += 8) { + const BlockDesc<8> d; + auto i0 = from.LoadPart(d, n + 0, m + 0); + auto i1 = from.LoadPart(d, n + 1, m + 0); + auto i2 = from.LoadPart(d, n + 2, m + 0); + auto i3 = from.LoadPart(d, n + 3, m + 0); + auto i4 = from.LoadPart(d, n + 4, m + 0); + auto i5 = from.LoadPart(d, n + 5, m + 0); + auto i6 = from.LoadPart(d, n + 6, m + 0); + auto i7 = from.LoadPart(d, n + 7, m + 0); + // Surprisingly, this straightforward implementation (24 cycles on port5) + // is faster than load128+insert and LoadDup128+ConcatUpperLower+blend. 
+ const auto q0 = InterleaveLower(d, i0, i2); + const auto q1 = InterleaveLower(d, i1, i3); + const auto q2 = InterleaveUpper(d, i0, i2); + const auto q3 = InterleaveUpper(d, i1, i3); + const auto q4 = InterleaveLower(d, i4, i6); + const auto q5 = InterleaveLower(d, i5, i7); + const auto q6 = InterleaveUpper(d, i4, i6); + const auto q7 = InterleaveUpper(d, i5, i7); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + const auto r4 = InterleaveLower(d, q4, q5); + const auto r5 = InterleaveUpper(d, q4, q5); + const auto r6 = InterleaveLower(d, q6, q7); + const auto r7 = InterleaveUpper(d, q6, q7); + + i0 = ConcatLowerLower(d, r4, r0); + i1 = ConcatLowerLower(d, r5, r1); + i2 = ConcatLowerLower(d, r6, r2); + i3 = ConcatLowerLower(d, r7, r3); + i4 = ConcatUpperUpper(d, r4, r0); + i5 = ConcatUpperUpper(d, r5, r1); + i6 = ConcatUpperUpper(d, r6, r2); + i7 = ConcatUpperUpper(d, r7, r3); + to.StorePart(d, i0, m + 0, n + 0); + to.StorePart(d, i1, m + 1, n + 0); + to.StorePart(d, i2, m + 2, n + 0); + to.StorePart(d, i3, m + 3, n + 0); + to.StorePart(d, i4, m + 4, n + 0); + to.StorePart(d, i5, m + 5, n + 0); + to.StorePart(d, i6, m + 6, n + 0); + to.StorePart(d, i7, m + 7, n + 0); + } + } +} +#elif HWY_TARGET != HWY_SCALAR +constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) { + return ROWS % 4 == 0 && COLS % 4 == 0; +} + +template +JXL_INLINE_TRANSPOSE void GenericTransposeBlock(TransposeSimdTag, + const From& from, const To& to, + size_t ROWSp, size_t COLSp) { + size_t ROWS = ROWS_or_0 == 0 ? ROWSp : ROWS_or_0; + size_t COLS = COLS_or_0 == 0 ? 
COLSp : COLS_or_0; + static_assert(MaxLanes(BlockDesc<4>()) == 4, "Invalid descriptor size"); + static_assert(ROWS_or_0 % 4 == 0, "Invalid number of rows"); + static_assert(COLS_or_0 % 4 == 0, "Invalid number of columns"); + for (size_t n = 0; n < ROWS; n += 4) { + for (size_t m = 0; m < COLS; m += 4) { + const BlockDesc<4> d; + const auto p0 = from.LoadPart(d, n + 0, m + 0); + const auto p1 = from.LoadPart(d, n + 1, m + 0); + const auto p2 = from.LoadPart(d, n + 2, m + 0); + const auto p3 = from.LoadPart(d, n + 3, m + 0); + + const auto q0 = InterleaveLower(d, p0, p2); + const auto q1 = InterleaveLower(d, p1, p3); + const auto q2 = InterleaveUpper(d, p0, p2); + const auto q3 = InterleaveUpper(d, p1, p3); + + const auto r0 = InterleaveLower(d, q0, q1); + const auto r1 = InterleaveUpper(d, q0, q1); + const auto r2 = InterleaveLower(d, q2, q3); + const auto r3 = InterleaveUpper(d, q2, q3); + + to.StorePart(d, r0, m + 0, n + 0); + to.StorePart(d, r1, m + 1, n + 0); + to.StorePart(d, r2, m + 2, n + 0); + to.StorePart(d, r3, m + 3, n + 0); + } + } +} +#else +constexpr bool TransposeUseSimd(size_t ROWS, size_t COLS) { return false; } +#endif + +template +struct Transpose { + template + static void Run(const From& from, const To& to) { + // This does not guarantee anything, just saves from the most stupid + // mistakes. + JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0)); + TransposeSimdTag tag; + GenericTransposeBlock(tag, from, to, N, M); + } +}; + +// Avoid inlining and unrolling transposes for large blocks. +template +struct Transpose< + N, M, typename std::enable_if<(N >= 8 && M >= 8 && N * M >= 512)>::type> { + template + static void Run(const From& from, const To& to) { + // This does not guarantee anything, just saves from the most stupid + // mistakes. 
+ JXL_DASSERT(from.Address(0, 0) != to.Address(0, 0)); + TransposeSimdTag tag; + constexpr void (*transpose)(TransposeSimdTag, + const From&, const To&, size_t, size_t) = + GenericTransposeBlock<0, 0, From, To>; + NoInlineWrapper(transpose, tag, from, to, N, M); + } +}; + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_TRANSPOSE_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/version.h.in b/third-party/libjxl/libjxl/lib/jxl/version.h.in new file mode 100644 index 0000000000..d077abec79 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/version.h.in @@ -0,0 +1,39 @@ +/* Copyright (c) the JPEG XL Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style + * license that can be found in the LICENSE file. + */ + +/** @addtogroup libjxl_common + * @{ + * @file version.h + * @brief libjxl version information + */ + +#ifndef JXL_VERSION_H_ +#define JXL_VERSION_H_ + +#define JPEGXL_MAJOR_VERSION @JPEGXL_MAJOR_VERSION@ ///< JPEG XL Major version +#define JPEGXL_MINOR_VERSION @JPEGXL_MINOR_VERSION@ ///< JPEG XL Minor version +#define JPEGXL_PATCH_VERSION @JPEGXL_PATCH_VERSION@ ///< JPEG XL Patch version + +/** Can be used to conditionally compile code for a specific JXL version + * @param[maj] major version + * @param[min] minor version + * + * @code + * #if JPEGXL_NUMERIC_VERSION < JPEGXL_COMPUTE_NUMERIC_VERSION(0,8,0) + * // use old/deprecated api + * #else + * // use current api + * #endif + * @endcode + */ +#define JPEGXL_COMPUTE_NUMERIC_VERSION(major,minor,patch) ((major<<24) | (minor<<16) | (patch<<8) | 0) + +/* Numeric representation of the version */ +#define JPEGXL_NUMERIC_VERSION JPEGXL_COMPUTE_NUMERIC_VERSION(JPEGXL_MAJOR_VERSION,JPEGXL_MINOR_VERSION,JPEGXL_PATCH_VERSION) + +#endif /* JXL_VERSION_H_ */ + +/** @}*/ diff --git a/third-party/libjxl/libjxl/lib/jxl/xorshift128plus-inl.h 
b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus-inl.h new file mode 100644 index 0000000000..a473d591f2 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus-inl.h @@ -0,0 +1,103 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Fast but weak random generator. + +#if defined(LIB_JXL_XORSHIFT128PLUS_INL_H_) == defined(HWY_TARGET_TOGGLE) +#ifdef LIB_JXL_XORSHIFT128PLUS_INL_H_ +#undef LIB_JXL_XORSHIFT128PLUS_INL_H_ +#else +#define LIB_JXL_XORSHIFT128PLUS_INL_H_ +#endif + +#include + +#include +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { +namespace { + +// These templates are not found via ADL. +using hwy::HWY_NAMESPACE::Add; +using hwy::HWY_NAMESPACE::ShiftLeft; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Xor; + +// Adapted from https://github.com/vpxyz/xorshift/blob/master/xorshift128plus/ +// (MIT-license) +class Xorshift128Plus { + public: + // 8 independent generators (= single iteration for AVX-512) + enum { N = 8 }; + + explicit HWY_MAYBE_UNUSED Xorshift128Plus(const uint64_t seed) { + // Init state using SplitMix64 generator + s0_[0] = SplitMix64(seed + 0x9E3779B97F4A7C15ull); + s1_[0] = SplitMix64(s0_[0]); + for (size_t i = 1; i < N; ++i) { + s0_[i] = SplitMix64(s1_[i - 1]); + s1_[i] = SplitMix64(s0_[i]); + } + } + + HWY_MAYBE_UNUSED Xorshift128Plus(const uint32_t seed1, const uint32_t seed2, + const uint32_t seed3, const uint32_t seed4) { + // Init state using SplitMix64 generator + s0_[0] = SplitMix64(((static_cast(seed1) << 32) + seed2) + + 0x9E3779B97F4A7C15ull); + s1_[0] = SplitMix64(((static_cast(seed3) << 32) + seed4) + + 0x9E3779B97F4A7C15ull); + for (size_t i = 1; i < N; ++i) { + s0_[i] = SplitMix64(s0_[i - 1]); + s1_[i] = SplitMix64(s1_[i - 1]); + } + } + + HWY_INLINE HWY_MAYBE_UNUSED void Fill(uint64_t* HWY_RESTRICT random_bits) { +#if 
HWY_CAP_INTEGER64 + const HWY_FULL(uint64_t) d; + for (size_t i = 0; i < N; i += Lanes(d)) { + auto s1 = Load(d, s0_ + i); + const auto s0 = Load(d, s1_ + i); + const auto bits = Add(s1, s0); // b, c + Store(s0, d, s0_ + i); + s1 = Xor(s1, ShiftLeft<23>(s1)); + Store(bits, d, random_bits + i); + s1 = Xor(s1, Xor(s0, Xor(ShiftRight<18>(s1), ShiftRight<5>(s0)))); + Store(s1, d, s1_ + i); + } +#else + for (size_t i = 0; i < N; ++i) { + auto s1 = s0_[i]; + const auto s0 = s1_[i]; + const auto bits = s1 + s0; // b, c + s0_[i] = s0; + s1 ^= s1 << 23; + random_bits[i] = bits; + s1 ^= s0 ^ (s1 >> 18) ^ (s0 >> 5); + s1_[i] = s1; + } +#endif + } + + private: + static uint64_t SplitMix64(uint64_t z) { + z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull; + z = (z ^ (z >> 27)) * 0x94D049BB133111EBull; + return z ^ (z >> 31); + } + + HWY_ALIGN uint64_t s0_[N]; + HWY_ALIGN uint64_t s1_[N]; +}; + +} // namespace +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // namespace jxl +HWY_AFTER_NAMESPACE(); + +#endif // LIB_JXL_XORSHIFT128PLUS_INL_H_ diff --git a/third-party/libjxl/libjxl/lib/jxl/xorshift128plus_test.cc b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus_test.cc new file mode 100644 index 0000000000..2b0c78b1d1 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl/xorshift128plus_test.cc @@ -0,0 +1,378 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include + +#include +#include + +#undef HWY_TARGET_INCLUDE +#define HWY_TARGET_INCLUDE "lib/jxl/xorshift128plus_test.cc" +#include +#include +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/xorshift128plus-inl.h" + +HWY_BEFORE_NAMESPACE(); +namespace jxl { +namespace HWY_NAMESPACE { + +// These templates are not found via ADL. 
+using hwy::HWY_NAMESPACE::Or; +using hwy::HWY_NAMESPACE::ShiftRight; +using hwy::HWY_NAMESPACE::Sub; + +// Define to nonzero in order to print the (new) golden outputs. +#define PRINT_RESULTS 0 + +const size_t kVectors = 64; + +#if PRINT_RESULTS + +template +void Print(const uint64_t (&result)[kNumLanes]) { + printf("{ "); + for (int i = 0; i < kNumLanes; ++i) { + if (i != 0) { + printf(", "); + } + printf("0x%016llXull", result[i]); + } + printf("},\n"); +} + +#else // PRINT_RESULTS + +const uint64_t kExpected[kVectors][Xorshift128Plus::N] = { + {0x6E901576D477CBB1ull, 0xE9E53789195DA2A2ull, 0xB681F6DDA5E0AE99ull, + 0x8EFD18CE21FD6896ull, 0xA898A80DF75CF532ull, 0x50CEB2C9E2DE7E32ull, + 0x3CA7C2FEB25C0DD0ull, 0xA4D0866B80B4D836ull}, + {0x8CD6A1E6233D3A26ull, 0x3D4603ADE98B112Dull, 0xDC427AF674019E36ull, + 0xE28B4D230705AC53ull, 0x7297E9BBA88783DDull, 0x34D3D23CFCD9B41Aull, + 0x5A223615ADBE96B8ull, 0xE5EB529027CFBD01ull}, + {0xC1894CF00DFAC6A2ull, 0x18EDF8AE9085E404ull, 0x8E936625296B4CCDull, + 0x31971EF3A14A899Bull, 0xBE87535FCE0BF26Aull, 0x576F7A752BC6649Full, + 0xA44CBADCE0C6B937ull, 0x3DBA819BB17A353Aull}, + {0x27CE38DFCC1C5EB6ull, 0x920BEB5606340256ull, 0x3986CBC40C9AFC2Cull, + 0xE22BCB3EEB1E191Eull, 0x6E1FCDD3602A8FBAull, 0x052CB044E5415A29ull, + 0x46266646EFB9ECD7ull, 0x8F44914618D29335ull}, + {0xDD30AEDF72A362C5ull, 0xBC1D824E16BB98F4ull, 0x9EA6009C2AA3D2F1ull, + 0xF65C0FBBE17AF081ull, 0x22424D06A8738991ull, 0x8A62763F2B7611D2ull, + 0x2F3E89F722637939ull, 0x84D338BEF50AFD50ull}, + {0x00F46494898E2B0Bull, 0x81239DC4FB8E8003ull, 0x414AD93EC5773FE7ull, + 0x791473C450E4110Full, 0x87F127BF68C959ACull, 0x6429282D695EF67Bull, + 0x661082E11546CBA8ull, 0x5815D53FA5436BFDull}, + {0xB3DEADAB9BE6E0F9ull, 0xAA1B7B8F7CED0202ull, 0x4C5ED437699D279Eull, + 0xA4471727F1CB39D3ull, 0xE439DA193F802F70ull, 0xF89401BB04FA6493ull, + 0x3B08045A4FE898BAull, 0x32137BFE98227950ull}, + {0xFBAE4A092897FEF3ull, 0x0639F6CE56E71C8Eull, 0xF0AD6465C07F0C1Eull, + 0xFF8E28563361DCE5ull, 
0xC2013DB7F86BC6B9ull, 0x8EFCC0503330102Full, + 0x3F6B767EA5C4DA40ull, 0xB9864B950B2232E1ull}, + {0x76EB58DE8E5EC22Aull, 0x9BBBF49A18B32F4Full, 0xC8405F02B2B2FAB9ull, + 0xC3E122A5F146BC34ull, 0xC90BB046660F5765ull, 0xB933981310DBECCFull, + 0x5A2A7BFC9126FD1Cull, 0x8BB388C94DF87901ull}, + {0x753EB89AD63EF3C3ull, 0xF24AAF40C89D65ADull, 0x23F68931C1A6AA6Dull, + 0xF47E79BF702C6DD0ull, 0xA3AD113244EE7EAEull, 0xD42CBEA28F793DC3ull, + 0xD896FCF1820F497Cull, 0x042B86D2818948C1ull}, + {0x8F2A4FC5A4265763ull, 0xEC499E6F95EAA10Cull, 0xE3786D4ECCD0DEB5ull, + 0xC725C53D3AC4CC43ull, 0x065A4ACBBF83610Eull, 0x35C61C9FEF167129ull, + 0x7B720AEAA7D70048ull, 0x14206B841377D039ull}, + {0xAD27D78BF96055F6ull, 0x5F43B20FF47ADCD4ull, 0xE184C2401E2BF71Eull, + 0x30B263D78990045Dull, 0xC22F00EBFF9BA201ull, 0xAE7F86522B53A562ull, + 0x2853312BC039F0A4ull, 0x868D619E6549C3C8ull}, + {0xFD5493D8AE9A8371ull, 0x773D5E224DF61B3Bull, 0x5377C54FBB1A8280ull, + 0xCAD4DE3B8265CAFAull, 0xCDF3F19C91EBD5F6ull, 0xC8EA0F182D73BD78ull, + 0x220502D593433FF1ull, 0xB81205E612DC31B1ull}, + {0x8F32A39EAEDA4C70ull, 0x1D4B0914AA4DAC7Full, 0x56EF1570F3A8B405ull, + 0x29812CB17404A592ull, 0x97A2AAF69CAE90F2ull, 0x12BF5E02778BBFE5ull, + 0x9D4B55AD42A05FD2ull, 0x06C2BAB5E6086620ull}, + {0x8DB4B9648302B253ull, 0xD756AD9E3AEA12C7ull, 0x68709B7F11D4B188ull, + 0x7CC299DDCD707A4Bull, 0x97B860C370A7661Dull, 0xCECD314FC20E64F5ull, + 0x55F412CDFB4C7EC3ull, 0x55EE97591193B525ull}, + {0xCF70F3ACA96E6254ull, 0x022FEDECA2E09F46ull, 0x686823DB60AE1ECFull, + 0xFD36190D3739830Eull, 0x74E1C09027F68120ull, 0xB5883A835C093842ull, + 0x93E1EFB927E9E4E3ull, 0xB2721E249D7E5EBEull}, + {0x69B6E21C44188CB8ull, 0x5D6CFB853655A7AAull, 0x3E001A0B425A66DCull, + 0x8C57451103A5138Full, 0x7BF8B4BE18EAB402ull, 0x494102EB8761A365ull, + 0xB33796A9F6A81F0Eull, 0x10005AB3BCCFD960ull}, + {0xB2CF25740AE965DCull, 0x6F7C1DF7EF53D670ull, 0x648DD6087AC2251Eull, + 0x040955D9851D487Dull, 0xBD550FC7E21A7F66ull, 0x57408F484DEB3AB5ull, + 0x481E24C150B506C1ull, 
0x72C0C3EAF91A40D6ull}, + {0x1997A481858A5D39ull, 0x539718F4BEF50DC1ull, 0x2EC4DC4787E7E368ull, + 0xFF1CE78879419845ull, 0xE219A93DD6F6DD30ull, 0x85328618D02FEC1Aull, + 0xC86E02D969181B20ull, 0xEBEC8CD8BBA34E6Eull}, + {0x28B55088A16CE947ull, 0xDD25AC11E6350195ull, 0xBD1F176694257B1Cull, + 0x09459CCF9FCC9402ull, 0xF8047341E386C4E4ull, 0x7E8E9A9AD984C6C0ull, + 0xA4661E95062AA092ull, 0x70A9947005ED1152ull}, + {0x4C01CF75DBE98CCDull, 0x0BA076CDFC7373B9ull, 0x6C5E7A004B57FB59ull, + 0x336B82297FD3BC56ull, 0x7990C0BE74E8D60Full, 0xF0275CC00EC5C8C8ull, + 0x6CF29E682DFAD2E9ull, 0xFA4361524BD95D72ull}, + {0x631D2A19FF62F018ull, 0x41C43863B985B3FAull, 0xE052B2267038EFD9ull, + 0xE2A535FAC575F430ull, 0xE004EEA90B1FF5B8ull, 0x42DFE2CA692A1F26ull, + 0x90FB0BFC9A189ECCull, 0x4484102BD3536BD0ull}, + {0xD027134E9ACCA5A5ull, 0xBBAB4F966D476A9Bull, 0x713794A96E03D693ull, + 0x9F6335E6B94CD44Aull, 0xC5090C80E7471617ull, 0x6D9C1B0C87B58E33ull, + 0x1969CE82E31185A5ull, 0x2099B97E87754EBEull}, + {0x60EBAF4ED934350Full, 0xC26FBF0BA5E6ECFFull, 0x9E54150F0312EC57ull, + 0x0973B48364ED0041ull, 0x800A523241426CFCull, 0x03AB5EC055F75989ull, + 0x8CF315935DEEB40Aull, 0x83D3FC0190BD1409ull}, + {0x26D35394CF720A51ull, 0xCE9EAA15243CBAFEull, 0xE2B45FBAF21B29E0ull, + 0xDB92E98EDE73F9E0ull, 0x79B16F5101C26387ull, 0x1AC15959DE88C86Full, + 0x387633AEC6D6A580ull, 0xA6FC05807BFC5EB8ull}, + {0x2D26C8E47C6BADA9ull, 0x820E6EC832D52D73ull, 0xB8432C3E0ED0EE5Bull, + 0x0F84B3C4063AAA87ull, 0xF393E4366854F651ull, 0x749E1B4D2366A567ull, + 0x805EACA43480D004ull, 0x244EBF3AA54400A5ull}, + {0xBFDC3763AA79F75Aull, 0x9E3A74CC751F41DBull, 0xF401302A149DBC55ull, + 0x6B25F7973D7BF7BCull, 0x13371D34FDBC3DAEull, 0xC5E1998C8F484DCDull, + 0x7031B8AE5C364464ull, 0x3847F0C4F3DA2C25ull}, + {0x24C6387D2C0F1225ull, 0x77CCE960255C67A4ull, 0x21A0947E497B10EBull, + 0xBB5DB73A825A9D7Eull, 0x26294A41999E553Dull, 0x3953E0089F87D925ull, + 0x3DAE6E5D4E5EAAFEull, 0x74B545460341A7AAull}, + {0x710E5EB08A7DB820ull, 0x7E43C4E77CAEA025ull, 
0xD4C91529C8B060C1ull, + 0x09AE26D8A7B0CA29ull, 0xAB9F356BB360A772ull, 0xB68834A25F19F6E9ull, + 0x79B8D9894C5734E2ull, 0xC6847E7C8FFD265Full}, + {0x10C4BCB06A5111E6ull, 0x57CB50955B6A2516ull, 0xEF53C87798B6995Full, + 0xAB38E15BBD8D0197ull, 0xA51C6106EFF73C93ull, 0x83D7F0E2270A7134ull, + 0x0923FD330397FCE5ull, 0xF9DE54EDFE58FB45ull}, + {0x07D44833ACCD1A94ull, 0xAAD3C9E945E2F9F3ull, 0xABF4C879B876AA37ull, + 0xF29C69A21B301619ull, 0x2DDCE959111C788Bull, 0x7CEDB48F8AC1729Bull, + 0x93F3BA9A02B659BEull, 0xF20A87FF17933CBEull}, + {0x8E96EBE93180CFE6ull, 0x94CAA12873937079ull, 0x05F613D9380D4189ull, + 0xBCAB40C1DC79F38Aull, 0x0AD8907B7C61D19Eull, 0x88534E189D103910ull, + 0x2DB2FAABA160AB8Full, 0xA070E7506B06F15Cull}, + {0x6FB1FCDAFFEF87A9ull, 0xE735CF25337A090Dull, 0x172C6EDCEFEF1825ull, + 0x76957EA49EF0542Dull, 0x819BF4CD250F7C49ull, 0xD6FF23E4AD00C4D4ull, + 0xE79673C1EC358FF0ull, 0xAC9C048144337938ull}, + {0x4C5387FF258B3AF4ull, 0xEDB68FAEC2CB1AA3ull, 0x02A624E67B4E1DA4ull, + 0x5C44797A38E08AF2ull, 0x36546A70E9411B4Bull, 0x47C17B24D2FD9675ull, + 0x101957AAA020CA26ull, 0x47A1619D4779F122ull}, + {0xF84B8BCDC92D9A3Cull, 0x951D7D2C74B3066Bull, 0x7AC287C06EDDD9B2ull, + 0x4C38FC476608D38Full, 0x224D793B19CB4BCDull, 0x835A255899BF1A41ull, + 0x4AD250E9F62DB4ABull, 0xD9B44F4B58781096ull}, + {0xABBAF99A8EB5C6B8ull, 0xFB568E900D3A9F56ull, 0x11EDF63D23C5DF11ull, + 0xA9C3011D3FA7C5A8ull, 0xAEDD3CF11AFFF725ull, 0xABCA472B5F1EDD6Bull, + 0x0600B6BB5D879804ull, 0xDB4DE007F22191A0ull}, + {0xD76CC9EFF0CE9392ull, 0xF5E0A772B59BA49Aull, 0x7D1AE1ED0C1261B5ull, + 0x79224A33B5EA4F4Aull, 0x6DD825D80C40EA60ull, 0x47FC8E747E51C953ull, + 0x695C05F72888BF98ull, 0x1A012428440B9015ull}, + {0xD754DD61F9B772BFull, 0xC4A2FCF4C0F9D4EBull, 0x461167CDF67A24A2ull, + 0x434748490EBCB9D4ull, 0x274DD9CDCA5781DEull, 0x36BAC63BA9A85209ull, + 0x30324DAFDA36B70Full, 0x337570DB4FE6DAB3ull}, + {0xF46CBDD57C551546ull, 0x8E02507E676DA3E3ull, 0xD826245A8C15406Dull, + 0xDFB38A5B71113B72ull, 0x5EA38454C95B16B5ull, 
0x28C054FB87ABF3E1ull, + 0xAA2724C0BA1A8096ull, 0xECA83EC980304F2Full}, + {0x6AA76EC294EB3303ull, 0x42D4CDB2A8032E3Bull, 0x7999EDF75DCD8735ull, + 0xB422BFFE696CCDCCull, 0x8F721461FD7CCDFEull, 0x148E1A5814FDE253ull, + 0x4DC941F4375EF8FFull, 0x27B2A9E0EB5B49CFull}, + {0xCEA592EF9343EBE1ull, 0xF7D38B5FA7698903ull, 0x6CCBF352203FEAB6ull, + 0x830F3095FCCDA9C5ull, 0xDBEEF4B81B81C8F4ull, 0x6D7EB9BCEECA5CF9ull, + 0xC58ABB0FBE436C69ull, 0xE4B97E6DB2041A4Bull}, + {0x7E40FC772978AF14ull, 0xCDDA4BBAE28354A1ull, 0xE4F993B832C32613ull, + 0xD3608093C68A4B35ull, 0x9A3B60E01BEE3699ull, 0x03BEF248F3288713ull, + 0x70B9294318F3E9B4ull, 0x8D2ABB913B8610DEull}, + {0x37F209128E7D8B2Cull, 0x81D2AB375BD874BCull, 0xA716A1B7373F7408ull, + 0x0CEE97BEC4706540ull, 0xA40C5FD9CDBC1512ull, 0x73CAF6C8918409E7ull, + 0x45E11BCEDF0BBAA1ull, 0x612C612BFF6E6605ull}, + {0xF8ECB14A12D0F649ull, 0xDA683CD7C01BA1ACull, 0xA2203F7510E124C1ull, + 0x7F83E52E162F3C78ull, 0x77D2BB73456ACADBull, 0x37FC34FC840BBA6Full, + 0x3076BC7D4C6EBC1Full, 0x4F514123632B5FA9ull}, + {0x44D789DED935E884ull, 0xF8291591E09FEC9Full, 0xD9CED2CF32A2E4B7ull, + 0x95F70E1EB604904Aull, 0xDE438FE43C14F6ABull, 0x4C8D23E4FAFCF8D8ull, + 0xC716910A3067EB86ull, 0x3D6B7915315095D3ull}, + {0x3170FDBADAB92095ull, 0x8F1963933FC5650Bull, 0x72F94F00ABECFEABull, + 0x6E3AE826C6AAB4CEull, 0xA677A2BF31068258ull, 0x9660CDC4F363AF10ull, + 0xD81A15A152379EF1ull, 0x5D7D285E1080A3F9ull}, + {0xDAD5DDFF9A2249B3ull, 0x6F9721D926103FAEull, 0x1418CBB83FFA349Aull, + 0xE71A30AD48C012B2ull, 0xBE76376C63751132ull, 0x3496467ACA713AE6ull, + 0x8D7EC01369F991A3ull, 0xD8C73A88B96B154Eull}, + {0x8B5D9C74AEB4833Aull, 0xF914FB3F867B912Full, 0xB894EA034936B1DCull, + 0x8A16D21BE51C4F5Bull, 0x31FF048ED582D98Eull, 0xB95AB2F4DC65B820ull, + 0x04082B9170561AF7ull, 0xA215610A5DC836FAull}, + {0xB2ADE592C092FAACull, 0x7A1E683BCBF13294ull, 0xC7A4DBF86858C096ull, + 0x3A49940F97BFF316ull, 0xCAE5C06B82C46703ull, 0xC7F413A0F951E2BDull, + 0x6665E7BB10EB5916ull, 0x86F84A5A94EDE319ull}, + 
{0x4EA199D8FAA79CA3ull, 0xDFA26E5BF1981704ull, 0x0F5E081D37FA4E01ull, + 0x9CB632F89CD675CDull, 0x4A09DB89D48C0304ull, 0x88142742EA3C7672ull, + 0xAC4F149E6D2E9BDBull, 0x6D9E1C23F8B1C6C6ull}, + {0xD58BE47B92DEC0E9ull, 0x8E57573645E34328ull, 0x4CC094CCB5FB5126ull, + 0x5F1D66AF6FB40E3Cull, 0x2BA15509132D3B00ull, 0x0D6545646120E567ull, + 0x3CF680C45C223666ull, 0x96B28E32930179DAull}, + {0x5900C45853AC7990ull, 0x61881E3E8B7FF169ull, 0x4DE5F835DF2230FFull, + 0x4427A9E7932F73FFull, 0x9B641BAD379A8C8Dull, 0xDF271E5BF98F4E5Cull, + 0xDFDA16DB830FF5EEull, 0x371C7E7CFB89C0E9ull}, + {0x4410A8576247A250ull, 0x6AD2DA12B45AC0D9ull, 0x18DFC72AAC85EECCull, + 0x06FC8BB2A0EF25C8ull, 0xEB287619C85E6118ull, 0x19553ECA67F25A2Cull, + 0x3B9557F1DCEC5BAAull, 0x7BAD9E8B710D1079ull}, + {0x34F365D66BD22B28ull, 0xE6E124B9F10F835Dull, 0x0573C38ABF2B24DCull, + 0xD32E6AF10A0125AEull, 0x383590ACEA979519ull, 0x8376ED7A39E28205ull, + 0xF0B7F184DCBDA435ull, 0x062A203390E31794ull}, + {0xA2AFFD7E41918760ull, 0x7F90FC1BD0819C86ull, 0x5033C08E5A969533ull, + 0x2707AF5C6D039590ull, 0x57BBD5980F17DF9Cull, 0xD3FE6E61D763268Aull, + 0x9E0A0AE40F335A3Bull, 0x43CF4EB0A99613C5ull}, + {0xD4D2A397CE1A7C2Eull, 0x3DF7CE7CC3212DADull, 0x0880F0D5D356C75Aull, + 0xA8AFC44DD03B1346ull, 0x79263B46C13A29E0ull, 0x11071B3C0ED58E7Aull, + 0xED46DC9F538406BFull, 0x2C94974F2B94843Dull}, + {0xE246E13C39AB5D5Eull, 0xAC1018489D955B20ull, 0x8601B558771852B8ull, + 0x110BD4C06DB40173ull, 0x738FC8A18CCA0EBBull, 0x6673E09BE0EA76E5ull, + 0x024BC7A0C7527877ull, 0x45E6B4652E2EC34Eull}, + {0xD1ED26A1A375CDC8ull, 0xAABC4E896A617CB8ull, 0x0A9C9E8E57D753C6ull, + 0xA3774A75FEB4C30Eull, 0x30B816C01C93E49Eull, 0xF405BABC06D2408Cull, + 0xCC0CE6B4CE788ABCull, 0x75E7922D0447956Cull}, + {0xD07C1676A698BC95ull, 0x5F9AEA4840E2D860ull, 0xD5FC10D58BDF6F02ull, + 0xF190A2AD4BC2EEA7ull, 0x0C24D11F51726931ull, 0xDB646899A16B6512ull, + 0x7BC10670047B1DD8ull, 0x2413A5ABCD45F092ull}, + {0x4E66892190CFD923ull, 0xF10162440365EC8Eull, 0x158ACA5A6A2280AEull, + 
0x0D60ED11C0224166ull, 0x7CD2E9A71B9D7488ull, 0x450D7289706AB2A3ull, + 0x88FAE34EC9A0D7DCull, 0x96FF9103575A97DAull}, + {0x77990FAC6046C446ull, 0xB174B5FB30C76676ull, 0xE352CE3EB56CF82Aull, + 0xC6039B6873A9A082ull, 0xE3F80F3AE333148Aull, 0xB853BA24BA3539B9ull, + 0xE8863E52ECCB0C74ull, 0x309B4CC1092CC245ull}, + {0xBC2B70BEE8388D9Full, 0xE48D92AE22216DCEull, 0xF15F3BF3E2C15D8Full, + 0x1DD964D4812D8B24ull, 0xD56AF02FB4665E4Cull, 0x98002200595BD9A3ull, + 0x049246D50BB8FA12ull, 0x1B542DF485B579B9ull}, + {0x2347409ADFA8E497ull, 0x36015C2211D62498ull, 0xE9F141F32EB82690ull, + 0x1F839912D0449FB9ull, 0x4E4DCFFF2D02D97Cull, 0xF8A03AB4C0F625C9ull, + 0x0605F575795DAC5Cull, 0x4746C9BEA0DDA6B1ull}, + {0xCA5BB519ECE7481Bull, 0xFD496155E55CA945ull, 0xF753B9DBB1515F81ull, + 0x50549E8BAC0F70E7ull, 0x8614FB0271E21C60ull, 0x60C72947EB0F0070ull, + 0xA6511C10AEE742B6ull, 0x48FB48F2CACCB43Eull}}; + +#endif // PRINT_RESULTS + +// Ensures Xorshift128+ returns consistent and unchanging values. +void TestGolden() { + HWY_ALIGN Xorshift128Plus rng(12345); + for (uint64_t vector = 0; vector < kVectors; ++vector) { + HWY_ALIGN uint64_t lanes[Xorshift128Plus::N]; + rng.Fill(lanes); +#if PRINT_RESULTS + Print(lanes); +#else + for (size_t i = 0; i < Xorshift128Plus::N; ++i) { + ASSERT_EQ(kExpected[vector][i], lanes[i]) + << "Where vector=" << vector << " i=" << i; + } +#endif + } +} + +// Output changes when given different seeds +void TestSeedChanges() { + HWY_ALIGN uint64_t lanes[Xorshift128Plus::N]; + + std::vector first; + constexpr size_t kNumSeeds = 16384; + first.reserve(kNumSeeds); + + // All 14-bit seeds + for (size_t seed = 0; seed < kNumSeeds; ++seed) { + HWY_ALIGN Xorshift128Plus rng(seed); + + rng.Fill(lanes); + first.push_back(lanes[0]); + } + + // All outputs are unique + ASSERT_EQ(kNumSeeds, first.size()); + std::sort(first.begin(), first.end()); + first.erase(std::unique(first.begin(), first.end()), first.end()); + EXPECT_EQ(kNumSeeds, first.size()); +} + +void TestFloat() { + 
test::ThreadPoolForTests pool(8); + +#ifdef JXL_DISABLE_SLOW_TESTS + const uint32_t kMaxSeed = 256; +#else // JXL_DISABLE_SLOW_TESTS + const uint32_t kMaxSeed = 4096; +#endif // JXL_DISABLE_SLOW_TESTS + EXPECT_TRUE(RunOnPool( + &pool, 0, kMaxSeed, ThreadPool::NoInit, + [](const uint32_t seed, size_t /*thread*/) { + HWY_ALIGN Xorshift128Plus rng(seed); + + const HWY_FULL(uint32_t) du; + const HWY_FULL(float) df; + HWY_ALIGN uint64_t batch[Xorshift128Plus::N]; + HWY_ALIGN float lanes[MaxLanes(df)]; + double sum = 0.0; + size_t count = 0; + const size_t kReps = 2000; + for (size_t reps = 0; reps < kReps; ++reps) { + rng.Fill(batch); + for (size_t i = 0; i < Xorshift128Plus::N * 2; i += Lanes(df)) { + const auto bits = + Load(du, reinterpret_cast(batch) + i); + // 1.0 + 23 random mantissa bits = [1, 2) + const auto rand12 = + BitCast(df, Or(ShiftRight<9>(bits), Set(du, 0x3F800000))); + const auto rand01 = Sub(rand12, Set(df, 1.0f)); + Store(rand01, df, lanes); + for (float lane : lanes) { + sum += lane; + count += 1; + EXPECT_LE(lane, 1.0f); + EXPECT_GE(lane, 0.0f); + } + } + } + + // Verify average (uniform distribution) + EXPECT_NEAR(0.5, sum / count, 0.00702); + }, + "TestXorShift")); +} + +// Not more than one 64-bit zero +void TestNotZero() { + test::ThreadPoolForTests pool(8); + +#ifdef JXL_DISABLE_SLOW_TESTS + const uint32_t kMaxSeed = 500; +#else // JXL_DISABLE_SLOW_TESTS + const uint32_t kMaxSeed = 2000; +#endif // JXL_DISABLE_SLOW_TESTS + EXPECT_TRUE(RunOnPool( + &pool, 0, kMaxSeed, ThreadPool::NoInit, + [](const uint32_t task, size_t /*thread*/) { + HWY_ALIGN uint64_t lanes[Xorshift128Plus::N]; + + HWY_ALIGN Xorshift128Plus rng(task); + size_t num_zero = 0; + for (size_t vectors = 0; vectors < 10000; ++vectors) { + rng.Fill(lanes); + for (uint64_t lane : lanes) { + num_zero += static_cast(lane == 0); + } + } + EXPECT_LE(num_zero, 1u); + }, + "TestNotZero")); +} + +// NOLINTNEXTLINE(google-readability-namespace-comments) +} // namespace HWY_NAMESPACE +} // 
namespace jxl +HWY_AFTER_NAMESPACE(); + +#if HWY_ONCE +namespace jxl { + +class Xorshift128Test : public hwy::TestWithParamTarget {}; + +HWY_TARGET_INSTANTIATE_TEST_SUITE_P(Xorshift128Test); + +HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestNotZero); +HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestGolden); +HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestSeedChanges); +HWY_EXPORT_AND_TEST_P(Xorshift128Test, TestFloat); + +} // namespace jxl +#endif diff --git a/third-party/libjxl/libjxl/lib/jxl_benchmark.cmake b/third-party/libjxl/libjxl/lib/jxl_benchmark.cmake new file mode 100644 index 0000000000..10871e3073 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl_benchmark.cmake @@ -0,0 +1,36 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include(jxl_lists.cmake) + +# benchmark.h doesn't work in our MINGW set up since it ends up including the +# wrong stdlib header. We don't run gbench on MINGW targets anyway. +if(NOT MINGW) + +# This is the Google benchmark project (https://github.com/google/benchmark). +find_package(benchmark QUIET) + +if(benchmark_FOUND) + if(JPEGXL_STATIC AND NOT MINGW) + # benchmark::benchmark hardcodes the librt.so which obviously doesn't + # compile in static mode. + set_target_properties(benchmark::benchmark PROPERTIES + INTERFACE_LINK_LIBRARIES "Threads::Threads;-lrt") + endif() + + # Compiles all the benchmark files into a single binary. Individual benchmarks + # can be run with --benchmark_filter. 
+ add_executable(jxl_gbench "${JPEGXL_INTERNAL_GBENCH_SOURCES}" gbench_main.cc) + + target_compile_definitions(jxl_gbench PRIVATE + -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}") + target_link_libraries(jxl_gbench + jxl_extras-static + jxl-static + benchmark::benchmark + ) +endif() # benchmark_FOUND + +endif() # MINGW diff --git a/third-party/libjxl/libjxl/lib/jxl_extras.cmake b/third-party/libjxl/libjxl/lib/jxl_extras.cmake new file mode 100644 index 0000000000..c38d4f4d2e --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl_extras.cmake @@ -0,0 +1,187 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include(compatibility.cmake) +include(jxl_lists.cmake) + +# Object library for those parts of extras that do not depend on jxl internals +# or jpegli. We will create two versions of these object files, one with and one +# without external codec support compiled in. +list(APPEND JPEGXL_EXTRAS_CORE_SOURCES + "${JPEGXL_INTERNAL_EXTRAS_SOURCES}" + "${JPEGXL_INTERNAL_CODEC_APNG_SOURCES}" + "${JPEGXL_INTERNAL_CODEC_EXR_SOURCES}" + "${JPEGXL_INTERNAL_CODEC_JPG_SOURCES}" + "${JPEGXL_INTERNAL_CODEC_JXL_SOURCES}" + "${JPEGXL_INTERNAL_CODEC_PGX_SOURCES}" + "${JPEGXL_INTERNAL_CODEC_PNM_SOURCES}" + "${JPEGXL_INTERNAL_CODEC_NPY_SOURCES}" + extras/dec/gif.cc + extras/dec/gif.h +) +foreach(LIB jxl_extras_core-obj jxl_extras_core_nocodec-obj) + add_library("${LIB}" OBJECT "${JPEGXL_EXTRAS_CORE_SOURCES}") + list(APPEND JXL_EXTRAS_OBJECT_LIBRARIES "${LIB}") +endforeach() +list(APPEND JXL_EXTRAS_OBJECTS $) + +# Object library for those parts of extras that depend on jxl internals. 
+add_library(jxl_extras_internal-obj OBJECT + "${JPEGXL_INTERNAL_EXTRAS_FOR_TOOLS_SOURCES}" +) +list(APPEND JXL_EXTRAS_OBJECT_LIBRARIES jxl_extras_internal-obj) +list(APPEND JXL_EXTRAS_OBJECTS $) + +set(JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES) + +find_package(GIF 5.1) +if(GIF_FOUND) + target_include_directories(jxl_extras_core-obj PRIVATE "${GIF_INCLUDE_DIRS}") + target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_GIF=1) + list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${GIF_LIBRARIES}) + if(JPEGXL_DEP_LICENSE_DIR) + configure_file("${JPEGXL_DEP_LICENSE_DIR}/libgif-dev/copyright" + ${PROJECT_BINARY_DIR}/LICENSE.libgif COPYONLY) + endif() # JPEGXL_DEP_LICENSE_DIR +endif() + +find_package(JPEG) +if(JPEG_FOUND) + target_include_directories(jxl_extras_core-obj PRIVATE "${JPEG_INCLUDE_DIRS}") + target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_JPEG=1) + list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${JPEG_LIBRARIES}) + if(JPEGXL_DEP_LICENSE_DIR) + configure_file("${JPEGXL_DEP_LICENSE_DIR}/libjpeg-dev/copyright" + ${PROJECT_BINARY_DIR}/LICENSE.libjpeg COPYONLY) + endif() # JPEGXL_DEP_LICENSE_DIR +endif() + +if (JPEGXL_ENABLE_SJPEG) + target_compile_definitions(jxl_extras_core-obj PRIVATE + -DJPEGXL_ENABLE_SJPEG=1) + target_include_directories(jxl_extras_core-obj PRIVATE + ../third_party/sjpeg/src) + list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES sjpeg) +endif() + +if(JPEGXL_ENABLE_JPEGLI) + add_library(jxl_extras_jpegli-obj OBJECT + "${JPEGXL_INTERNAL_CODEC_JPEGLI_SOURCES}" + ) + target_include_directories(jxl_extras_jpegli-obj PRIVATE + "${CMAKE_CURRENT_BINARY_DIR}/include/jpegli" + ) + list(APPEND JXL_EXTRAS_OBJECT_LIBRARIES jxl_extras_jpegli-obj) + list(APPEND JXL_EXTRAS_OBJECTS $) +endif() + +if(NOT JPEGXL_BUNDLE_LIBPNG) + find_package(PNG) +endif() +if(PNG_FOUND) + target_include_directories(jxl_extras_core-obj PRIVATE "${PNG_INCLUDE_DIRS}") + target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_APNG=1) + 
list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES ${PNG_LIBRARIES}) + configure_file(extras/LICENSE.apngdis + ${PROJECT_BINARY_DIR}/LICENSE.apngdis COPYONLY) +endif() + +if (JPEGXL_ENABLE_OPENEXR) +pkg_check_modules(OpenEXR IMPORTED_TARGET OpenEXR) +if (OpenEXR_FOUND) + target_include_directories(jxl_extras_core-obj PRIVATE + "${OpenEXR_INCLUDE_DIRS}" + ) + target_compile_definitions(jxl_extras_core-obj PRIVATE -DJPEGXL_ENABLE_EXR=1) + list(APPEND JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES PkgConfig::OpenEXR) + if(JPEGXL_DEP_LICENSE_DIR) + configure_file("${JPEGXL_DEP_LICENSE_DIR}/libopenexr-dev/copyright" + ${PROJECT_BINARY_DIR}/LICENSE.libopenexr COPYONLY) + endif() # JPEGXL_DEP_LICENSE_DIR + # OpenEXR generates exceptions, so we need exception support to catch them. + # Actually those flags counteract the ones set in JPEGXL_INTERNAL_FLAGS. + if (NOT WIN32) + set_source_files_properties( + extras/dec/exr.cc extras/enc/exr.cc PROPERTIES COMPILE_FLAGS -fexceptions) + if (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set_source_files_properties( + extras/dec/exr.cc extras/enc/exr.cc PROPERTIES COMPILE_FLAGS + -fcxx-exceptions) + endif() + endif() +endif() # OpenEXR_FOUND +endif() # JPEGXL_ENABLE_OPENEXR + +# Common settings for the object libraries. +foreach(LIB ${JXL_EXTRAS_OBJECT_LIBRARIES}) + target_compile_options("${LIB}" PRIVATE "${JPEGXL_INTERNAL_FLAGS}") + target_compile_definitions("${LIB}" PRIVATE -DJXL_EXPORT=) + set_property(TARGET "${LIB}" PROPERTY POSITION_INDEPENDENT_CODE ON) + target_include_directories("${LIB}" PRIVATE + ${PROJECT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/include + ${CMAKE_CURRENT_BINARY_DIR}/include + ${JXL_HWY_INCLUDE_DIRS} + ) +endforeach() + +# Define an extras library that does not have the image codecs, only the core +# extras code. This is needed for some of the fuzzers. 
+add_library(jxl_extras_nocodec-static STATIC EXCLUDE_FROM_ALL + $ + $ +) +target_link_libraries(jxl_extras_nocodec-static PUBLIC + jxl-static + jxl_threads-static +) + +# We only define a static library jxl_extras since it uses internal parts of +# jxl library which are not accessible from outside the library in the +# shared library case. +add_library(jxl_extras-static STATIC EXCLUDE_FROM_ALL ${JXL_EXTRAS_OBJECTS}) +target_link_libraries(jxl_extras-static PUBLIC + ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES} + jxl-static + jxl_threads-static +) +if(JPEGXL_ENABLE_JPEGLI) + target_compile_definitions(jxl_extras-static PUBLIC -DJPEGXL_ENABLE_JPEGLI=1) + target_link_libraries(jxl_extras-static PRIVATE jpegli-static) +endif() + +### Static library that does not depend on internal parts of jxl library. +add_library(jxl_extras_codec-static STATIC + $ +) +target_link_libraries(jxl_extras_codec-static PRIVATE + ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES} + jxl +) + +### Shared library that does not depend on internal parts of jxl library. +### Used by cjxl and djxl binaries. 
+if (BUILD_SHARED_LIBS) +add_library(jxl_extras_codec SHARED + $ +) +target_link_libraries(jxl_extras_codec PRIVATE + ${JXL_EXTRAS_CODEC_INTERNAL_LIBRARIES} + jxl +) +set_target_properties(jxl_extras_codec PROPERTIES + VERSION ${JPEGXL_LIBRARY_VERSION} + SOVERSION ${JPEGXL_LIBRARY_SOVERSION} + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" + RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}" +) +install(TARGETS jxl_extras_codec + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} +) +else() +add_library(jxl_extras_codec ALIAS jxl_extras_codec-static) +endif() # BUILD_SHARED_LIBS diff --git a/third-party/libjxl/libjxl/lib/jxl_lists.bzl b/third-party/libjxl/libjxl/lib/jxl_lists.bzl new file mode 100644 index 0000000000..e3418cd9a5 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl_lists.bzl @@ -0,0 +1,644 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# This file is generated, do not modify by manually. +# Run `tools/scripts/build_cleaner.py --update` to regenerate it. 
+ +libjxl_base_sources = [ + "jxl/base/arch_macros.h", + "jxl/base/bits.h", + "jxl/base/byte_order.h", + "jxl/base/cache_aligned.cc", + "jxl/base/cache_aligned.h", + "jxl/base/compiler_specific.h", + "jxl/base/data_parallel.cc", + "jxl/base/data_parallel.h", + "jxl/base/float.h", + "jxl/base/iaca.h", + "jxl/base/os_macros.h", + "jxl/base/override.h", + "jxl/base/padded_bytes.cc", + "jxl/base/padded_bytes.h", + "jxl/base/printf_macros.h", + "jxl/base/random.cc", + "jxl/base/random.h", + "jxl/base/sanitizer_definitions.h", + "jxl/base/scope_guard.h", + "jxl/base/span.h", + "jxl/base/status.h", +] + +libjxl_codec_apng_sources = [ + "extras/dec/apng.cc", + "extras/dec/apng.h", + "extras/enc/apng.cc", + "extras/enc/apng.h", +] + +libjxl_codec_exr_sources = [ + "extras/dec/exr.cc", + "extras/dec/exr.h", + "extras/enc/exr.cc", + "extras/enc/exr.h", +] + +libjxl_codec_gif_sources = [ + "extras/dec/gif.cc", + "extras/dec/gif.h", +] + +libjxl_codec_jpegli_sources = [ + "extras/dec/jpegli.cc", + "extras/dec/jpegli.h", + "extras/enc/jpegli.cc", + "extras/enc/jpegli.h", +] + +libjxl_codec_jpg_sources = [ + "extras/dec/jpg.cc", + "extras/dec/jpg.h", + "extras/enc/jpg.cc", + "extras/enc/jpg.h", +] + +libjxl_codec_jxl_sources = [ + "extras/dec/jxl.cc", + "extras/dec/jxl.h", + "extras/enc/jxl.cc", + "extras/enc/jxl.h", +] + +libjxl_codec_npy_sources = [ + "extras/enc/npy.cc", + "extras/enc/npy.h", +] + +libjxl_codec_pgx_sources = [ + "extras/dec/pgx.cc", + "extras/dec/pgx.h", + "extras/enc/pgx.cc", + "extras/enc/pgx.h", +] + +libjxl_codec_pnm_sources = [ + "extras/dec/pnm.cc", + "extras/dec/pnm.h", + "extras/enc/pnm.cc", + "extras/enc/pnm.h", +] + +libjxl_dec_box_sources = [ + "jxl/box_content_decoder.cc", + "jxl/box_content_decoder.h", +] + +libjxl_dec_jpeg_sources = [ + "jxl/decode_to_jpeg.cc", + "jxl/decode_to_jpeg.h", + "jxl/jpeg/dec_jpeg_data.cc", + "jxl/jpeg/dec_jpeg_data.h", + "jxl/jpeg/dec_jpeg_data_writer.cc", + "jxl/jpeg/dec_jpeg_data_writer.h", + 
"jxl/jpeg/dec_jpeg_output_chunk.h", + "jxl/jpeg/dec_jpeg_serialization_state.h", + "jxl/jpeg/jpeg_data.cc", + "jxl/jpeg/jpeg_data.h", +] + +libjxl_dec_sources = [ + "jxl/ac_context.h", + "jxl/ac_strategy.cc", + "jxl/ac_strategy.h", + "jxl/alpha.cc", + "jxl/alpha.h", + "jxl/ans_common.cc", + "jxl/ans_common.h", + "jxl/ans_params.h", + "jxl/blending.cc", + "jxl/blending.h", + "jxl/chroma_from_luma.cc", + "jxl/chroma_from_luma.h", + "jxl/codec_in_out.h", + "jxl/coeff_order.cc", + "jxl/coeff_order.h", + "jxl/coeff_order_fwd.h", + "jxl/color_encoding_internal.cc", + "jxl/color_encoding_internal.h", + "jxl/color_management.cc", + "jxl/color_management.h", + "jxl/common.h", + "jxl/compressed_dc.cc", + "jxl/compressed_dc.h", + "jxl/convolve-inl.h", + "jxl/convolve.h", + "jxl/convolve_separable5.cc", + "jxl/convolve_separable7.cc", + "jxl/convolve_slow.cc", + "jxl/convolve_symmetric3.cc", + "jxl/convolve_symmetric5.cc", + "jxl/dct-inl.h", + "jxl/dct_block-inl.h", + "jxl/dct_scales.cc", + "jxl/dct_scales.h", + "jxl/dct_util.h", + "jxl/dec_ans.cc", + "jxl/dec_ans.h", + "jxl/dec_bit_reader.h", + "jxl/dec_cache.cc", + "jxl/dec_cache.h", + "jxl/dec_context_map.cc", + "jxl/dec_context_map.h", + "jxl/dec_external_image.cc", + "jxl/dec_external_image.h", + "jxl/dec_frame.cc", + "jxl/dec_frame.h", + "jxl/dec_group.cc", + "jxl/dec_group.h", + "jxl/dec_group_border.cc", + "jxl/dec_group_border.h", + "jxl/dec_huffman.cc", + "jxl/dec_huffman.h", + "jxl/dec_modular.cc", + "jxl/dec_modular.h", + "jxl/dec_noise.cc", + "jxl/dec_noise.h", + "jxl/dec_patch_dictionary.cc", + "jxl/dec_patch_dictionary.h", + "jxl/dec_tone_mapping-inl.h", + "jxl/dec_transforms-inl.h", + "jxl/dec_xyb-inl.h", + "jxl/dec_xyb.cc", + "jxl/dec_xyb.h", + "jxl/decode.cc", + "jxl/entropy_coder.cc", + "jxl/entropy_coder.h", + "jxl/epf.cc", + "jxl/epf.h", + "jxl/exif.h", + "jxl/fast_dct-inl.h", + "jxl/fast_dct.cc", + "jxl/fast_dct.h", + "jxl/fast_dct128-inl.h", + "jxl/fast_dct16-inl.h", + "jxl/fast_dct256-inl.h", + 
"jxl/fast_dct32-inl.h", + "jxl/fast_dct64-inl.h", + "jxl/fast_dct8-inl.h", + "jxl/fast_math-inl.h", + "jxl/field_encodings.h", + "jxl/fields.cc", + "jxl/fields.h", + "jxl/frame_header.cc", + "jxl/frame_header.h", + "jxl/gauss_blur.cc", + "jxl/gauss_blur.h", + "jxl/headers.cc", + "jxl/headers.h", + "jxl/huffman_table.cc", + "jxl/huffman_table.h", + "jxl/icc_codec.cc", + "jxl/icc_codec.h", + "jxl/icc_codec_common.cc", + "jxl/icc_codec_common.h", + "jxl/image.cc", + "jxl/image.h", + "jxl/image_bundle.cc", + "jxl/image_bundle.h", + "jxl/image_metadata.cc", + "jxl/image_metadata.h", + "jxl/image_ops.h", + "jxl/inverse_mtf-inl.h", + "jxl/lehmer_code.h", + "jxl/loop_filter.cc", + "jxl/loop_filter.h", + "jxl/luminance.cc", + "jxl/luminance.h", + "jxl/matrix_ops.h", + "jxl/memory_manager_internal.cc", + "jxl/memory_manager_internal.h", + "jxl/modular/encoding/context_predict.h", + "jxl/modular/encoding/dec_ma.cc", + "jxl/modular/encoding/dec_ma.h", + "jxl/modular/encoding/encoding.cc", + "jxl/modular/encoding/encoding.h", + "jxl/modular/encoding/ma_common.h", + "jxl/modular/modular_image.cc", + "jxl/modular/modular_image.h", + "jxl/modular/options.h", + "jxl/modular/transform/palette.cc", + "jxl/modular/transform/palette.h", + "jxl/modular/transform/rct.cc", + "jxl/modular/transform/rct.h", + "jxl/modular/transform/squeeze.cc", + "jxl/modular/transform/squeeze.h", + "jxl/modular/transform/transform.cc", + "jxl/modular/transform/transform.h", + "jxl/noise.h", + "jxl/opsin_params.cc", + "jxl/opsin_params.h", + "jxl/passes_state.cc", + "jxl/passes_state.h", + "jxl/patch_dictionary_internal.h", + "jxl/quant_weights.cc", + "jxl/quant_weights.h", + "jxl/quantizer-inl.h", + "jxl/quantizer.cc", + "jxl/quantizer.h", + "jxl/rational_polynomial-inl.h", + "jxl/render_pipeline/low_memory_render_pipeline.cc", + "jxl/render_pipeline/low_memory_render_pipeline.h", + "jxl/render_pipeline/render_pipeline.cc", + "jxl/render_pipeline/render_pipeline.h", + 
"jxl/render_pipeline/render_pipeline_stage.h", + "jxl/render_pipeline/simple_render_pipeline.cc", + "jxl/render_pipeline/simple_render_pipeline.h", + "jxl/render_pipeline/stage_blending.cc", + "jxl/render_pipeline/stage_blending.h", + "jxl/render_pipeline/stage_chroma_upsampling.cc", + "jxl/render_pipeline/stage_chroma_upsampling.h", + "jxl/render_pipeline/stage_epf.cc", + "jxl/render_pipeline/stage_epf.h", + "jxl/render_pipeline/stage_from_linear.cc", + "jxl/render_pipeline/stage_from_linear.h", + "jxl/render_pipeline/stage_gaborish.cc", + "jxl/render_pipeline/stage_gaborish.h", + "jxl/render_pipeline/stage_noise.cc", + "jxl/render_pipeline/stage_noise.h", + "jxl/render_pipeline/stage_patches.cc", + "jxl/render_pipeline/stage_patches.h", + "jxl/render_pipeline/stage_splines.cc", + "jxl/render_pipeline/stage_splines.h", + "jxl/render_pipeline/stage_spot.cc", + "jxl/render_pipeline/stage_spot.h", + "jxl/render_pipeline/stage_to_linear.cc", + "jxl/render_pipeline/stage_to_linear.h", + "jxl/render_pipeline/stage_tone_mapping.cc", + "jxl/render_pipeline/stage_tone_mapping.h", + "jxl/render_pipeline/stage_upsampling.cc", + "jxl/render_pipeline/stage_upsampling.h", + "jxl/render_pipeline/stage_write.cc", + "jxl/render_pipeline/stage_write.h", + "jxl/render_pipeline/stage_xyb.cc", + "jxl/render_pipeline/stage_xyb.h", + "jxl/render_pipeline/stage_ycbcr.cc", + "jxl/render_pipeline/stage_ycbcr.h", + "jxl/sanitizers.h", + "jxl/simd_util-inl.h", + "jxl/splines.cc", + "jxl/splines.h", + "jxl/toc.cc", + "jxl/toc.h", + "jxl/transfer_functions-inl.h", + "jxl/transpose-inl.h", + "jxl/xorshift128plus-inl.h", +] + +libjxl_enc_sources = [ + "jxl/butteraugli/butteraugli.cc", + "jxl/butteraugli/butteraugli.h", + "jxl/enc_ac_strategy.cc", + "jxl/enc_ac_strategy.h", + "jxl/enc_adaptive_quantization.cc", + "jxl/enc_adaptive_quantization.h", + "jxl/enc_ans.cc", + "jxl/enc_ans.h", + "jxl/enc_ans_params.h", + "jxl/enc_ar_control_field.cc", + "jxl/enc_ar_control_field.h", + 
"jxl/enc_aux_out.cc", + "jxl/enc_aux_out.h", + "jxl/enc_bit_writer.cc", + "jxl/enc_bit_writer.h", + "jxl/enc_butteraugli_comparator.cc", + "jxl/enc_butteraugli_comparator.h", + "jxl/enc_cache.cc", + "jxl/enc_cache.h", + "jxl/enc_chroma_from_luma.cc", + "jxl/enc_chroma_from_luma.h", + "jxl/enc_cluster.cc", + "jxl/enc_cluster.h", + "jxl/enc_coeff_order.cc", + "jxl/enc_coeff_order.h", + "jxl/enc_color_management.cc", + "jxl/enc_color_management.h", + "jxl/enc_comparator.cc", + "jxl/enc_comparator.h", + "jxl/enc_context_map.cc", + "jxl/enc_context_map.h", + "jxl/enc_debug_image.cc", + "jxl/enc_debug_image.h", + "jxl/enc_detect_dots.cc", + "jxl/enc_detect_dots.h", + "jxl/enc_dot_dictionary.cc", + "jxl/enc_dot_dictionary.h", + "jxl/enc_entropy_coder.cc", + "jxl/enc_entropy_coder.h", + "jxl/enc_external_image.cc", + "jxl/enc_external_image.h", + "jxl/enc_fast_lossless.cc", + "jxl/enc_fast_lossless.h", + "jxl/enc_fields.cc", + "jxl/enc_fields.h", + "jxl/enc_file.cc", + "jxl/enc_file.h", + "jxl/enc_frame.cc", + "jxl/enc_frame.h", + "jxl/enc_gaborish.cc", + "jxl/enc_gaborish.h", + "jxl/enc_gamma_correct.h", + "jxl/enc_group.cc", + "jxl/enc_group.h", + "jxl/enc_heuristics.cc", + "jxl/enc_heuristics.h", + "jxl/enc_huffman.cc", + "jxl/enc_huffman.h", + "jxl/enc_huffman_tree.cc", + "jxl/enc_huffman_tree.h", + "jxl/enc_icc_codec.cc", + "jxl/enc_icc_codec.h", + "jxl/enc_image_bundle.cc", + "jxl/enc_image_bundle.h", + "jxl/enc_jxl_skcms.h", + "jxl/enc_linalg.cc", + "jxl/enc_linalg.h", + "jxl/enc_modular.cc", + "jxl/enc_modular.h", + "jxl/enc_noise.cc", + "jxl/enc_noise.h", + "jxl/enc_optimize.cc", + "jxl/enc_optimize.h", + "jxl/enc_params.h", + "jxl/enc_patch_dictionary.cc", + "jxl/enc_patch_dictionary.h", + "jxl/enc_photon_noise.cc", + "jxl/enc_photon_noise.h", + "jxl/enc_progressive_split.cc", + "jxl/enc_progressive_split.h", + "jxl/enc_quant_weights.cc", + "jxl/enc_quant_weights.h", + "jxl/enc_splines.cc", + "jxl/enc_splines.h", + "jxl/enc_toc.cc", + "jxl/enc_toc.h", + 
"jxl/enc_transforms-inl.h", + "jxl/enc_transforms.cc", + "jxl/enc_transforms.h", + "jxl/enc_xyb.cc", + "jxl/enc_xyb.h", + "jxl/encode.cc", + "jxl/encode_internal.h", + "jxl/jpeg/enc_jpeg_data.cc", + "jxl/jpeg/enc_jpeg_data.h", + "jxl/jpeg/enc_jpeg_data_reader.cc", + "jxl/jpeg/enc_jpeg_data_reader.h", + "jxl/jpeg/enc_jpeg_huffman_decode.cc", + "jxl/jpeg/enc_jpeg_huffman_decode.h", + "jxl/modular/encoding/enc_debug_tree.cc", + "jxl/modular/encoding/enc_debug_tree.h", + "jxl/modular/encoding/enc_encoding.cc", + "jxl/modular/encoding/enc_encoding.h", + "jxl/modular/encoding/enc_ma.cc", + "jxl/modular/encoding/enc_ma.h", + "jxl/modular/transform/enc_palette.cc", + "jxl/modular/transform/enc_palette.h", + "jxl/modular/transform/enc_rct.cc", + "jxl/modular/transform/enc_rct.h", + "jxl/modular/transform/enc_squeeze.cc", + "jxl/modular/transform/enc_squeeze.h", + "jxl/modular/transform/enc_transform.cc", + "jxl/modular/transform/enc_transform.h", +] + +libjxl_extras_for_tools_sources = [ + "extras/codec.cc", + "extras/codec.h", + "extras/hlg.cc", + "extras/hlg.h", + "extras/metrics.cc", + "extras/metrics.h", + "extras/packed_image_convert.cc", + "extras/packed_image_convert.h", + "extras/tone_mapping.cc", + "extras/tone_mapping.h", +] + +libjxl_extras_sources = [ + "extras/dec/color_description.cc", + "extras/dec/color_description.h", + "extras/dec/color_hints.cc", + "extras/dec/color_hints.h", + "extras/dec/decode.cc", + "extras/dec/decode.h", + "extras/enc/encode.cc", + "extras/enc/encode.h", + "extras/exif.cc", + "extras/exif.h", + "extras/packed_image.h", + "extras/size_constraints.h", + "extras/time.cc", + "extras/time.h", +] + +libjxl_gbench_sources = [ + "extras/tone_mapping_gbench.cc", + "jxl/dec_external_image_gbench.cc", + "jxl/enc_external_image_gbench.cc", + "jxl/gauss_blur_gbench.cc", + "jxl/splines_gbench.cc", + "jxl/tf_gbench.cc", +] + +libjxl_jpegli_lib_version = 62 + +libjxl_jpegli_libjpeg_helper_files = [ + "jpegli/libjpeg_test_util.cc", + 
"jpegli/libjpeg_test_util.h", +] + +libjxl_jpegli_sources = [ + "jpegli/adaptive_quantization.cc", + "jpegli/adaptive_quantization.h", + "jpegli/bit_writer.cc", + "jpegli/bit_writer.h", + "jpegli/bitstream.cc", + "jpegli/bitstream.h", + "jpegli/color_quantize.cc", + "jpegli/color_quantize.h", + "jpegli/color_transform.cc", + "jpegli/color_transform.h", + "jpegli/common.cc", + "jpegli/common.h", + "jpegli/common_internal.h", + "jpegli/dct-inl.h", + "jpegli/decode.cc", + "jpegli/decode.h", + "jpegli/decode_internal.h", + "jpegli/decode_marker.cc", + "jpegli/decode_marker.h", + "jpegli/decode_scan.cc", + "jpegli/decode_scan.h", + "jpegli/destination_manager.cc", + "jpegli/downsample.cc", + "jpegli/downsample.h", + "jpegli/encode.cc", + "jpegli/encode.h", + "jpegli/encode_finish.cc", + "jpegli/encode_finish.h", + "jpegli/encode_internal.h", + "jpegli/encode_streaming.cc", + "jpegli/encode_streaming.h", + "jpegli/entropy_coding-inl.h", + "jpegli/entropy_coding.cc", + "jpegli/entropy_coding.h", + "jpegli/error.cc", + "jpegli/error.h", + "jpegli/huffman.cc", + "jpegli/huffman.h", + "jpegli/idct.cc", + "jpegli/idct.h", + "jpegli/input.cc", + "jpegli/input.h", + "jpegli/memory_manager.cc", + "jpegli/memory_manager.h", + "jpegli/quant.cc", + "jpegli/quant.h", + "jpegli/render.cc", + "jpegli/render.h", + "jpegli/simd.cc", + "jpegli/simd.h", + "jpegli/source_manager.cc", + "jpegli/transpose-inl.h", + "jpegli/types.h", + "jpegli/upsample.cc", + "jpegli/upsample.h", +] + +libjxl_jpegli_testlib_files = [ + "jpegli/test_params.h", + "jpegli/test_utils-inl.h", + "jpegli/test_utils.cc", + "jpegli/test_utils.h", +] + +libjxl_jpegli_tests = [ + "jpegli/decode_api_test.cc", + "jpegli/encode_api_test.cc", + "jpegli/error_handling_test.cc", + "jpegli/input_suspension_test.cc", + "jpegli/output_suspension_test.cc", + "jpegli/source_manager_test.cc", + "jpegli/streaming_test.cc", + "jpegli/transcode_api_test.cc", +] + +libjxl_jpegli_wrapper_sources = [ + "jpegli/libjpeg_wrapper.cc", +] + 
+libjxl_major_version = 0 + +libjxl_minor_version = 9 + +libjxl_patch_version = 0 + +libjxl_public_headers = [ + "include/jxl/cms_interface.h", + "include/jxl/codestream_header.h", + "include/jxl/color_encoding.h", + "include/jxl/decode.h", + "include/jxl/decode_cxx.h", + "include/jxl/encode.h", + "include/jxl/encode_cxx.h", + "include/jxl/memory_manager.h", + "include/jxl/parallel_runner.h", + "include/jxl/stats.h", + "include/jxl/types.h", +] + +libjxl_testlib_files = [ + "jxl/dct_for_test.h", + "jxl/dec_transforms_testonly.cc", + "jxl/dec_transforms_testonly.h", + "jxl/fake_parallel_runner_testonly.h", + "jxl/image_test_utils.h", + "jxl/render_pipeline/test_render_pipeline_stages.h", + "jxl/test_image.cc", + "jxl/test_image.h", + "jxl/test_utils.cc", + "jxl/test_utils.h", +] + +libjxl_tests = [ + "extras/codec_test.cc", + "extras/dec/color_description_test.cc", + "extras/dec/pgx_test.cc", + "extras/jpegli_test.cc", + "jxl/ac_strategy_test.cc", + "jxl/alpha_test.cc", + "jxl/ans_common_test.cc", + "jxl/ans_test.cc", + "jxl/bit_reader_test.cc", + "jxl/bits_test.cc", + "jxl/blending_test.cc", + "jxl/byte_order_test.cc", + "jxl/coeff_order_test.cc", + "jxl/color_encoding_internal_test.cc", + "jxl/color_management_test.cc", + "jxl/convolve_test.cc", + "jxl/data_parallel_test.cc", + "jxl/dct_test.cc", + "jxl/decode_test.cc", + "jxl/enc_external_image_test.cc", + "jxl/enc_gaborish_test.cc", + "jxl/enc_linalg_test.cc", + "jxl/enc_optimize_test.cc", + "jxl/enc_photon_noise_test.cc", + "jxl/encode_test.cc", + "jxl/entropy_coder_test.cc", + "jxl/fast_dct_test.cc", + "jxl/fast_math_test.cc", + "jxl/fields_test.cc", + "jxl/gamma_correct_test.cc", + "jxl/gauss_blur_test.cc", + "jxl/gradient_test.cc", + "jxl/iaca_test.cc", + "jxl/icc_codec_test.cc", + "jxl/image_bundle_test.cc", + "jxl/image_ops_test.cc", + "jxl/jxl_test.cc", + "jxl/lehmer_code_test.cc", + "jxl/modular_test.cc", + "jxl/opsin_image_test.cc", + "jxl/opsin_inverse_test.cc", + "jxl/padded_bytes_test.cc", + 
"jxl/passes_test.cc", + "jxl/patch_dictionary_test.cc", + "jxl/preview_test.cc", + "jxl/quant_weights_test.cc", + "jxl/quantizer_test.cc", + "jxl/rational_polynomial_test.cc", + "jxl/render_pipeline/render_pipeline_test.cc", + "jxl/roundtrip_test.cc", + "jxl/simd_util_test.cc", + "jxl/speed_tier_test.cc", + "jxl/splines_test.cc", + "jxl/toc_test.cc", + "jxl/xorshift128plus_test.cc", + "threads/thread_parallel_runner_test.cc", +] + +libjxl_threads_public_headers = [ + "include/jxl/resizable_parallel_runner.h", + "include/jxl/resizable_parallel_runner_cxx.h", + "include/jxl/thread_parallel_runner.h", + "include/jxl/thread_parallel_runner_cxx.h", +] + +libjxl_threads_sources = [ + "threads/resizable_parallel_runner.cc", + "threads/thread_parallel_runner.cc", + "threads/thread_parallel_runner_internal.cc", + "threads/thread_parallel_runner_internal.h", +] diff --git a/third-party/libjxl/libjxl/lib/jxl_lists.cmake b/third-party/libjxl/libjxl/lib/jxl_lists.cmake new file mode 100644 index 0000000000..7813c46618 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl_lists.cmake @@ -0,0 +1,636 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +# This file is generated, do not modify by manually. +# Run `tools/scripts/build_cleaner.py --update` to regenerate it. 
+ +set(JPEGXL_INTERNAL_BASE_SOURCES + jxl/base/arch_macros.h + jxl/base/bits.h + jxl/base/byte_order.h + jxl/base/cache_aligned.cc + jxl/base/cache_aligned.h + jxl/base/compiler_specific.h + jxl/base/data_parallel.cc + jxl/base/data_parallel.h + jxl/base/float.h + jxl/base/iaca.h + jxl/base/os_macros.h + jxl/base/override.h + jxl/base/padded_bytes.cc + jxl/base/padded_bytes.h + jxl/base/printf_macros.h + jxl/base/random.cc + jxl/base/random.h + jxl/base/sanitizer_definitions.h + jxl/base/scope_guard.h + jxl/base/span.h + jxl/base/status.h +) + +set(JPEGXL_INTERNAL_CODEC_APNG_SOURCES + extras/dec/apng.cc + extras/dec/apng.h + extras/enc/apng.cc + extras/enc/apng.h +) + +set(JPEGXL_INTERNAL_CODEC_EXR_SOURCES + extras/dec/exr.cc + extras/dec/exr.h + extras/enc/exr.cc + extras/enc/exr.h +) + +set(JPEGXL_INTERNAL_CODEC_GIF_SOURCES + extras/dec/gif.cc + extras/dec/gif.h +) + +set(JPEGXL_INTERNAL_CODEC_JPEGLI_SOURCES + extras/dec/jpegli.cc + extras/dec/jpegli.h + extras/enc/jpegli.cc + extras/enc/jpegli.h +) + +set(JPEGXL_INTERNAL_CODEC_JPG_SOURCES + extras/dec/jpg.cc + extras/dec/jpg.h + extras/enc/jpg.cc + extras/enc/jpg.h +) + +set(JPEGXL_INTERNAL_CODEC_JXL_SOURCES + extras/dec/jxl.cc + extras/dec/jxl.h + extras/enc/jxl.cc + extras/enc/jxl.h +) + +set(JPEGXL_INTERNAL_CODEC_NPY_SOURCES + extras/enc/npy.cc + extras/enc/npy.h +) + +set(JPEGXL_INTERNAL_CODEC_PGX_SOURCES + extras/dec/pgx.cc + extras/dec/pgx.h + extras/enc/pgx.cc + extras/enc/pgx.h +) + +set(JPEGXL_INTERNAL_CODEC_PNM_SOURCES + extras/dec/pnm.cc + extras/dec/pnm.h + extras/enc/pnm.cc + extras/enc/pnm.h +) + +set(JPEGXL_INTERNAL_DEC_BOX_SOURCES + jxl/box_content_decoder.cc + jxl/box_content_decoder.h +) + +set(JPEGXL_INTERNAL_DEC_JPEG_SOURCES + jxl/decode_to_jpeg.cc + jxl/decode_to_jpeg.h + jxl/jpeg/dec_jpeg_data.cc + jxl/jpeg/dec_jpeg_data.h + jxl/jpeg/dec_jpeg_data_writer.cc + jxl/jpeg/dec_jpeg_data_writer.h + jxl/jpeg/dec_jpeg_output_chunk.h + jxl/jpeg/dec_jpeg_serialization_state.h + jxl/jpeg/jpeg_data.cc 
+ jxl/jpeg/jpeg_data.h +) + +set(JPEGXL_INTERNAL_DEC_SOURCES + jxl/ac_context.h + jxl/ac_strategy.cc + jxl/ac_strategy.h + jxl/alpha.cc + jxl/alpha.h + jxl/ans_common.cc + jxl/ans_common.h + jxl/ans_params.h + jxl/blending.cc + jxl/blending.h + jxl/chroma_from_luma.cc + jxl/chroma_from_luma.h + jxl/codec_in_out.h + jxl/coeff_order.cc + jxl/coeff_order.h + jxl/coeff_order_fwd.h + jxl/color_encoding_internal.cc + jxl/color_encoding_internal.h + jxl/color_management.cc + jxl/color_management.h + jxl/common.h + jxl/compressed_dc.cc + jxl/compressed_dc.h + jxl/convolve-inl.h + jxl/convolve.h + jxl/convolve_separable5.cc + jxl/convolve_separable7.cc + jxl/convolve_slow.cc + jxl/convolve_symmetric3.cc + jxl/convolve_symmetric5.cc + jxl/dct-inl.h + jxl/dct_block-inl.h + jxl/dct_scales.cc + jxl/dct_scales.h + jxl/dct_util.h + jxl/dec_ans.cc + jxl/dec_ans.h + jxl/dec_bit_reader.h + jxl/dec_cache.cc + jxl/dec_cache.h + jxl/dec_context_map.cc + jxl/dec_context_map.h + jxl/dec_external_image.cc + jxl/dec_external_image.h + jxl/dec_frame.cc + jxl/dec_frame.h + jxl/dec_group.cc + jxl/dec_group.h + jxl/dec_group_border.cc + jxl/dec_group_border.h + jxl/dec_huffman.cc + jxl/dec_huffman.h + jxl/dec_modular.cc + jxl/dec_modular.h + jxl/dec_noise.cc + jxl/dec_noise.h + jxl/dec_patch_dictionary.cc + jxl/dec_patch_dictionary.h + jxl/dec_tone_mapping-inl.h + jxl/dec_transforms-inl.h + jxl/dec_xyb-inl.h + jxl/dec_xyb.cc + jxl/dec_xyb.h + jxl/decode.cc + jxl/entropy_coder.cc + jxl/entropy_coder.h + jxl/epf.cc + jxl/epf.h + jxl/exif.h + jxl/fast_dct-inl.h + jxl/fast_dct.cc + jxl/fast_dct.h + jxl/fast_dct128-inl.h + jxl/fast_dct16-inl.h + jxl/fast_dct256-inl.h + jxl/fast_dct32-inl.h + jxl/fast_dct64-inl.h + jxl/fast_dct8-inl.h + jxl/fast_math-inl.h + jxl/field_encodings.h + jxl/fields.cc + jxl/fields.h + jxl/frame_header.cc + jxl/frame_header.h + jxl/gauss_blur.cc + jxl/gauss_blur.h + jxl/headers.cc + jxl/headers.h + jxl/huffman_table.cc + jxl/huffman_table.h + jxl/icc_codec.cc + 
jxl/icc_codec.h + jxl/icc_codec_common.cc + jxl/icc_codec_common.h + jxl/image.cc + jxl/image.h + jxl/image_bundle.cc + jxl/image_bundle.h + jxl/image_metadata.cc + jxl/image_metadata.h + jxl/image_ops.h + jxl/inverse_mtf-inl.h + jxl/lehmer_code.h + jxl/loop_filter.cc + jxl/loop_filter.h + jxl/luminance.cc + jxl/luminance.h + jxl/matrix_ops.h + jxl/memory_manager_internal.cc + jxl/memory_manager_internal.h + jxl/modular/encoding/context_predict.h + jxl/modular/encoding/dec_ma.cc + jxl/modular/encoding/dec_ma.h + jxl/modular/encoding/encoding.cc + jxl/modular/encoding/encoding.h + jxl/modular/encoding/ma_common.h + jxl/modular/modular_image.cc + jxl/modular/modular_image.h + jxl/modular/options.h + jxl/modular/transform/palette.cc + jxl/modular/transform/palette.h + jxl/modular/transform/rct.cc + jxl/modular/transform/rct.h + jxl/modular/transform/squeeze.cc + jxl/modular/transform/squeeze.h + jxl/modular/transform/transform.cc + jxl/modular/transform/transform.h + jxl/noise.h + jxl/opsin_params.cc + jxl/opsin_params.h + jxl/passes_state.cc + jxl/passes_state.h + jxl/patch_dictionary_internal.h + jxl/quant_weights.cc + jxl/quant_weights.h + jxl/quantizer-inl.h + jxl/quantizer.cc + jxl/quantizer.h + jxl/rational_polynomial-inl.h + jxl/render_pipeline/low_memory_render_pipeline.cc + jxl/render_pipeline/low_memory_render_pipeline.h + jxl/render_pipeline/render_pipeline.cc + jxl/render_pipeline/render_pipeline.h + jxl/render_pipeline/render_pipeline_stage.h + jxl/render_pipeline/simple_render_pipeline.cc + jxl/render_pipeline/simple_render_pipeline.h + jxl/render_pipeline/stage_blending.cc + jxl/render_pipeline/stage_blending.h + jxl/render_pipeline/stage_chroma_upsampling.cc + jxl/render_pipeline/stage_chroma_upsampling.h + jxl/render_pipeline/stage_epf.cc + jxl/render_pipeline/stage_epf.h + jxl/render_pipeline/stage_from_linear.cc + jxl/render_pipeline/stage_from_linear.h + jxl/render_pipeline/stage_gaborish.cc + jxl/render_pipeline/stage_gaborish.h + 
jxl/render_pipeline/stage_noise.cc + jxl/render_pipeline/stage_noise.h + jxl/render_pipeline/stage_patches.cc + jxl/render_pipeline/stage_patches.h + jxl/render_pipeline/stage_splines.cc + jxl/render_pipeline/stage_splines.h + jxl/render_pipeline/stage_spot.cc + jxl/render_pipeline/stage_spot.h + jxl/render_pipeline/stage_to_linear.cc + jxl/render_pipeline/stage_to_linear.h + jxl/render_pipeline/stage_tone_mapping.cc + jxl/render_pipeline/stage_tone_mapping.h + jxl/render_pipeline/stage_upsampling.cc + jxl/render_pipeline/stage_upsampling.h + jxl/render_pipeline/stage_write.cc + jxl/render_pipeline/stage_write.h + jxl/render_pipeline/stage_xyb.cc + jxl/render_pipeline/stage_xyb.h + jxl/render_pipeline/stage_ycbcr.cc + jxl/render_pipeline/stage_ycbcr.h + jxl/sanitizers.h + jxl/simd_util-inl.h + jxl/splines.cc + jxl/splines.h + jxl/toc.cc + jxl/toc.h + jxl/transfer_functions-inl.h + jxl/transpose-inl.h + jxl/xorshift128plus-inl.h +) + +set(JPEGXL_INTERNAL_ENC_SOURCES + jxl/butteraugli/butteraugli.cc + jxl/butteraugli/butteraugli.h + jxl/enc_ac_strategy.cc + jxl/enc_ac_strategy.h + jxl/enc_adaptive_quantization.cc + jxl/enc_adaptive_quantization.h + jxl/enc_ans.cc + jxl/enc_ans.h + jxl/enc_ans_params.h + jxl/enc_ar_control_field.cc + jxl/enc_ar_control_field.h + jxl/enc_aux_out.cc + jxl/enc_aux_out.h + jxl/enc_bit_writer.cc + jxl/enc_bit_writer.h + jxl/enc_butteraugli_comparator.cc + jxl/enc_butteraugli_comparator.h + jxl/enc_cache.cc + jxl/enc_cache.h + jxl/enc_chroma_from_luma.cc + jxl/enc_chroma_from_luma.h + jxl/enc_cluster.cc + jxl/enc_cluster.h + jxl/enc_coeff_order.cc + jxl/enc_coeff_order.h + jxl/enc_color_management.cc + jxl/enc_color_management.h + jxl/enc_comparator.cc + jxl/enc_comparator.h + jxl/enc_context_map.cc + jxl/enc_context_map.h + jxl/enc_debug_image.cc + jxl/enc_debug_image.h + jxl/enc_detect_dots.cc + jxl/enc_detect_dots.h + jxl/enc_dot_dictionary.cc + jxl/enc_dot_dictionary.h + jxl/enc_entropy_coder.cc + jxl/enc_entropy_coder.h + 
jxl/enc_external_image.cc + jxl/enc_external_image.h + jxl/enc_fast_lossless.cc + jxl/enc_fast_lossless.h + jxl/enc_fields.cc + jxl/enc_fields.h + jxl/enc_file.cc + jxl/enc_file.h + jxl/enc_frame.cc + jxl/enc_frame.h + jxl/enc_gaborish.cc + jxl/enc_gaborish.h + jxl/enc_gamma_correct.h + jxl/enc_group.cc + jxl/enc_group.h + jxl/enc_heuristics.cc + jxl/enc_heuristics.h + jxl/enc_huffman.cc + jxl/enc_huffman.h + jxl/enc_huffman_tree.cc + jxl/enc_huffman_tree.h + jxl/enc_icc_codec.cc + jxl/enc_icc_codec.h + jxl/enc_image_bundle.cc + jxl/enc_image_bundle.h + jxl/enc_jxl_skcms.h + jxl/enc_linalg.cc + jxl/enc_linalg.h + jxl/enc_modular.cc + jxl/enc_modular.h + jxl/enc_noise.cc + jxl/enc_noise.h + jxl/enc_optimize.cc + jxl/enc_optimize.h + jxl/enc_params.h + jxl/enc_patch_dictionary.cc + jxl/enc_patch_dictionary.h + jxl/enc_photon_noise.cc + jxl/enc_photon_noise.h + jxl/enc_progressive_split.cc + jxl/enc_progressive_split.h + jxl/enc_quant_weights.cc + jxl/enc_quant_weights.h + jxl/enc_splines.cc + jxl/enc_splines.h + jxl/enc_toc.cc + jxl/enc_toc.h + jxl/enc_transforms-inl.h + jxl/enc_transforms.cc + jxl/enc_transforms.h + jxl/enc_xyb.cc + jxl/enc_xyb.h + jxl/encode.cc + jxl/encode_internal.h + jxl/jpeg/enc_jpeg_data.cc + jxl/jpeg/enc_jpeg_data.h + jxl/jpeg/enc_jpeg_data_reader.cc + jxl/jpeg/enc_jpeg_data_reader.h + jxl/jpeg/enc_jpeg_huffman_decode.cc + jxl/jpeg/enc_jpeg_huffman_decode.h + jxl/modular/encoding/enc_debug_tree.cc + jxl/modular/encoding/enc_debug_tree.h + jxl/modular/encoding/enc_encoding.cc + jxl/modular/encoding/enc_encoding.h + jxl/modular/encoding/enc_ma.cc + jxl/modular/encoding/enc_ma.h + jxl/modular/transform/enc_palette.cc + jxl/modular/transform/enc_palette.h + jxl/modular/transform/enc_rct.cc + jxl/modular/transform/enc_rct.h + jxl/modular/transform/enc_squeeze.cc + jxl/modular/transform/enc_squeeze.h + jxl/modular/transform/enc_transform.cc + jxl/modular/transform/enc_transform.h +) + +set(JPEGXL_INTERNAL_EXTRAS_FOR_TOOLS_SOURCES + extras/codec.cc 
+ extras/codec.h + extras/hlg.cc + extras/hlg.h + extras/metrics.cc + extras/metrics.h + extras/packed_image_convert.cc + extras/packed_image_convert.h + extras/tone_mapping.cc + extras/tone_mapping.h +) + +set(JPEGXL_INTERNAL_EXTRAS_SOURCES + extras/dec/color_description.cc + extras/dec/color_description.h + extras/dec/color_hints.cc + extras/dec/color_hints.h + extras/dec/decode.cc + extras/dec/decode.h + extras/enc/encode.cc + extras/enc/encode.h + extras/exif.cc + extras/exif.h + extras/packed_image.h + extras/size_constraints.h + extras/time.cc + extras/time.h +) + +set(JPEGXL_INTERNAL_GBENCH_SOURCES + extras/tone_mapping_gbench.cc + jxl/dec_external_image_gbench.cc + jxl/enc_external_image_gbench.cc + jxl/gauss_blur_gbench.cc + jxl/splines_gbench.cc + jxl/tf_gbench.cc +) + +set(JPEGXL_INTERNAL_JPEGLI_LIBJPEG_HELPER_FILES + jpegli/libjpeg_test_util.cc + jpegli/libjpeg_test_util.h +) + +set(JPEGXL_INTERNAL_JPEGLI_SOURCES + jpegli/adaptive_quantization.cc + jpegli/adaptive_quantization.h + jpegli/bit_writer.cc + jpegli/bit_writer.h + jpegli/bitstream.cc + jpegli/bitstream.h + jpegli/color_quantize.cc + jpegli/color_quantize.h + jpegli/color_transform.cc + jpegli/color_transform.h + jpegli/common.cc + jpegli/common.h + jpegli/common_internal.h + jpegli/dct-inl.h + jpegli/decode.cc + jpegli/decode.h + jpegli/decode_internal.h + jpegli/decode_marker.cc + jpegli/decode_marker.h + jpegli/decode_scan.cc + jpegli/decode_scan.h + jpegli/destination_manager.cc + jpegli/downsample.cc + jpegli/downsample.h + jpegli/encode.cc + jpegli/encode.h + jpegli/encode_finish.cc + jpegli/encode_finish.h + jpegli/encode_internal.h + jpegli/encode_streaming.cc + jpegli/encode_streaming.h + jpegli/entropy_coding-inl.h + jpegli/entropy_coding.cc + jpegli/entropy_coding.h + jpegli/error.cc + jpegli/error.h + jpegli/huffman.cc + jpegli/huffman.h + jpegli/idct.cc + jpegli/idct.h + jpegli/input.cc + jpegli/input.h + jpegli/memory_manager.cc + jpegli/memory_manager.h + jpegli/quant.cc + 
jpegli/quant.h + jpegli/render.cc + jpegli/render.h + jpegli/simd.cc + jpegli/simd.h + jpegli/source_manager.cc + jpegli/transpose-inl.h + jpegli/types.h + jpegli/upsample.cc + jpegli/upsample.h +) + +set(JPEGXL_INTERNAL_JPEGLI_TESTLIB_FILES + jpegli/test_params.h + jpegli/test_utils-inl.h + jpegli/test_utils.cc + jpegli/test_utils.h +) + +set(JPEGXL_INTERNAL_JPEGLI_TESTS + jpegli/decode_api_test.cc + jpegli/encode_api_test.cc + jpegli/error_handling_test.cc + jpegli/input_suspension_test.cc + jpegli/output_suspension_test.cc + jpegli/source_manager_test.cc + jpegli/streaming_test.cc + jpegli/transcode_api_test.cc +) + +set(JPEGXL_INTERNAL_JPEGLI_WRAPPER_SOURCES + jpegli/libjpeg_wrapper.cc +) + +set(JPEGXL_INTERNAL_PUBLIC_HEADERS + include/jxl/cms_interface.h + include/jxl/codestream_header.h + include/jxl/color_encoding.h + include/jxl/decode.h + include/jxl/decode_cxx.h + include/jxl/encode.h + include/jxl/encode_cxx.h + include/jxl/memory_manager.h + include/jxl/parallel_runner.h + include/jxl/stats.h + include/jxl/types.h +) + +set(JPEGXL_INTERNAL_TESTLIB_FILES + jxl/dct_for_test.h + jxl/dec_transforms_testonly.cc + jxl/dec_transforms_testonly.h + jxl/fake_parallel_runner_testonly.h + jxl/image_test_utils.h + jxl/render_pipeline/test_render_pipeline_stages.h + jxl/test_image.cc + jxl/test_image.h + jxl/test_utils.cc + jxl/test_utils.h +) + +set(JPEGXL_INTERNAL_TESTS + extras/codec_test.cc + extras/dec/color_description_test.cc + extras/dec/pgx_test.cc + extras/jpegli_test.cc + jxl/ac_strategy_test.cc + jxl/alpha_test.cc + jxl/ans_common_test.cc + jxl/ans_test.cc + jxl/bit_reader_test.cc + jxl/bits_test.cc + jxl/blending_test.cc + jxl/byte_order_test.cc + jxl/coeff_order_test.cc + jxl/color_encoding_internal_test.cc + jxl/color_management_test.cc + jxl/convolve_test.cc + jxl/data_parallel_test.cc + jxl/dct_test.cc + jxl/decode_test.cc + jxl/enc_external_image_test.cc + jxl/enc_gaborish_test.cc + jxl/enc_linalg_test.cc + jxl/enc_optimize_test.cc + 
jxl/enc_photon_noise_test.cc + jxl/encode_test.cc + jxl/entropy_coder_test.cc + jxl/fast_dct_test.cc + jxl/fast_math_test.cc + jxl/fields_test.cc + jxl/gamma_correct_test.cc + jxl/gauss_blur_test.cc + jxl/gradient_test.cc + jxl/iaca_test.cc + jxl/icc_codec_test.cc + jxl/image_bundle_test.cc + jxl/image_ops_test.cc + jxl/jxl_test.cc + jxl/lehmer_code_test.cc + jxl/modular_test.cc + jxl/opsin_image_test.cc + jxl/opsin_inverse_test.cc + jxl/padded_bytes_test.cc + jxl/passes_test.cc + jxl/patch_dictionary_test.cc + jxl/preview_test.cc + jxl/quant_weights_test.cc + jxl/quantizer_test.cc + jxl/rational_polynomial_test.cc + jxl/render_pipeline/render_pipeline_test.cc + jxl/roundtrip_test.cc + jxl/simd_util_test.cc + jxl/speed_tier_test.cc + jxl/splines_test.cc + jxl/toc_test.cc + jxl/xorshift128plus_test.cc + threads/thread_parallel_runner_test.cc +) + +set(JPEGXL_INTERNAL_THREADS_PUBLIC_HEADERS + include/jxl/resizable_parallel_runner.h + include/jxl/resizable_parallel_runner_cxx.h + include/jxl/thread_parallel_runner.h + include/jxl/thread_parallel_runner_cxx.h +) + +set(JPEGXL_INTERNAL_THREADS_SOURCES + threads/resizable_parallel_runner.cc + threads/thread_parallel_runner.cc + threads/thread_parallel_runner_internal.cc + threads/thread_parallel_runner_internal.h +) diff --git a/third-party/libjxl/libjxl/lib/jxl_tests.cmake b/third-party/libjxl/libjxl/lib/jxl_tests.cmake new file mode 100644 index 0000000000..88c5a89f5c --- /dev/null +++ b/third-party/libjxl/libjxl/lib/jxl_tests.cmake @@ -0,0 +1,84 @@ +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +include(compatibility.cmake) +include(jxl_lists.cmake) + +if(BUILD_TESTING OR JPEGXL_ENABLE_TOOLS) +# Library with test-only code shared between all tests / fuzzers. 
+add_library(jxl_testlib-static STATIC ${JPEGXL_INTERNAL_TESTLIB_FILES}) +target_compile_options(jxl_testlib-static PRIVATE + ${JPEGXL_INTERNAL_FLAGS} + ${JPEGXL_COVERAGE_FLAGS} +) +target_compile_definitions(jxl_testlib-static PUBLIC + -DTEST_DATA_PATH="${JPEGXL_TEST_DATA_PATH}") +target_include_directories(jxl_testlib-static PUBLIC + "${PROJECT_SOURCE_DIR}" +) +target_link_libraries(jxl_testlib-static + hwy + jxl_extras_nocodec-static + jxl-static +) +endif() + +if(NOT BUILD_TESTING) + return() +endif() + +list(APPEND JPEGXL_INTERNAL_TESTS + # TODO(deymo): Move this to tools/ + ../tools/box/box_test.cc + ../tools/djxl_fuzzer_test.cc +) + +find_package(GTest) + +# Individual test binaries: +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/tests) +foreach (TESTFILE IN LISTS JPEGXL_INTERNAL_TESTS) + # The TESTNAME is the name without the extension or directory. + get_filename_component(TESTNAME ${TESTFILE} NAME_WE) + if(TESTFILE STREQUAL ../tools/djxl_fuzzer_test.cc) + add_executable(${TESTNAME} ${TESTFILE} ../tools/djxl_fuzzer.cc) + else() + add_executable(${TESTNAME} ${TESTFILE}) + endif() + if(JPEGXL_EMSCRIPTEN) + # The emscripten linking step takes too much memory and crashes during the + # wasm-opt step when using -O2 optimization level + set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "\ + -O1 \ + -s USE_LIBPNG=1 \ + -s ALLOW_MEMORY_GROWTH=1 \ + -s SINGLE_FILE=1 \ + -s PROXY_TO_PTHREAD \ + -s EXIT_RUNTIME=1 \ + -s USE_PTHREADS=1 \ + -s NODERAWFS=1 \ + ") + else() + set_target_properties(${TESTNAME} PROPERTIES LINK_FLAGS "${JPEGXL_COVERAGE_LINK_FLAGS}") + endif() + target_compile_options(${TESTNAME} PRIVATE + ${JPEGXL_INTERNAL_FLAGS} + # Add coverage flags to the test binary so code in the private headers of + # the library is also instrumented when running tests that execute it. 
+    ${JPEGXL_COVERAGE_FLAGS}
+  )
+  target_link_libraries(${TESTNAME}
+    box
+    gmock
+    GTest::GTest
+    GTest::Main
+    jxl_extras-static
+    jxl_testlib-static
+  )
+  # Output test targets in the test directory.
+  set_target_properties(${TESTNAME} PROPERTIES PREFIX "tests/")
+  if (WIN32 AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+    set_target_properties(${TESTNAME} PROPERTIES COMPILE_FLAGS "-Wno-error")
+  endif ()
+  jxl_discover_tests(${TESTNAME})
+endforeach ()
diff --git a/third-party/libjxl/libjxl/lib/jxl_threads.cmake b/third-party/libjxl/libjxl/lib/jxl_threads.cmake
new file mode 100644
index 0000000000..2f5ac17c83
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_threads.cmake
@@ -0,0 +1,120 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(Threads REQUIRED)
+
+include(jxl_lists.cmake)
+
+### Define the jxl_threads shared or static target library. The ${_target}
+# parameter should already be created with add_library(), but this function
+# sets all the remaining common properties.
+function(_set_jxl_threads _target)
+  target_compile_options(${_target} PRIVATE ${JPEGXL_INTERNAL_FLAGS})
+  target_compile_options(${_target} PUBLIC ${JPEGXL_COVERAGE_FLAGS})
+  set_property(TARGET ${_target} PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+  target_include_directories(${_target}
+    PRIVATE
+      "${PROJECT_SOURCE_DIR}"
+    PUBLIC
+      "${CMAKE_CURRENT_SOURCE_DIR}/include"
+      "${CMAKE_CURRENT_BINARY_DIR}/include")
+
+  target_link_libraries(${_target}
+    PUBLIC ${JPEGXL_COVERAGE_FLAGS} Threads::Threads
+  )
+
+  set_target_properties(${_target} PROPERTIES
+    CXX_VISIBILITY_PRESET hidden
+    VISIBILITY_INLINES_HIDDEN 1
+    DEFINE_SYMBOL JXL_THREADS_INTERNAL_LIBRARY_BUILD
+  )
+
+  # Always install the library as jxl_threads.{a,so} file without the "-static"
+  # suffix, except in Windows.
+  if (NOT WIN32 OR MINGW)
+    set_target_properties(${_target} PROPERTIES OUTPUT_NAME "jxl_threads")
+  endif()
+  install(TARGETS ${_target}
+    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
+    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
+endfunction()
+
+### Static library.
+add_library(jxl_threads-static STATIC ${JPEGXL_INTERNAL_THREADS_SOURCES})
+_set_jxl_threads(jxl_threads-static)
+
+# Make jxl_threads symbols neither imported nor exported when using the static
+# library. These will have hidden visibility anyway in the static library case
+# in unix.
+target_compile_definitions(jxl_threads-static
+  PUBLIC -DJXL_THREADS_STATIC_DEFINE)
+
+
+### Public shared library.
+if (BUILD_SHARED_LIBS)
+add_library(jxl_threads SHARED ${JPEGXL_INTERNAL_THREADS_SOURCES})
+_set_jxl_threads(jxl_threads)
+
+set_target_properties(jxl_threads PROPERTIES
+  VERSION ${JPEGXL_LIBRARY_VERSION}
+  SOVERSION ${JPEGXL_LIBRARY_SOVERSION}
+  LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}"
+  RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}")
+
+  set_target_properties(jxl_threads PROPERTIES
+      LINK_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version)
+  if(APPLE)  # APPEND_STRING adds no separator, hence the leading space in each LINK_FLAGS value.
+  set_property(TARGET jxl_threads APPEND_STRING PROPERTY
+      LINK_FLAGS " -Wl,-exported_symbols_list,${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl_osx.syms")
+  elseif(WIN32)
+    # Nothing needed here, we use __declspec(dllexport) (jxl_threads_export.h)
+  else()
+  set_property(TARGET jxl_threads APPEND_STRING PROPERTY
+      LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/jxl/jxl.version")
+  endif()  # APPLE
+
+# Compile the shared library such that the JXL_THREADS_EXPORT symbols are
+# exported. Users of the library will not set this flag and therefore import
+# those symbols.
+target_compile_definitions(jxl_threads
+  PRIVATE -DJXL_THREADS_INTERNAL_LIBRARY_BUILD)
+
+# Generate the jxl/jxl_threads_export.h header, we only need to generate it once
+# but we can use it from both libraries.
+generate_export_header(jxl_threads
+  BASE_NAME JXL_THREADS
+  EXPORT_FILE_NAME include/jxl/jxl_threads_export.h)
+else()
+add_library(jxl_threads ALIAS jxl_threads-static)
+# When not building the shared library generate the jxl_threads_export.h header
+# only based on the static target.
+generate_export_header(jxl_threads-static
+  BASE_NAME JXL_THREADS
+  EXPORT_FILE_NAME include/jxl/jxl_threads_export.h)
+endif()  # BUILD_SHARED_LIBS
+
+
+### Add a pkg-config file for libjxl_threads.
+
+# Allow adding prefix if CMAKE_INSTALL_INCLUDEDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_INCLUDEDIR}")
+    set(PKGCONFIG_TARGET_INCLUDES "${CMAKE_INSTALL_INCLUDEDIR}")
+else()
+    set(PKGCONFIG_TARGET_INCLUDES "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
+endif()
+# Allow adding prefix if CMAKE_INSTALL_LIBDIR not absolute.
+if(IS_ABSOLUTE "${CMAKE_INSTALL_LIBDIR}")
+    set(PKGCONFIG_TARGET_LIBS "${CMAKE_INSTALL_LIBDIR}")
+else()
+    set(PKGCONFIG_TARGET_LIBS "\${exec_prefix}/${CMAKE_INSTALL_LIBDIR}")
+endif()
+
+set(JPEGXL_THREADS_LIBRARY_REQUIRES "")
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/threads/libjxl_threads.pc.in"
+               "libjxl_threads.pc" @ONLY)
+install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libjxl_threads.pc"
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
diff --git a/third-party/libjxl/libjxl/lib/jxl_vars.bzl b/third-party/libjxl/libjxl/lib/jxl_vars.bzl
new file mode 100644
index 0000000000..7efa84cc44
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/jxl_vars.bzl
@@ -0,0 +1,46 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+# Extra build variables.
+
+libjxl_root_package = "__main__"
+
+libjxl_deps_brotli = ["@brotli//:brotlidec", "@brotli//:brotlienc"]
+libjxl_deps_gif = ["@gif//:gif"]
+libjxl_deps_gtest = ["@googletest//:gtest_main"]
+libjxl_deps_hwy = ["@highway//:hwy"]
+libjxl_deps_hwy_nanobenchmark = ["@highway//:nanobenchmark"]
+libjxl_deps_hwy_test_util = ["@highway//:hwy_test_util"]
+libjxl_deps_jpeg = ["@libjpeg_turbo//:jpeg"]
+libjxl_deps_jxl_box = ["//tools:box"]
+libjxl_deps_exr = ["@openexr//:OpenEXR"]
+libjxl_deps_png = ["@png//:png"]
+libjxl_deps_runfiles = ["@bazel_tools//tools/cpp/runfiles"]
+libjxl_deps_skcms = ["@skcms//:skcms"]
+libjxl_deps_testdata = ["//:testdata"]
+
+libjxl_test_shards = {
+    "jpegli/decode_api_test": 10,
+    "jpegli/encode_api_test": 4,
+    "jpegli/input_suspension_test": 6,
+    "jpegli/output_suspension_test": 2,
+    "jxl/ans_test": 2,
+    "jxl/linalg_test": 2,  # NOTE(review): the CMake test list names this test jxl/enc_linalg_test; confirm the key still matches a target.
+    "jxl/modular_test": 4,
+    "jxl/roundtrip_test": 4,
+    "jxl/xorshift128plus_test": 2,
+    "jxl/ac_strategy_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/dct_test": 32,
+    "jxl/decode_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/fast_dct_test": 8,  # TODO(eustas): separate ultra-heavy shard
+    "jxl/fast_math_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/jxl_test": 10,  # TODO(eustas): separate heavy shard
+    "jxl/render_pipeline/render_pipeline_test": 10,
+}
+
+libjxl_test_timeouts = {
+    "jxl/fast_dct_test": "long",
+    "jxl/dct_test": "long",
+}
diff --git a/third-party/libjxl/libjxl/lib/lib.gni b/third-party/libjxl/libjxl/lib/lib.gni
new file mode 120000
index 0000000000..416aa0c9e4
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/lib.gni
@@ -0,0 +1 @@
+jxl_lists.bzl
\ No newline at end of file
diff --git a/third-party/libjxl/libjxl/lib/threads/libjxl_threads.pc.in b/third-party/libjxl/libjxl/lib/threads/libjxl_threads.pc.in
new file mode 100644
index 0000000000..50b937a840
--- /dev/null
+++ b/third-party/libjxl/libjxl/lib/threads/libjxl_threads.pc.in
@@ -0,0 +1,13 @@
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix} +libdir=@PKGCONFIG_TARGET_LIBS@ +includedir=@PKGCONFIG_TARGET_INCLUDES@ + +Name: libjxl_threads +Description: JPEG XL multi-thread runner using std::threads. +Version: @JPEGXL_LIBRARY_VERSION@ +Requires.private: @JPEGXL_THREADS_LIBRARY_REQUIRES@ +Libs: -L${libdir} -ljxl_threads +Libs.private: -lm +Cflags: -I${includedir} +Cflags.private: -DJXL_THREADS_STATIC_DEFINE diff --git a/third-party/libjxl/libjxl/lib/threads/resizable_parallel_runner.cc b/third-party/libjxl/libjxl/lib/threads/resizable_parallel_runner.cc new file mode 100644 index 0000000000..db27286dea --- /dev/null +++ b/third-party/libjxl/libjxl/lib/threads/resizable_parallel_runner.cc @@ -0,0 +1,195 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include + +#include +#include +#include +#include +#include +#include + +namespace jpegxl { +namespace { + +// A thread pool that allows changing the number of threads it runs. It also +// runs tasks on the calling thread, which can work better on schedulers for +// heterogeneous architectures. 
+struct ResizeableParallelRunner { + void SetNumThreads(size_t num) { + if (num > 0) { + num -= 1; + } + { + std::unique_lock l(state_mutex_); + num_desired_workers_ = num; + workers_can_proceed_.notify_all(); + } + if (workers_.size() < num) { + for (size_t i = workers_.size(); i < num; i++) { + workers_.emplace_back([this, i]() { WorkerBody(i); }); + } + } + if (workers_.size() > num) { + for (size_t i = num; i < workers_.size(); i++) { + workers_[i].join(); + } + workers_.resize(num); + } + } + + ~ResizeableParallelRunner() { SetNumThreads(0); } + + JxlParallelRetCode Run(void* jxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start, + uint32_t end) { + if (start + 1 == end) { + JxlParallelRetCode ret = init(jxl_opaque, 1); + if (ret != 0) return ret; + + func(jxl_opaque, start, 0); + return ret; + } + + size_t num_workers = std::min(workers_.size() + 1, end - start); + JxlParallelRetCode ret = init(jxl_opaque, num_workers); + if (ret != 0) { + return ret; + } + + { + std::unique_lock l(state_mutex_); + // Avoid waking up more workers than needed. + max_running_workers_ = end - start - 1; + next_task_ = start; + end_task_ = end; + func_ = func; + jxl_opaque_ = jxl_opaque; + work_available_ = true; + num_running_workers_++; + workers_can_proceed_.notify_all(); + } + + DequeueTasks(0); + + while (true) { + std::unique_lock l(state_mutex_); + if (num_running_workers_ == 0) break; + work_done_.wait(l); + } + + return ret; + } + + private: + void WorkerBody(size_t worker_id) { + while (true) { + { + std::unique_lock l(state_mutex_); + // Worker pool was reduced, resize down. + if (worker_id >= num_desired_workers_) { + return; + } + // Nothing to do this time. 
+ if (!work_available_ || worker_id >= max_running_workers_) { + workers_can_proceed_.wait(l); + continue; + } + num_running_workers_++; + } + DequeueTasks(worker_id + 1); + } + } + + void DequeueTasks(size_t thread_id) { + while (true) { + uint32_t task = next_task_++; + if (task >= end_task_) { + std::unique_lock l(state_mutex_); + num_running_workers_--; + work_available_ = false; + if (num_running_workers_ == 0) { + work_done_.notify_all(); + } + break; + } + func_(jxl_opaque_, task, thread_id); + } + } + + // Checks when the worker has something to do, which can be one of: + // - quitting (when worker_id >= num_desired_workers_) + // - having work available for them (work_available_ is true and worker_id >= + // max_running_workers_) + std::condition_variable workers_can_proceed_; + + // Workers are done, and the main thread can proceed (num_running_workers_ == + // 0) + std::condition_variable work_done_; + + std::vector workers_; + + // Protects all the remaining variables, except for func_, jxl_opaque_ and + // end_task_ (for which only the write by the main thread is protected, and + // subsequent uses by workers happen-after it) and next_task_ (which is + // atomic). + std::mutex state_mutex_; + + // Range of tasks still need to be done. + std::atomic next_task_; + uint32_t end_task_; + + // Function to run and its argument. + JxlParallelRunFunction func_; + void* jxl_opaque_; // not owned + + // Variables that control the workers: + // - work_available_ is set to true after a call to Run() and to false at the + // end of it. + // - num_desired_workers_ represents the number of workers that should be + // present. + // - max_running_workers_ represents the number of workers that should be + // executing tasks. + // - num_running_workers_ represents the number of workers that are executing + // tasks. 
+ size_t num_desired_workers_ = 0; + size_t max_running_workers_ = 0; + size_t num_running_workers_ = 0; + bool work_available_ = false; +}; +} // namespace +} // namespace jpegxl + +extern "C" { +JXL_THREADS_EXPORT JxlParallelRetCode JxlResizableParallelRunner( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) { + return static_cast(runner_opaque) + ->Run(jpegxl_opaque, init, func, start_range, end_range); +} + +JXL_THREADS_EXPORT void* JxlResizableParallelRunnerCreate( + const JxlMemoryManager* memory_manager) { + return new jpegxl::ResizeableParallelRunner(); +} + +JXL_THREADS_EXPORT void JxlResizableParallelRunnerSetThreads( + void* runner_opaque, size_t num_threads) { + static_cast(runner_opaque) + ->SetNumThreads(num_threads); +} + +JXL_THREADS_EXPORT void JxlResizableParallelRunnerDestroy(void* runner_opaque) { + delete static_cast(runner_opaque); +} + +JXL_THREADS_EXPORT uint32_t +JxlResizableParallelRunnerSuggestThreads(uint64_t xsize, uint64_t ysize) { + // ~one thread per group. + return std::min(std::thread::hardware_concurrency(), + xsize * ysize / (256 * 256)); +} +} diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner.cc b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner.cc new file mode 100644 index 0000000000..47b81bdb16 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner.cc @@ -0,0 +1,101 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include + +#include "lib/threads/thread_parallel_runner_internal.h" + +namespace { + +// Default JxlMemoryManager using malloc and free for the jpegxl_threads +// library. Same as the default JxlMemoryManager for the jpegxl library +// itself. + +// Default alloc and free functions. 
+void* ThreadMemoryManagerDefaultAlloc(void* opaque, size_t size) { + return malloc(size); +} + +void ThreadMemoryManagerDefaultFree(void* opaque, void* address) { + free(address); +} + +// Initializes the memory manager instance with the passed one. The +// MemoryManager passed in |memory_manager| may be NULL or contain NULL +// functions which will be initialized with the default ones. If either alloc +// or free are NULL, then both must be NULL, otherwise this function returns an +// error. +bool ThreadMemoryManagerInit(JxlMemoryManager* self, + const JxlMemoryManager* memory_manager) { + if (memory_manager) { + *self = *memory_manager; + } else { + memset(self, 0, sizeof(*self)); + } + if (!self->alloc != !self->free) { + return false; + } + if (!self->alloc) self->alloc = ThreadMemoryManagerDefaultAlloc; + if (!self->free) self->free = ThreadMemoryManagerDefaultFree; + + return true; +} + +void* ThreadMemoryManagerAlloc(const JxlMemoryManager* memory_manager, + size_t size) { + return memory_manager->alloc(memory_manager->opaque, size); +} + +void ThreadMemoryManagerFree(const JxlMemoryManager* memory_manager, + void* address) { + return memory_manager->free(memory_manager->opaque, address); +} + +} // namespace + +JxlParallelRetCode JxlThreadParallelRunner( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) { + return jpegxl::ThreadParallelRunner::Runner( + runner_opaque, jpegxl_opaque, init, func, start_range, end_range); +} + +/// Starts the given number of worker threads and blocks until they are ready. +/// "num_worker_threads" defaults to one per hyperthread. If zero, all tasks +/// run on the main thread. 
+void* JxlThreadParallelRunnerCreate(const JxlMemoryManager* memory_manager, + size_t num_worker_threads) { + JxlMemoryManager local_memory_manager; + if (!ThreadMemoryManagerInit(&local_memory_manager, memory_manager)) + return nullptr; + + void* alloc = ThreadMemoryManagerAlloc(&local_memory_manager, + sizeof(jpegxl::ThreadParallelRunner)); + if (!alloc) return nullptr; + // Placement new constructor on allocated memory + jpegxl::ThreadParallelRunner* runner = + new (alloc) jpegxl::ThreadParallelRunner(num_worker_threads); + runner->memory_manager = local_memory_manager; + + return runner; +} + +void JxlThreadParallelRunnerDestroy(void* runner_opaque) { + jpegxl::ThreadParallelRunner* runner = + reinterpret_cast(runner_opaque); + if (runner) { + JxlMemoryManager local_memory_manager = runner->memory_manager; + // Call destructor directly since custom free function is used. + runner->~ThreadParallelRunner(); + ThreadMemoryManagerFree(&local_memory_manager, runner); + } +} + +// Get default value for num_worker_threads parameter of +// InitJxlThreadParallelRunner. +size_t JxlThreadParallelRunnerDefaultNumWorkerThreads() { + return std::thread::hardware_concurrency(); +} diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.cc b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.cc new file mode 100644 index 0000000000..cfc7e22f9f --- /dev/null +++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.cc @@ -0,0 +1,206 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include "lib/threads/thread_parallel_runner_internal.h" + +#include + +#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \ + defined(THREAD_SANITIZER) +#include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace +#endif // defined(*_SANITIZER) + +#include + +namespace { + +// Important: JXL_ASSERT does not guarantee running the `condition` code, +// use only for debug mode checks. + +#if JXL_ENABLE_ASSERT +// Exits the program after printing a stack trace when possible. +bool Abort() { +#if defined(ADDRESS_SANITIZER) || defined(MEMORY_SANITIZER) || \ + defined(THREAD_SANITIZER) + // If compiled with any sanitizer print a stack trace. This call doesn't crash + // the program, instead the trap below will crash it also allowing gdb to + // break there. + __sanitizer_print_stack_trace(); +#endif // defined(*_SANITIZER) + +#ifdef _MSC_VER + __debugbreak(); + abort(); +#else + __builtin_trap(); +#endif +} +#define JXL_ASSERT(condition) \ + do { \ + if (!(condition)) { \ + Abort(); \ + } \ + } while (0) +#else +#define JXL_ASSERT(condition) \ + do { \ + } while (0) +#endif +} // namespace + +namespace jpegxl { + +// static +JxlParallelRetCode ThreadParallelRunner::Runner( + void* runner_opaque, void* jpegxl_opaque, JxlParallelRunInit init, + JxlParallelRunFunction func, uint32_t start_range, uint32_t end_range) { + ThreadParallelRunner* self = + static_cast(runner_opaque); + if (start_range > end_range) return -1; + if (start_range == end_range) return 0; + + int ret = init(jpegxl_opaque, std::max(self->num_worker_threads_, 1)); + if (ret != 0) return ret; + + // Use a sequential run when num_worker_threads_ is zero since we have no + // worker threads. 
+ if (self->num_worker_threads_ == 0) { + const size_t thread = 0; + for (uint32_t task = start_range; task < end_range; ++task) { + func(jpegxl_opaque, task, thread); + } + return 0; + } + + if (self->depth_.fetch_add(1, std::memory_order_acq_rel) != 0) { + return -1; // Must not re-enter. + } + + const WorkerCommand worker_command = + (static_cast(start_range) << 32) + end_range; + // Ensure the inputs do not result in a reserved command. + JXL_ASSERT(worker_command != kWorkerWait); + JXL_ASSERT(worker_command != kWorkerOnce); + JXL_ASSERT(worker_command != kWorkerExit); + + self->data_func_ = func; + self->jpegxl_opaque_ = jpegxl_opaque; + self->num_reserved_.store(0, std::memory_order_relaxed); + + self->StartWorkers(worker_command); + self->WorkersReadyBarrier(); + + if (self->depth_.fetch_add(-1, std::memory_order_acq_rel) != 1) { + return -1; + } + return 0; +} + +// static +void ThreadParallelRunner::RunRange(ThreadParallelRunner* self, + const WorkerCommand command, + const int thread) { + const uint32_t begin = command >> 32; + const uint32_t end = command & 0xFFFFFFFF; + const uint32_t num_tasks = end - begin; + const uint32_t num_worker_threads = self->num_worker_threads_; + + // OpenMP introduced several "schedule" strategies: + // "single" (static assignment of exactly one chunk per thread): slower. + // "dynamic" (allocates k tasks at a time): competitive for well-chosen k. + // "guided" (allocates k tasks, decreases k): computing k = remaining/n + // is faster than halving k each iteration. We prefer this strategy + // because it avoids user-specified parameters. + + for (;;) { +#if 0 + // dynamic + const uint32_t my_size = std::max(num_tasks / (num_worker_threads * 4), 1); +#else + // guided + const uint32_t num_reserved = + self->num_reserved_.load(std::memory_order_relaxed); + // It is possible that more tasks are reserved than ready to run. 
+ const uint32_t num_remaining = + num_tasks - std::min(num_reserved, num_tasks); + const uint32_t my_size = + std::max(num_remaining / (num_worker_threads * 4), 1u); +#endif + const uint32_t my_begin = begin + self->num_reserved_.fetch_add( + my_size, std::memory_order_relaxed); + const uint32_t my_end = std::min(my_begin + my_size, begin + num_tasks); + // Another thread already reserved the last task. + if (my_begin >= my_end) { + break; + } + for (uint32_t task = my_begin; task < my_end; ++task) { + self->data_func_(self->jpegxl_opaque_, task, thread); + } + } +} + +// static +void ThreadParallelRunner::ThreadFunc(ThreadParallelRunner* self, + const int thread) { + // Until kWorkerExit command received: + for (;;) { + std::unique_lock lock(self->mutex_); + // Notify main thread that this thread is ready. + if (++self->workers_ready_ == self->num_threads_) { + self->workers_ready_cv_.notify_one(); + } + RESUME_WAIT: + // Wait for a command. + self->worker_start_cv_.wait(lock); + const WorkerCommand command = self->worker_start_command_; + switch (command) { + case kWorkerWait: // spurious wakeup: + goto RESUME_WAIT; // lock still held, avoid incrementing ready. + case kWorkerOnce: + lock.unlock(); + self->data_func_(self->jpegxl_opaque_, thread, thread); + break; + case kWorkerExit: + return; // exits thread + default: + lock.unlock(); + RunRange(self, command, thread); + break; + } + } +} + +ThreadParallelRunner::ThreadParallelRunner(const int num_worker_threads) + : num_worker_threads_(num_worker_threads), + num_threads_(std::max(num_worker_threads, 1)) { + threads_.reserve(num_worker_threads_); + + // Suppress "unused-private-field" warning. + (void)padding1; + (void)padding2; + + // Safely handle spurious worker wakeups. 
+ worker_start_command_ = kWorkerWait; + + for (uint32_t i = 0; i < num_worker_threads_; ++i) { + threads_.emplace_back(ThreadFunc, this, i); + } + + if (num_worker_threads_ != 0) { + WorkersReadyBarrier(); + } +} + +ThreadParallelRunner::~ThreadParallelRunner() { + if (num_worker_threads_ != 0) { + StartWorkers(kWorkerExit); + } + + for (std::thread& thread : threads_) { + JXL_ASSERT(thread.joinable()); + thread.join(); + } +} +} // namespace jpegxl diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.h b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.h new file mode 100644 index 0000000000..199a5f2a8b --- /dev/null +++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_internal.h @@ -0,0 +1,166 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// + +// C++ implementation using std::thread of a ::JxlParallelRunner. + +// The main class in this module, ThreadParallelRunner, implements a static +// method ThreadParallelRunner::Runner than can be passed as a +// JxlParallelRunner when using the JPEG XL library. This uses std::thread +// internally and related synchronization functions. The number of threads +// created is fixed at construction time and the threads are re-used for every +// ThreadParallelRunner::Runner call. Only one concurrent Runner() call per +// instance is allowed at a time. +// +// This is a scalable, lower-overhead thread pool runner, especially suitable +// for data-parallel computations in the fork-join model, where clients need to +// know when all tasks have completed. +// +// This thread pool can efficiently load-balance millions of tasks using an +// atomic counter, thus avoiding per-task virtual or system calls. 
With 48 +// hyperthreads and 1M tasks that add to an atomic counter, overall runtime is +// 10-20x higher when using std::async, and ~200x for a queue-based thread +// pool. +// +// Usage: +// ThreadParallelRunner runner; +// JxlDecode( +// ... , &ThreadParallelRunner::Runner, static_cast(&runner)); + +#ifndef LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_ +#define LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_ + +#include +#include +#include +#include +#include + +#include +#include //NOLINT +#include //NOLINT +#include //NOLINT +#include + +namespace jpegxl { + +// Main helper class implementing the ::JxlParallelRunner interface. +class ThreadParallelRunner { + public: + // ::JxlParallelRunner interface. + static JxlParallelRetCode Runner(void* runner_opaque, void* jpegxl_opaque, + JxlParallelRunInit init, + JxlParallelRunFunction func, + uint32_t start_range, uint32_t end_range); + + // Starts the given number of worker threads and blocks until they are ready. + // "num_worker_threads" defaults to one per hyperthread. If zero, all tasks + // run on the main thread. + explicit ThreadParallelRunner( + int num_worker_threads = std::thread::hardware_concurrency()); + + // Waits for all threads to exit. + ~ThreadParallelRunner(); + + // Returns maximum number of main/worker threads that may call Func. Useful + // for allocating per-thread storage. + size_t NumThreads() const { return num_threads_; } + + // Runs func(thread, thread) on all thread(s) that may participate in Run. + // If NumThreads() == 0, runs on the main thread with thread == 0, otherwise + // concurrently called by each worker thread in [0, NumThreads()). 
+ template + void RunOnEachThread(const Func& func) { + if (num_worker_threads_ == 0) { + const int thread = 0; + func(thread, thread); + return; + } + + data_func_ = reinterpret_cast(&CallClosure); + jpegxl_opaque_ = const_cast(static_cast(&func)); + StartWorkers(kWorkerOnce); + WorkersReadyBarrier(); + } + + JxlMemoryManager memory_manager; + + private: + // After construction and between calls to Run, workers are "ready", i.e. + // waiting on worker_start_cv_. They are "started" by sending a "command" + // and notifying all worker_start_cv_ waiters. (That is why all workers + // must be ready/waiting - otherwise, the notification will not reach all of + // them and the main thread waits in vain for them to report readiness.) + using WorkerCommand = uint64_t; + + // Special values; all others encode the begin/end parameters. Note that all + // these are no-op ranges (begin >= end) and therefore never used to encode + // ranges. + static constexpr WorkerCommand kWorkerWait = ~1ULL; + static constexpr WorkerCommand kWorkerOnce = ~2ULL; + static constexpr WorkerCommand kWorkerExit = ~3ULL; + + // Calls f(task, thread). Used for type erasure of Func arguments. The + // signature must match JxlParallelRunFunction, hence a void* argument. + template + static void CallClosure(void* f, const uint32_t task, const size_t thread) { + (*reinterpret_cast(f))(task, thread); + } + + void WorkersReadyBarrier() { + std::unique_lock lock(mutex_); + // Typically only a single iteration. + while (workers_ready_ != threads_.size()) { + workers_ready_cv_.wait(lock); + } + workers_ready_ = 0; + + // Safely handle spurious worker wakeups. + worker_start_command_ = kWorkerWait; + } + + // Precondition: all workers are ready. + void StartWorkers(const WorkerCommand worker_command) { + mutex_.lock(); + worker_start_command_ = worker_command; + // Workers will need this lock, so release it before they wake up. 
+ mutex_.unlock(); + worker_start_cv_.notify_all(); + } + + // Attempts to reserve and perform some work from the global range of tasks, + // which is encoded within "command". Returns after all tasks are reserved. + static void RunRange(ThreadParallelRunner* self, const WorkerCommand command, + const int thread); + + static void ThreadFunc(ThreadParallelRunner* self, int thread); + + // Unmodified after ctor, but cannot be const because we call thread::join(). + std::vector threads_; + + const uint32_t num_worker_threads_; // == threads_.size() + const uint32_t num_threads_; + + std::atomic depth_{0}; // detects if Run is re-entered (not supported). + + std::mutex mutex_; // guards both cv and their variables. + std::condition_variable workers_ready_cv_; + uint32_t workers_ready_ = 0; + std::condition_variable worker_start_cv_; + WorkerCommand worker_start_command_; + + // Written by main thread, read by workers (after mutex lock/unlock). + JxlParallelRunFunction data_func_; + void* jpegxl_opaque_; + + // Updated by workers; padding avoids false sharing. + uint8_t padding1[64]; + std::atomic num_reserved_{0}; + uint8_t padding2[64]; +}; + +} // namespace jpegxl + +#endif // LIB_THREADS_THREAD_PARALLEL_RUNNER_INTERNAL_H_ diff --git a/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_test.cc b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_test.cc new file mode 100644 index 0000000000..7c8e602764 --- /dev/null +++ b/third-party/libjxl/libjxl/lib/threads/thread_parallel_runner_test.cc @@ -0,0 +1,122 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include "lib/jxl/base/data_parallel.h" +#include "lib/jxl/test_utils.h" +#include "lib/jxl/testing.h" + +using jxl::test::ThreadPoolForTests; + +namespace jpegxl { +namespace { + +int PopulationCount(uint64_t bits) { + int num_set = 0; + while (bits != 0) { + num_set += bits & 1; + bits >>= 1; + } + return num_set; +} + +// Ensures task parameter is in bounds, every parameter is reached, +// pool can be reused (multiple consecutive Run calls), pool can be destroyed +// (joining with its threads), num_threads=0 works (runs on current thread). +TEST(ThreadParallelRunnerTest, TestPool) { + for (int num_threads = 0; num_threads <= 18; ++num_threads) { + ThreadPoolForTests pool(num_threads); + for (int num_tasks = 0; num_tasks < 32; ++num_tasks) { + std::vector mementos(num_tasks); + for (int begin = 0; begin < 32; ++begin) { + std::fill(mementos.begin(), mementos.end(), 0); + EXPECT_TRUE(RunOnPool( + &pool, begin, begin + num_tasks, jxl::ThreadPool::NoInit, + [begin, num_tasks, &mementos](const int task, const int thread) { + // Parameter is in the given range + EXPECT_GE(task, begin); + EXPECT_LT(task, begin + num_tasks); + + // Store mementos to be sure we visited each task. + mementos.at(task - begin) = 1000 + task; + }, + "TestPool")); + for (int task = begin; task < begin + num_tasks; ++task) { + EXPECT_EQ(1000 + task, mementos.at(task - begin)); + } + } + } + } +} + +// Verify "thread" parameter when processing few tasks. 
+TEST(ThreadParallelRunnerTest, TestSmallAssignments) { + const int kMaxThreads = 8; + for (int num_threads = 1; num_threads <= kMaxThreads; ++num_threads) { + ThreadPoolForTests pool(num_threads); + + // (Avoid mutex because it may perturb the worker thread scheduling) + std::atomic id_bits{0}; + std::atomic num_calls{0}; + + EXPECT_TRUE(RunOnPool( + &pool, 0, num_threads, jxl::ThreadPool::NoInit, + [&num_calls, num_threads, &id_bits](const int task, const int thread) { + num_calls.fetch_add(1, std::memory_order_relaxed); + + EXPECT_LT(thread, num_threads); + uint64_t bits = id_bits.load(std::memory_order_relaxed); + while ( + !id_bits.compare_exchange_weak(bits, bits | (1ULL << thread))) { + } + }, + "TestSmallAssignments")); + + // Correct number of tasks. + EXPECT_EQ(num_threads, num_calls.load()); + + const int num_participants = PopulationCount(id_bits.load()); + // Can't expect equality because other workers may have woken up too late. + EXPECT_LE(num_participants, num_threads); + } +} + +struct Counter { + Counter() { + // Suppress "unused-field" warning. 
+ (void)padding; + } + void Assimilate(const Counter& victim) { counter += victim.counter; } + int counter = 0; + int padding[31]; +}; + +TEST(ThreadParallelRunnerTest, TestCounter) { + const int kNumThreads = 12; + ThreadPoolForTests pool(kNumThreads); + alignas(128) Counter counters[kNumThreads]; + + const int kNumTasks = kNumThreads * 19; + EXPECT_TRUE(RunOnPool( + &pool, 0, kNumTasks, jxl::ThreadPool::NoInit, + [&counters](const int task, const int thread) { + counters[thread].counter += task; + }, + "TestCounter")); + + int expected = 0; + for (int i = 0; i < kNumTasks; ++i) { + expected += i; + } + + for (int i = 1; i < kNumThreads; ++i) { + counters[0].Assimilate(counters[i]); + } + EXPECT_EQ(expected, counters[0].counter); +} + +} // namespace +} // namespace jpegxl diff --git a/third-party/libjxl/libjxl/plugins/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/CMakeLists.txt new file mode 100644 index 0000000000..bff1bff29d --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/CMakeLists.txt @@ -0,0 +1,21 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. 

if(NOT MSVC)
  option(JPEGXL_ENABLE_PLUGIN_GDKPIXBUF "Enable plugin for GdkPixbuf image loading library" ON)
  if(JPEGXL_ENABLE_PLUGIN_GDKPIXBUF)
    add_subdirectory(gdk-pixbuf)
  endif()
endif()

option(JPEGXL_ENABLE_PLUGIN_GIMP210 "Enable plugin for GIMP 2.10.x series" ON)
if(JPEGXL_ENABLE_PLUGIN_GIMP210)
  add_subdirectory(gimp)
endif()

option(JPEGXL_ENABLE_PLUGIN_MIME "Enable image/jxl declaration for shared-mime-info" ON)
if(JPEGXL_ENABLE_PLUGIN_MIME)
  add_subdirectory(mime)
endif()
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/CMakeLists.txt
new file mode 100644
index 0000000000..7b53b98c66
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/CMakeLists.txt
@@ -0,0 +1,83 @@
# Copyright (c) the JPEG XL Project Authors. All rights reserved.
#
# Use of this source code is governed by a BSD-style
# license that can be found in the LICENSE file.

find_package(PkgConfig)
pkg_check_modules(Gdk-Pixbuf IMPORTED_TARGET gdk-pixbuf-2.0>=2.36)

include(GNUInstallDirs)

if (NOT Gdk-Pixbuf_FOUND)
  message(WARNING "GDK Pixbuf development libraries not found, \
 the Gdk-Pixbuf plugin will not be built")
  return ()
endif ()

add_library(pixbufloader-jxl MODULE pixbufloader-jxl.c)

# Mark all symbols as hidden by default. The PkgConfig::Gdk-Pixbuf dependency
# will cause fill_info and fill_vtable entry points to be made public.
set_target_properties(pixbufloader-jxl PROPERTIES
  CXX_VISIBILITY_PRESET hidden
  VISIBILITY_INLINES_HIDDEN 1
)

# Note: This only needs the decoder library, but we don't install the decoder
# shared library.
target_link_libraries(pixbufloader-jxl jxl jxl_threads lcms2 PkgConfig::Gdk-Pixbuf)

# Ask pkg-config where gdk-pixbuf expects loader modules under the install
# prefix, and install the loader there.
execute_process(COMMAND ${PKG_CONFIG_EXECUTABLE} gdk-pixbuf-2.0 --variable gdk_pixbuf_moduledir --define-variable=prefix=${CMAKE_INSTALL_PREFIX} OUTPUT_VARIABLE GDK_PIXBUF_MODULEDIR OUTPUT_STRIP_TRAILING_WHITESPACE)
install(TARGETS pixbufloader-jxl DESTINATION "${GDK_PIXBUF_MODULEDIR}")

# Instead of the following, we might instead add the
# mime type image/jxl to
# /usr/share/thumbnailers/gdk-pixbuf-thumbnailer.thumbnailer
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/jxl.thumbnailer DESTINATION "${CMAKE_INSTALL_DATADIR}/thumbnailers/")

if(BUILD_TESTING AND NOT CMAKE_CROSSCOMPILING)
  pkg_check_modules(Gdk IMPORTED_TARGET gdk-2.0)
  if (Gdk_FOUND)
    # Test for loading a .jxl file using the pixbufloader library via GDK. This
    # requires to have the image/jxl mime type and loader library configured,
    # which we do in a fake environment in the CMAKE_CURRENT_BINARY_DIR.
    add_executable(pixbufloader_test pixbufloader_test.cc)
    target_link_libraries(pixbufloader_test PkgConfig::Gdk)

    # Create a mime cache for test.
    add_custom_command(
      OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/mime/mime.cache"
      COMMAND env XDG_DATA_HOME=${CMAKE_CURRENT_BINARY_DIR}
        xdg-mime install --novendor
        "${CMAKE_SOURCE_DIR}/plugins/mime/image-jxl.xml"
      DEPENDS "${CMAKE_SOURCE_DIR}/plugins/mime/image-jxl.xml"
    )
    add_custom_target(pixbufloader_test_mime
      DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/mime/mime.cache"
    )
    add_dependencies(pixbufloader_test pixbufloader_test_mime)

    # Use a fake X server to run the test if xvfb is installed.
    find_program (XVFB_PROGRAM xvfb-run)
    if(XVFB_PROGRAM)
      set(XVFB_PROGRAM_PREFIX "${XVFB_PROGRAM};-a")
    else()
      set(XVFB_PROGRAM_PREFIX "")
    endif()

    # libX11.so and libgdk-x11-2.0.so are not compiled with MSAN -> report
    # use-of-uninitialized-value for some internal string value.
    # TODO(eustas): investigate direct memory leak (32 bytes).
    if (NOT (SANITIZER STREQUAL "msan") AND NOT (SANITIZER STREQUAL "asan"))
      # The command is the test executable built above; the generator
      # expression resolves to its full path at build time. (This copy of the
      # file had lost the expression, leaving a bare "$" that cannot be run.)
      add_test(
        NAME pixbufloader_test_jxl
        COMMAND
          ${XVFB_PROGRAM_PREFIX} $<TARGET_FILE:pixbufloader_test>
          "${CMAKE_CURRENT_SOURCE_DIR}/loaders_test.cache"
          "${CMAKE_SOURCE_DIR}/testdata/jxl/blending/cropped_traffic_light.jxl"
        WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      )
      # Exit code 254 from the test binary is reported as "skipped".
      set_tests_properties(pixbufloader_test_jxl PROPERTIES SKIP_RETURN_CODE 254)
    endif()
  endif()  # Gdk_FOUND
endif()  # BUILD_TESTING
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/README.md b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/README.md
new file mode 100644
index 0000000000..185919436f
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/README.md
@@ -0,0 +1,50 @@
## JPEG XL GDK Pixbuf


The plugin may already have been installed when following the instructions from the
[Installing section of BUILDING.md](../../BUILDING.md#installing), in which case it should
already be in the correct place, e.g.

```/usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/2.10.0/loaders/libpixbufloader-jxl.so```

Otherwise we can copy it manually:

```bash
sudo cp $your_build_directory/plugins/gdk-pixbuf/libpixbufloader-jxl.so /usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/2.10.0/loaders/libpixbufloader-jxl.so
```


Then we need to update the cache, for example with:

```bash
sudo /usr/lib/x86_64-linux-gnu/gdk-pixbuf-2.0/gdk-pixbuf-query-loaders --update-cache
```

In order to get thumbnails with this, first one has to add the jxl MIME type, see
[../mime/README.md](../mime/README.md).

Ensure that the thumbnailer file is installed in the correct place,
`/usr/share/thumbnailers/jxl.thumbnailer` or `/usr/local/share/thumbnailers/jxl.thumbnailer`.
+ +The file should have been copied automatically when following the instructions +in the [Installing section of README.md](../../README.md#installing), but +otherwise it can be copied manually: + +```bash +sudo cp plugins/gdk-pixbuf/jxl.thumbnailer /usr/local/share/thumbnailers/jxl.thumbnailer +``` + +Update the Mime database with +```bash +update-mime --local +``` +or +```bash +sudo update-desktop-database +``` + +Then possibly delete the thumbnail cache with +```bash +rm -r ~/.cache/thumbnails +``` +and restart the application displaying thumbnails, e.g. `nautilus -q` to display thumbnails. diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/jxl.thumbnailer b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/jxl.thumbnailer new file mode 100644 index 0000000000..1bcaab61fc --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/jxl.thumbnailer @@ -0,0 +1,4 @@ +[Thumbnailer Entry] +TryExec=/usr/bin/gdk-pixbuf-thumbnailer +Exec=/usr/bin/gdk-pixbuf-thumbnailer -s %s %u %o +MimeType=image/jxl; diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/loaders_test.cache b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/loaders_test.cache new file mode 100644 index 0000000000..95c62c8fc3 --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/loaders_test.cache @@ -0,0 +1,16 @@ +# GdkPixbuf Image Loader Modules file for testing +# Automatically generated file, do not edit +# Created by gdk-pixbuf-query-loaders from gdk-pixbuf-2.42.2 +# +# Generated with: +# GDK_PIXBUF_MODULEDIR=`pwd`/build/plugins/gdk-pixbuf/ gdk-pixbuf-query-loaders +# +# Modified to use the library from the current working directory at runtime. 
+"./libpixbufloader-jxl.so" +"jxl" 4 "gdk-pixbuf" "JPEG XL image" "BSD-3" +"image/jxl" "" +"jxl" "" +"\377\n" " " 100 +"...\fJXL \r\n\207\n" "zzz " 100 + + diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader-jxl.c b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader-jxl.c new file mode 100644 index 0000000000..28eb140da7 --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader-jxl.c @@ -0,0 +1,814 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#include +#include +#include +#include +#include + +#include "lcms2.h" + +#define GDK_PIXBUF_ENABLE_BACKEND +#include +#undef GDK_PIXBUF_ENABLE_BACKEND + +G_BEGIN_DECLS + +// Information about a single frame. +typedef struct { + uint64_t duration_ms; + GdkPixbuf *data; + gboolean decoded; +} GdkPixbufJxlAnimationFrame; + +// Represent a whole JPEG XL animation; all its fields are owned; as a GObject, +// the Animation struct itself is reference counted (as are the GdkPixbufs for +// individual frames). +struct _GdkPixbufJxlAnimation { + GdkPixbufAnimation parent_instance; + + // GDK interface implementation callbacks. + GdkPixbufModuleSizeFunc image_size_callback; + GdkPixbufModulePreparedFunc pixbuf_prepared_callback; + GdkPixbufModuleUpdatedFunc area_updated_callback; + gpointer user_data; + + // All frames known so far; a frame is added when the JXL_DEC_FRAME event is + // received from the decoder; initially frame.decoded is FALSE, until + // the JXL_DEC_IMAGE event is received. + GArray *frames; + + // JPEG XL decoder and related structures. + JxlParallelRunner *parallel_runner; + JxlDecoder *decoder; + JxlPixelFormat pixel_format; + + // Decoding is `done` when JXL_DEC_SUCCESS is received; calling + // load_increment afterwards gives an error. + gboolean done; + + // Image information. 
+ size_t xsize; + size_t ysize; + gboolean alpha_premultiplied; + gboolean has_animation; + gboolean has_alpha; + uint64_t total_duration_ms; + uint64_t tick_duration_us; + uint64_t repetition_count; // 0 = loop forever + + gpointer icc_buff; + cmsContext context; + cmsHPROFILE profile, srgb; + cmsHTRANSFORM transform; +}; + +#define GDK_TYPE_PIXBUF_JXL_ANIMATION (gdk_pixbuf_jxl_animation_get_type()) +G_DECLARE_FINAL_TYPE(GdkPixbufJxlAnimation, gdk_pixbuf_jxl_animation, GDK, + JXL_ANIMATION, GdkPixbufAnimation); + +G_DEFINE_TYPE(GdkPixbufJxlAnimation, gdk_pixbuf_jxl_animation, + GDK_TYPE_PIXBUF_ANIMATION); + +// Iterator to a given point in time in the animation; contains a pointer to the +// full animation. +struct _GdkPixbufJxlAnimationIter { + GdkPixbufAnimationIter parent_instance; + GdkPixbufJxlAnimation *animation; + size_t current_frame; + uint64_t time_offset; +}; + +#define GDK_TYPE_PIXBUF_JXL_ANIMATION_ITER \ + (gdk_pixbuf_jxl_animation_iter_get_type()) +G_DECLARE_FINAL_TYPE(GdkPixbufJxlAnimationIter, gdk_pixbuf_jxl_animation_iter, + GDK, JXL_ANIMATION_ITER, GdkPixbufAnimationIter); +G_DEFINE_TYPE(GdkPixbufJxlAnimationIter, gdk_pixbuf_jxl_animation_iter, + GDK_TYPE_PIXBUF_ANIMATION_ITER); + +static void gdk_pixbuf_jxl_animation_init(GdkPixbufJxlAnimation *obj) { + // Suppress "unused function" warnings. 
+ (void)glib_autoptr_cleanup_GdkPixbufJxlAnimation; + (void)GDK_JXL_ANIMATION; + (void)GDK_IS_JXL_ANIMATION; +} + +static gboolean gdk_pixbuf_jxl_animation_is_static_image( + GdkPixbufAnimation *anim) { + GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim; + return !jxl_anim->has_animation; +} + +static GdkPixbuf *gdk_pixbuf_jxl_animation_get_static_image( + GdkPixbufAnimation *anim) { + GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim; + if (jxl_anim->frames == NULL || jxl_anim->frames->len == 0) return NULL; + GdkPixbufJxlAnimationFrame *frame = + &g_array_index(jxl_anim->frames, GdkPixbufJxlAnimationFrame, 0); + return frame->decoded ? frame->data : NULL; +} + +static void gdk_pixbuf_jxl_animation_get_size(GdkPixbufAnimation *anim, + int *width, int *height) { + GdkPixbufJxlAnimation *jxl_anim = (GdkPixbufJxlAnimation *)anim; + if (width) *width = jxl_anim->xsize; + if (height) *height = jxl_anim->ysize; +} + +G_GNUC_BEGIN_IGNORE_DEPRECATIONS +static gboolean gdk_pixbuf_jxl_animation_iter_advance( + GdkPixbufAnimationIter *iter, const GTimeVal *current_time); + +static GdkPixbufAnimationIter *gdk_pixbuf_jxl_animation_get_iter( + GdkPixbufAnimation *anim, const GTimeVal *start_time) { + GdkPixbufJxlAnimationIter *iter = + g_object_new(GDK_TYPE_PIXBUF_JXL_ANIMATION_ITER, NULL); + iter->animation = (GdkPixbufJxlAnimation *)anim; + iter->time_offset = start_time->tv_sec * 1000ULL + start_time->tv_usec / 1000; + g_object_ref(iter->animation); + gdk_pixbuf_jxl_animation_iter_advance((GdkPixbufAnimationIter *)iter, + start_time); + return (GdkPixbufAnimationIter *)iter; +} +G_GNUC_END_IGNORE_DEPRECATIONS + +static void gdk_pixbuf_jxl_animation_finalize(GObject *obj) { + GdkPixbufJxlAnimation *decoder_state = (GdkPixbufJxlAnimation *)obj; + if (decoder_state->frames != NULL) { + for (size_t i = 0; i < decoder_state->frames->len; i++) { + g_object_unref( + g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame, i) + .data); + } + 
g_array_free(decoder_state->frames, /*free_segment=*/TRUE); + } + JxlResizableParallelRunnerDestroy(decoder_state->parallel_runner); + JxlDecoderDestroy(decoder_state->decoder); + cmsDeleteTransform(decoder_state->transform); + cmsCloseProfile(decoder_state->srgb); + cmsCloseProfile(decoder_state->profile); + cmsDeleteContext(decoder_state->context); + g_free(decoder_state->icc_buff); +} + +static void gdk_pixbuf_jxl_animation_class_init( + GdkPixbufJxlAnimationClass *klass) { + G_OBJECT_CLASS(klass)->finalize = gdk_pixbuf_jxl_animation_finalize; + klass->parent_class.is_static_image = + gdk_pixbuf_jxl_animation_is_static_image; + klass->parent_class.get_static_image = + gdk_pixbuf_jxl_animation_get_static_image; + klass->parent_class.get_size = gdk_pixbuf_jxl_animation_get_size; + klass->parent_class.get_iter = gdk_pixbuf_jxl_animation_get_iter; +} + +static void gdk_pixbuf_jxl_animation_iter_init(GdkPixbufJxlAnimationIter *obj) { + (void)glib_autoptr_cleanup_GdkPixbufJxlAnimationIter; + (void)GDK_JXL_ANIMATION_ITER; + (void)GDK_IS_JXL_ANIMATION_ITER; +} + +static int gdk_pixbuf_jxl_animation_iter_get_delay_time( + GdkPixbufAnimationIter *iter) { + GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter; + if (jxl_iter->animation->frames->len <= jxl_iter->current_frame) { + return 0; + } + return g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame, + jxl_iter->current_frame) + .duration_ms; +} + +static GdkPixbuf *gdk_pixbuf_jxl_animation_iter_get_pixbuf( + GdkPixbufAnimationIter *iter) { + GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter; + if (jxl_iter->animation->frames->len <= jxl_iter->current_frame) { + return NULL; + } + return g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame, + jxl_iter->current_frame) + .data; +} + +static gboolean gdk_pixbuf_jxl_animation_iter_on_currently_loading_frame( + GdkPixbufAnimationIter *iter) { + GdkPixbufJxlAnimationIter *jxl_iter = 
(GdkPixbufJxlAnimationIter *)iter; + if (jxl_iter->animation->frames->len <= jxl_iter->current_frame) { + return TRUE; + } + return !g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame, + jxl_iter->current_frame) + .decoded; +} + +G_GNUC_BEGIN_IGNORE_DEPRECATIONS +static gboolean gdk_pixbuf_jxl_animation_iter_advance( + GdkPixbufAnimationIter *iter, const GTimeVal *current_time) { + GdkPixbufJxlAnimationIter *jxl_iter = (GdkPixbufJxlAnimationIter *)iter; + size_t old_frame = jxl_iter->current_frame; + + uint64_t current_time_ms = current_time->tv_sec * 1000ULL + + current_time->tv_usec / 1000 - + jxl_iter->time_offset; + + if (jxl_iter->animation->frames->len == 0) { + jxl_iter->current_frame = 0; + } else if (!jxl_iter->animation->done && + current_time_ms >= jxl_iter->animation->total_duration_ms) { + jxl_iter->current_frame = jxl_iter->animation->frames->len - 1; + } else if (jxl_iter->animation->repetition_count != 0 && + current_time_ms > jxl_iter->animation->repetition_count * + jxl_iter->animation->total_duration_ms) { + jxl_iter->current_frame = jxl_iter->animation->frames->len - 1; + } else { + uint64_t total_duration_ms = jxl_iter->animation->total_duration_ms; + // Guard against divide-by-0 in malicious files. 
+ if (total_duration_ms == 0) total_duration_ms = 1; + uint64_t loop_offset = current_time_ms % total_duration_ms; + jxl_iter->current_frame = 0; + while (TRUE) { + uint64_t duration = + g_array_index(jxl_iter->animation->frames, GdkPixbufJxlAnimationFrame, + jxl_iter->current_frame) + .duration_ms; + if (duration >= loop_offset) { + break; + } + loop_offset -= duration; + jxl_iter->current_frame++; + } + } + + return old_frame != jxl_iter->current_frame; +} +G_GNUC_END_IGNORE_DEPRECATIONS + +static void gdk_pixbuf_jxl_animation_iter_finalize(GObject *obj) { + GdkPixbufJxlAnimationIter *iter = (GdkPixbufJxlAnimationIter *)obj; + g_object_unref(iter->animation); +} + +static void gdk_pixbuf_jxl_animation_iter_class_init( + GdkPixbufJxlAnimationIterClass *klass) { + G_OBJECT_CLASS(klass)->finalize = gdk_pixbuf_jxl_animation_iter_finalize; + klass->parent_class.get_delay_time = + gdk_pixbuf_jxl_animation_iter_get_delay_time; + klass->parent_class.get_pixbuf = gdk_pixbuf_jxl_animation_iter_get_pixbuf; + klass->parent_class.on_currently_loading_frame = + gdk_pixbuf_jxl_animation_iter_on_currently_loading_frame; + klass->parent_class.advance = gdk_pixbuf_jxl_animation_iter_advance; +} + +G_END_DECLS + +static gpointer begin_load(GdkPixbufModuleSizeFunc size_func, + GdkPixbufModulePreparedFunc prepare_func, + GdkPixbufModuleUpdatedFunc update_func, + gpointer user_data, GError **error) { + GdkPixbufJxlAnimation *decoder_state = + g_object_new(GDK_TYPE_PIXBUF_JXL_ANIMATION, NULL); + if (decoder_state == NULL) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Creation of the animation state failed"); + return NULL; + } + decoder_state->image_size_callback = size_func; + decoder_state->pixbuf_prepared_callback = prepare_func; + decoder_state->area_updated_callback = update_func; + decoder_state->user_data = user_data; + decoder_state->frames = + g_array_new(/*zero_terminated=*/FALSE, /*clear_=*/TRUE, + sizeof(GdkPixbufJxlAnimationFrame)); + + if 
(decoder_state->frames == NULL) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Creation of the frame array failed"); + goto cleanup; + } + + if (!(decoder_state->parallel_runner = + JxlResizableParallelRunnerCreate(NULL))) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Creation of the JXL parallel runner failed"); + goto cleanup; + } + + if (!(decoder_state->decoder = JxlDecoderCreate(NULL))) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Creation of the JXL decoder failed"); + goto cleanup; + } + + JxlDecoderStatus status; + + if ((status = JxlDecoderSetParallelRunner( + decoder_state->decoder, JxlResizableParallelRunner, + decoder_state->parallel_runner)) != JXL_DEC_SUCCESS) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JxlDecoderSetParallelRunner failed: %x", status); + goto cleanup; + } + if ((status = JxlDecoderSubscribeEvents( + decoder_state->decoder, JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING | + JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME)) != + JXL_DEC_SUCCESS) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JxlDecoderSubscribeEvents failed: %x", status); + goto cleanup; + } + + decoder_state->pixel_format.data_type = JXL_TYPE_FLOAT; + decoder_state->pixel_format.endianness = JXL_NATIVE_ENDIAN; + + return decoder_state; +cleanup: + JxlResizableParallelRunnerDestroy(decoder_state->parallel_runner); + JxlDecoderDestroy(decoder_state->decoder); + g_object_unref(decoder_state); + return NULL; +} + +static gboolean stop_load(gpointer context, GError **error) { + g_object_unref(context); + return TRUE; +} + +static void draw_pixels(void *context, size_t x, size_t y, size_t num_pixels, + const void *pixels) { + GdkPixbufJxlAnimation *decoder_state = context; + + GdkPixbuf *output = + g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame, + decoder_state->frames->len - 1) + .data; + + guchar *dst = gdk_pixbuf_get_pixels(output) + + 
decoder_state->pixel_format.num_channels * x + + gdk_pixbuf_get_rowstride(output) * y; + + cmsDoTransform(decoder_state->transform, pixels, dst, num_pixels); +} + +static gboolean load_increment(gpointer context, const guchar *buf, guint size, + GError **error) { + GdkPixbufJxlAnimation *decoder_state = context; + if (decoder_state->done == TRUE) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JXL decoder load_increment called after end of file"); + return FALSE; + } + + JxlDecoderStatus status; + + if ((status = JxlDecoderSetInput(decoder_state->decoder, buf, size)) != + JXL_DEC_SUCCESS) { + // Should never happen if things are done properly. + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JXL decoder logic error: %x", status); + return FALSE; + } + + for (;;) { + status = JxlDecoderProcessInput(decoder_state->decoder); + switch (status) { + case JXL_DEC_NEED_MORE_INPUT: { + JxlDecoderReleaseInput(decoder_state->decoder); + return TRUE; + } + + case JXL_DEC_BASIC_INFO: { + JxlBasicInfo info; + if (JxlDecoderGetBasicInfo(decoder_state->decoder, &info) != + JXL_DEC_SUCCESS) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JXLDecoderGetBasicInfo failed"); + return FALSE; + } + decoder_state->pixel_format.num_channels = info.alpha_bits > 0 ? 
4 : 3; + decoder_state->alpha_premultiplied = info.alpha_premultiplied; + decoder_state->xsize = info.xsize; + decoder_state->ysize = info.ysize; + decoder_state->has_animation = info.have_animation; + decoder_state->has_alpha = info.alpha_bits > 0; + if (info.have_animation) { + decoder_state->repetition_count = info.animation.num_loops; + decoder_state->tick_duration_us = 1000000ULL * + info.animation.tps_denominator / + info.animation.tps_numerator; + } + gint width = info.xsize; + gint height = info.ysize; + if (decoder_state->image_size_callback) { + decoder_state->image_size_callback(&width, &height, + decoder_state->user_data); + } + + // GDK convention for signaling being interested only in the basic info. + if (width == 0 || height == 0) { + decoder_state->done = TRUE; + return TRUE; + } + + // Set an appropriate number of threads for the image size. + JxlResizableParallelRunnerSetThreads( + decoder_state->parallel_runner, + JxlResizableParallelRunnerSuggestThreads(info.xsize, info.ysize)); + break; + } + + case JXL_DEC_COLOR_ENCODING: { + // Get the ICC color profile of the pixel data + size_t icc_size; + if (JXL_DEC_SUCCESS != JxlDecoderGetICCProfileSize( + decoder_state->decoder, + JXL_COLOR_PROFILE_TARGET_DATA, &icc_size)) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JxlDecoderGetICCProfileSize failed"); + return FALSE; + } + if (!(decoder_state->icc_buff = g_malloc(icc_size))) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Allocating ICC profile failed"); + return FALSE; + } + if (JXL_DEC_SUCCESS != + JxlDecoderGetColorAsICCProfile(decoder_state->decoder, + JXL_COLOR_PROFILE_TARGET_DATA, + decoder_state->icc_buff, icc_size)) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JxlDecoderGetColorAsICCProfile failed"); + return FALSE; + } + decoder_state->context = cmsCreateContext(NULL, NULL); + if (!decoder_state->context) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + 
"Failed to create LCMS2 context"); + return FALSE; + } + decoder_state->profile = cmsOpenProfileFromMemTHR( + decoder_state->context, decoder_state->icc_buff, icc_size); + if (!decoder_state->profile) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Invalid ICC profile from JXL image decoder"); + return FALSE; + } + decoder_state->srgb = cmsCreate_sRGBProfileTHR(decoder_state->context); + if (!decoder_state->srgb) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Failed to create sRGB profile"); + return FALSE; + } + decoder_state->transform = cmsCreateTransformTHR( + decoder_state->context, decoder_state->profile, + decoder_state->has_alpha ? TYPE_RGBA_FLT : TYPE_RGB_FLT, + decoder_state->srgb, + decoder_state->has_alpha ? TYPE_RGBA_8 : TYPE_RGB_8, + INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_COPY_ALPHA); + if (!decoder_state->transform) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Failed to create LCMS2 color transform"); + return FALSE; + } + + break; + } + + case JXL_DEC_FRAME: { + // TODO(veluca): support rescaling. 
+ JxlFrameHeader frame_header; + if (JxlDecoderGetFrameHeader(decoder_state->decoder, &frame_header) != + JXL_DEC_SUCCESS) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Failed to retrieve frame info"); + return FALSE; + } + + { + GdkPixbufJxlAnimationFrame frame; + frame.decoded = FALSE; + frame.duration_ms = + frame_header.duration * decoder_state->tick_duration_us / 1000; + decoder_state->total_duration_ms += frame.duration_ms; + frame.data = + gdk_pixbuf_new(GDK_COLORSPACE_RGB, decoder_state->has_alpha, + /*bits_per_sample=*/8, decoder_state->xsize, + decoder_state->ysize); + if (frame.data == NULL) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "Failed to allocate output pixel buffer"); + return FALSE; + } + decoder_state->pixel_format.align = + gdk_pixbuf_get_rowstride(frame.data); + g_array_append_val(decoder_state->frames, frame); + } + if (decoder_state->pixbuf_prepared_callback && + decoder_state->frames->len == 1) { + decoder_state->pixbuf_prepared_callback( + g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame, + 0) + .data, + decoder_state->has_animation ? (GdkPixbufAnimation *)decoder_state + : NULL, + decoder_state->user_data); + } + break; + } + + case JXL_DEC_NEED_IMAGE_OUT_BUFFER: { + if (JXL_DEC_SUCCESS != + JxlDecoderSetImageOutCallback(decoder_state->decoder, + &decoder_state->pixel_format, + draw_pixels, decoder_state)) { + g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED, + "JxlDecoderSetImageOutCallback failed"); + return FALSE; + } + break; + } + + case JXL_DEC_FULL_IMAGE: { + // TODO(veluca): consider doing partial updates. 
+        if (decoder_state->area_updated_callback) {
+          GdkPixbuf *output = g_array_index(decoder_state->frames,
+                                            GdkPixbufJxlAnimationFrame, 0)
+                                  .data;
+          decoder_state->area_updated_callback(
+              output, 0, 0, gdk_pixbuf_get_width(output),
+              gdk_pixbuf_get_height(output), decoder_state->user_data);
+        }
+        g_array_index(decoder_state->frames, GdkPixbufJxlAnimationFrame,
+                      decoder_state->frames->len - 1)
+            .decoded = TRUE;
+        break;
+      }
+
+      case JXL_DEC_SUCCESS: {
+        decoder_state->done = TRUE;
+        return TRUE;
+      }
+
+      default: {
+        g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                    "Unexpected JxlDecoderProcessInput return code: %x",
+                    status);
+        return FALSE;
+      }
+    }
+  }
+  return TRUE;
+}
+
+// Reports which save options this module understands; "quality" is the only
+// one. A NULL key is handled by g_strcmp0 and yields FALSE.
+static gboolean jxl_is_save_option_supported(const gchar *option_key) {
+  if (g_strcmp0(option_key, "quality") == 0) {
+    return TRUE;
+  }
+
+  return FALSE;
+}
+
+// Encodes `pixbuf` as a bare JPEG XL codestream (no container) and writes it
+// to `f`.
+//
+// keys/values: parallel NULL-terminated option arrays. Only "quality" (an
+//   integer from 0 to 100, default 90) is interpreted; other keys trigger a
+//   g_warning and are ignored.
+// Returns TRUE on success. On failure returns FALSE and, except when `f` or
+// `pixbuf` is NULL, fills in `error`.
+//
+// Only 8-bit pixbufs with 3 channels (RGB) or 4 channels (RGB + alpha) are
+// accepted.
+static gboolean jxl_image_saver(FILE *f, GdkPixbuf *pixbuf, gchar **keys,
+                                gchar **values, GError **error) {
+  long quality = 90; /* default; must be between 0 and 100 */
+  double distance;
+  gboolean save_alpha;
+  gboolean ok = FALSE;
+  JxlEncoder *encoder = NULL;
+  void *parallel_runner = NULL;
+  JxlEncoderFrameSettings *frame_settings;
+  JxlBasicInfo output_info;
+  JxlPixelFormat pixel_format;
+  JxlColorEncoding color_profile;
+  JxlEncoderStatus status;
+
+  GByteArray *compressed = NULL;
+  size_t offset = 0;
+  uint8_t *next_out;
+  size_t avail_out;
+
+  if (f == NULL || pixbuf == NULL) {
+    return FALSE;
+  }
+
+  if (keys && *keys) {
+    gchar **kiter = keys;
+    gchar **viter = values;
+
+    while (*kiter) {
+      if (strcmp(*kiter, "quality") == 0) {
+        char *endptr = NULL;
+        quality = strtol(*viter, &endptr, 10);
+
+        if (endptr == *viter) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_BAD_OPTION,
+                      "JXL quality must be a value between 0 and 100; value "
+                      "\"%s\" could not be parsed.",
+                      *viter);
+
+          return FALSE;
+        }
+
+        if (quality < 0 || quality > 100) {
+          g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_BAD_OPTION,
+                      "JXL quality must be a value between 0 and 100; value "
+                      "\"%ld\" is not allowed.",
+                      quality);
+
+          return FALSE;
+        }
+      } else {
+        g_warning("Unrecognized parameter (%s) passed to JXL saver.", *kiter);
+      }
+
+      ++kiter;
+      ++viter;
+    }
+  }
+
+  if (gdk_pixbuf_get_bits_per_sample(pixbuf) != 8) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+                "Sorry, only 8bit images are supported by this JXL saver");
+    return FALSE;
+  }
+
+  JxlEncoderInitBasicInfo(&output_info);
+  output_info.have_container = JXL_FALSE;
+  output_info.xsize = gdk_pixbuf_get_width(pixbuf);
+  output_info.ysize = gdk_pixbuf_get_height(pixbuf);
+  output_info.bits_per_sample = 8;
+  output_info.orientation = JXL_ORIENT_IDENTITY;
+  output_info.num_color_channels = 3;
+
+  if (output_info.xsize == 0 || output_info.ysize == 0) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_CORRUPT_IMAGE,
+                "Empty image, nothing to save");
+    return FALSE;
+  }
+
+  save_alpha = gdk_pixbuf_get_has_alpha(pixbuf);
+
+  pixel_format.data_type = JXL_TYPE_UINT8;
+  pixel_format.endianness = JXL_NATIVE_ENDIAN;
+  // Hand the pixbuf's own row stride to the encoder as the row alignment.
+  pixel_format.align = gdk_pixbuf_get_rowstride(pixbuf);
+
+  if (save_alpha) {
+    if (gdk_pixbuf_get_n_channels(pixbuf) != 4) {
+      g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+                  "Unsupported number of channels");
+      return FALSE;
+    }
+
+    output_info.num_extra_channels = 1;
+    output_info.alpha_bits = 8;
+    pixel_format.num_channels = 4;
+  } else {
+    if (gdk_pixbuf_get_n_channels(pixbuf) != 3) {
+      g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_UNKNOWN_TYPE,
+                  "Unsupported number of channels");
+      return FALSE;
+    }
+
+    output_info.num_extra_channels = 0;
+    output_info.alpha_bits = 0;
+    pixel_format.num_channels = 3;
+  }
+
+  // From here on every exit goes through `cleanup` so that the encoder, the
+  // parallel runner and the output buffer are released exactly once.
+  encoder = JxlEncoderCreate(NULL);
+  if (!encoder) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the JXL encoder failed");
+    return FALSE;
+  }
+
+  parallel_runner = JxlResizableParallelRunnerCreate(NULL);
+  if (!parallel_runner) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Creation of the JXL parallel runner failed");
+    goto cleanup;
+  }
+
+  JxlResizableParallelRunnerSetThreads(
+      parallel_runner, JxlResizableParallelRunnerSuggestThreads(
+                           output_info.xsize, output_info.ysize));
+
+  status = JxlEncoderSetParallelRunner(encoder, JxlResizableParallelRunner,
+                                       parallel_runner);
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderSetParallelRunner failed: %x", status);
+    goto cleanup;
+  }
+
+  // Map the 0..100 quality onto a Butteraugli distance: 100 selects lossless
+  // mode, 30..99 is linear, and below 30 the distance grows quadratically.
+  if (quality > 99) {
+    output_info.uses_original_profile = JXL_TRUE;
+    distance = 0;
+  } else {
+    output_info.uses_original_profile = JXL_FALSE;
+    if (quality >= 30) {
+      distance = 0.1 + (100 - quality) * 0.09;
+    } else {
+      distance =
+          53.0 / 3000.0 * quality * quality - 23.0 / 20.0 * quality + 25.0;
+    }
+  }
+
+  status = JxlEncoderSetBasicInfo(encoder, &output_info);
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderSetBasicInfo failed: %x", status);
+    goto cleanup;
+  }
+
+  JxlColorEncodingSetToSRGB(&color_profile, JXL_FALSE);
+  status = JxlEncoderSetColorEncoding(encoder, &color_profile);
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderSetColorEncoding failed: %x", status);
+    goto cleanup;
+  }
+
+  frame_settings = JxlEncoderFrameSettingsCreate(encoder, NULL);
+  JxlEncoderSetFrameDistance(frame_settings, distance);
+  JxlEncoderSetFrameLossless(frame_settings, output_info.uses_original_profile);
+
+  status = JxlEncoderAddImageFrame(frame_settings, &pixel_format,
+                                   gdk_pixbuf_read_pixels(pixbuf),
+                                   gdk_pixbuf_get_byte_length(pixbuf));
+  if (status != JXL_ENC_SUCCESS) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JxlEncoderAddImageFrame failed: %x", status);
+    goto cleanup;
+  }
+
+  JxlEncoderCloseInput(encoder);
+
+  // Drain the encoder into a growable buffer, doubling it whenever the
+  // encoder asks for more room. `offset` remembers how much is already
+  // written because growing the array may move its storage.
+  compressed = g_byte_array_sized_new(4096);
+  g_byte_array_set_size(compressed, 4096);
+  do {
+    next_out = compressed->data + offset;
+    avail_out = compressed->len - offset;
+    status = JxlEncoderProcessOutput(encoder, &next_out, &avail_out);
+
+    if (status == JXL_ENC_NEED_MORE_OUTPUT) {
+      offset = next_out - compressed->data;
+      g_byte_array_set_size(compressed, compressed->len * 2);
+    } else if (status == JXL_ENC_ERROR) {
+      g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                  "JxlEncoderProcessOutput failed: %x", status);
+      goto cleanup;
+    }
+  } while (status != JXL_ENC_SUCCESS);
+
+  // Trim the buffer to the bytes the encoder actually produced.
+  g_byte_array_set_size(compressed, next_out - compressed->data);
+  if (compressed->len == 0) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "JXL encoder produced no output");
+    goto cleanup;
+  }
+
+  if (fwrite(compressed->data, 1, compressed->len, f) != compressed->len) {
+    g_set_error(error, GDK_PIXBUF_ERROR, GDK_PIXBUF_ERROR_FAILED,
+                "Failed to write JXL data");
+    goto cleanup;
+  }
+
+  ok = TRUE;
+
+cleanup:
+  if (compressed != NULL) {
+    g_byte_array_free(compressed, TRUE);
+  }
+  if (parallel_runner != NULL) {
+    JxlResizableParallelRunnerDestroy(parallel_runner);
+  }
+  JxlEncoderDestroy(encoder);
+  return ok;
+}
+
+// Module entry point: installs the incremental-loading and saving callbacks.
+void fill_vtable(GdkPixbufModule *module) {
+  module->begin_load = begin_load;
+  module->stop_load = stop_load;
+  module->load_increment = load_increment;
+  module->is_save_option_supported = jxl_is_save_option_supported;
+  module->save = jxl_image_saver;
+}
+
+// Module entry point: describes the JPEG XL format (signatures, MIME type,
+// extension, capabilities) to gdk-pixbuf.
+void fill_info(GdkPixbufFormat *info) {
+  // Each mask must be exactly as long as its prefix: ' ' means "match this
+  // byte", 'z' means "match a zero byte". The first entry is the bare
+  // codestream marker, the second the 12-byte container signature whose
+  // first three bytes are zero.
+  static GdkPixbufModulePattern signature[] = {
+      {"\xFF\x0A", "  ", 100},
+      {"...\x0CJXL \x0D\x0A\x87\x0A", "zzz         ", 100},
+      {NULL, NULL, 0},
+  };
+
+  static gchar *mime_types[] = {"image/jxl", NULL};
+
+  static gchar *extensions[] = {"jxl", NULL};
+
+  info->name = "jxl";
+  info->signature = signature;
+  info->description = "JPEG XL image";
+  info->mime_types = mime_types;
+  info->extensions = extensions;
+  info->flags = GDK_PIXBUF_FORMAT_WRITABLE | GDK_PIXBUF_FORMAT_THREADSAFE;
+  info->license = "BSD-3";
+}
diff --git a/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader_test.cc b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader_test.cc
new file mode 100644
index 0000000000..5e5642d491
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gdk-pixbuf/pixbufloader_test.cc
@@ -0,0 +1,41 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include
+#include
+#include
+#include
+
+int main(int argc, char* argv[]) {
+  if (argc != 3) {
+    fprintf(stderr, "Usage: %s <loaders_cache> <filename>\n", argv[0]);
+    return 1;
+  }
+
+  const char* loaders_cache = argv[1];
+  const char* filename = argv[2];
+  setenv("GDK_PIXBUF_MODULE_FILE", loaders_cache, true);
+
+  // XDG_DATA_HOME is the path where we look for the mime cache.
+  // XDG_DATA_DIRS directories are used in addition to XDG_DATA_HOME.
+  setenv("XDG_DATA_HOME", ".", true);
+  setenv("XDG_DATA_DIRS", "", true);
+
+  if (!gdk_init_check(nullptr, nullptr)) {
+    fprintf(stderr, "This test requires a DISPLAY\n");
+    // Signals ctest that we should mark this test as skipped.
+    return 254;
+  }
+  GError* error = nullptr;
+  GdkPixbuf* pb = gdk_pixbuf_new_from_file(filename, &error);
+  if (pb != nullptr) {
+    g_object_unref(pb);
+    return 0;
+  } else {
+    fprintf(stderr, "Error loading file: %s\n", filename);
+    g_assert_no_error(error);
+    return 1;
+  }
+}
diff --git a/third-party/libjxl/libjxl/plugins/gimp/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/gimp/CMakeLists.txt
new file mode 100644
index 0000000000..f0a49005ed
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) the JPEG XL Project Authors. All rights reserved.
+#
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+find_package(PkgConfig)
+pkg_check_modules(Gimp IMPORTED_TARGET gimp-2.0>=2.10 gimpui-2.0>=2.10)
+
+if (NOT Gimp_FOUND)
+  message(WARNING "Gimp development libraries not found, the Gimp plugin will not be built")
+  return ()
+endif ()
+
+add_executable(file-jxl WIN32
+  common.h
+  common.cc
+  file-jxl-load.cc
+  file-jxl-load.h
+  file-jxl-save.cc
+  file-jxl-save.h
+  file-jxl.cc)
+target_link_libraries(file-jxl jxl jxl_threads PkgConfig::Gimp)
+
+target_include_directories(file-jxl PUBLIC
+  ${PROJECT_SOURCE_DIR})  # for plugins/gimp absolute paths.
+
+pkg_get_variable(GIMP_LIB_DIR gimp-2.0 gimplibdir)
+install(TARGETS file-jxl RUNTIME DESTINATION "${GIMP_LIB_DIR}/plug-ins/file-jxl/")
diff --git a/third-party/libjxl/libjxl/plugins/gimp/common.cc b/third-party/libjxl/libjxl/plugins/gimp/common.cc
new file mode 100644
index 0000000000..1a884570cb
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/common.cc
@@ -0,0 +1,30 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+// Opens a GIMP progress bar captioned with `message`. The bar is divided into
+// `max_progress` equal steps and starts empty.
+JpegXlGimpProgress::JpegXlGimpProgress(const char *message)
+    : cur_progress(0), max_progress(100) {
+  gimp_progress_init_printf("%s\n", message);
+}
+
+// Advances the bar by one step. Callers may tick more often than
+// `max_progress` times (e.g. once per decoder event), so the counter
+// saturates instead of reporting a fraction above 1.0.
+void JpegXlGimpProgress::update() {
+  if (cur_progress < max_progress) {
+    ++cur_progress;
+  }
+  gimp_progress_update((float)cur_progress / (float)max_progress);
+}
+
+// Fills the bar completely.
+void JpegXlGimpProgress::finished() { gimp_progress_update(1.0); }
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/plugins/gimp/common.h b/third-party/libjxl/libjxl/plugins/gimp/common.h
new file mode 100644
index 0000000000..3fe63c1a47
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/common.h
@@ -0,0 +1,45 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_COMMON_H_
+#define PLUGINS_GIMP_COMMON_H_
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#define PLUG_IN_BINARY "file-jxl"
+#define SAVE_PROC "file-jxl-save"
+
+// Defined by both FUIF and glib.
+#undef MAX
+#undef MIN
+#undef CLAMP
+
+#include
+#include
+
+namespace jxl {
+
+// Step-based progress reporter shared by the GIMP load and save code.
+class JpegXlGimpProgress {
+ public:
+  // Starts progress reporting, using `message` as the caption.
+  explicit JpegXlGimpProgress(const char *message);
+  // Advances the reported progress by one step.
+  void update();
+  // Reports the operation as complete.
+  void finished();
+
+ private:
+  // Number of steps reported so far.
+  int cur_progress;
+  // Step count that corresponds to a full bar.
+  int max_progress;
+
+};  // class JpegXlGimpProgress
+
+}  // namespace jxl
+
+#endif  // PLUGINS_GIMP_COMMON_H_
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.cc b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.cc
new file mode 100644
index 0000000000..ec2ac19e69
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.cc
@@ -0,0 +1,486 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/file-jxl-load.h"
+
+#include
+#include
+
+#define _PROFILE_ORIGIN_ JXL_COLOR_PROFILE_TARGET_ORIGINAL
+#define _PROFILE_TARGET_ JXL_COLOR_PROFILE_TARGET_DATA
+#define LOAD_PROC "file-jxl-load"
+
+namespace jxl {
+
+// Allocates an output buffer large enough for one decoded frame in `format`
+// and registers it with the decoder.
+//
+// On success `*buffer_size` holds the buffer size in bytes and
+// `*pixels_buffer_1` the g_malloc'ed buffer, which the caller must g_free.
+// On failure returns false, logs the reason, and leaves `*pixels_buffer_1`
+// not pointing at any live allocation made here.
+bool SetJpegXlOutBuffer(
+    std::unique_ptr<JxlDecoder, JxlDecoderDestroyStruct> *dec,
+    JxlPixelFormat *format, size_t *buffer_size, gpointer *pixels_buffer_1) {
+  if (JXL_DEC_SUCCESS !=
+      JxlDecoderImageOutBufferSize(dec->get(), format, buffer_size)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderImageOutBufferSize failed\n");
+    return false;
+  }
+  *pixels_buffer_1 = g_malloc(*buffer_size);
+  if (JXL_DEC_SUCCESS != JxlDecoderSetImageOutBuffer(dec->get(), format,
+                                                     *pixels_buffer_1,
+                                                     *buffer_size)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetImageOutBuffer failed\n");
+    // The decoder rejected the buffer, so nobody else will release it.
+    g_free(*pixels_buffer_1);
+    *pixels_buffer_1 = nullptr;
+    return false;
+  }
+  return true;
+}
+
+// Decodes the JPEG XL file `filename` into a new GIMP image and stores its id
+// in `*image_id`.
+//
+// Every decoded frame becomes one layer. Frames are decoded as float and the
+// image is converted to the precision matching the file's bit depth at the
+// end. A truncated file is loaded as far as the decoder can flush it.
+// Returns false (after logging to stderr) if the file cannot be read or
+// decoded.
+bool LoadJpegXlImage(const gchar *const filename, gint32 *const image_id) {
+  bool stop_processing = false;
+  JxlDecoderStatus status = JXL_DEC_NEED_MORE_INPUT;
+  std::vector<uint8_t> icc_profile;
+  GimpColorProfile *profile_icc = nullptr;
+  GimpColorProfile *profile_int = nullptr;
+  bool is_linear = false;
+  unsigned long xsize = 0, ysize = 0;
+  long crop_x0 = 0, crop_y0 = 0;
+  size_t layer_idx = 0;
+  uint32_t frame_duration = 0;
+  double tps_denom = 1.f, tps_numer = 1.f;
+
+  gint32 layer;
+
+  gpointer pixels_buffer_1 = nullptr;
+  gpointer pixels_buffer_2 = nullptr;
+  size_t buffer_size = 0;
+
+  GimpImageBaseType image_type = GIMP_RGB;
+  GimpImageType layer_type = GIMP_RGB_IMAGE;
+  GimpPrecision precision = GIMP_PRECISION_U16_GAMMA;
+  JxlBasicInfo info = {};
+  JxlPixelFormat format = {};
+  JxlAnimationHeader animation = {};
+  JxlBlendMode blend_mode = JXL_BLEND_BLEND;
+  // Name of the current frame; owned by the string so it is released on
+  // every exit path.
+  std::string frame_name;
+  size_t frame_name_len = 0;
+
+  format.num_channels = 4;
+  format.data_type = JXL_TYPE_FLOAT;
+  format.endianness = JXL_NATIVE_ENDIAN;
+  format.align = 0;
+
+  bool is_gray = false;
+
+  JpegXlGimpProgress gimp_load_progress(
+      ("Opening JPEG XL file:" + std::string(filename)).c_str());
+  gimp_load_progress.update();
+
+  // read file
+  std::ifstream instream(filename, std::ios::in | std::ios::binary);
+  if (!instream) {
+    g_printerr(LOAD_PROC " Error: Could not open file for reading\n");
+    return false;
+  }
+  std::vector<uint8_t> compressed((std::istreambuf_iterator<char>(instream)),
+                                  std::istreambuf_iterator<char>());
+  instream.close();
+
+  gimp_load_progress.update();
+
+  // multi-threaded parallel runner.
+  auto runner = JxlResizableParallelRunnerMake(nullptr);
+
+  auto dec = JxlDecoderMake(nullptr);
+  if (JXL_DEC_SUCCESS !=
+      JxlDecoderSubscribeEvents(
+          dec.get(), JXL_DEC_BASIC_INFO | JXL_DEC_COLOR_ENCODING |
+                         JXL_DEC_FULL_IMAGE | JXL_DEC_FRAME_PROGRESSION |
+                         JXL_DEC_FRAME)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSubscribeEvents failed\n");
+    return false;
+  }
+
+  if (JXL_DEC_SUCCESS != JxlDecoderSetParallelRunner(dec.get(),
+                                                     JxlResizableParallelRunner,
+                                                     runner.get())) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetParallelRunner failed\n");
+    return false;
+  }
+  // TODO: make this work with coalescing set to false, while handling frames
+  // with duration 0 and references to earlier frames correctly.
+  if (JXL_DEC_SUCCESS != JxlDecoderSetCoalescing(dec.get(), JXL_TRUE)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetCoalescing failed\n");
+    return false;
+  }
+
+  // grand decode loop...
+  JxlDecoderSetInput(dec.get(), compressed.data(), compressed.size());
+
+  if (JXL_DEC_SUCCESS != JxlDecoderSetProgressiveDetail(
+                             dec.get(), JxlProgressiveDetail::kPasses)) {
+    g_printerr(LOAD_PROC " Error: JxlDecoderSetProgressiveDetail failed\n");
+    return false;
+  }
+
+  while (true) {
+    gimp_load_progress.update();
+
+    // Once input ran out, `status` is driven by hand (flush -> full image ->
+    // success) instead of by the decoder.
+    if (!stop_processing) status = JxlDecoderProcessInput(dec.get());
+
+    if (status == JXL_DEC_BASIC_INFO) {
+      if (JXL_DEC_SUCCESS != JxlDecoderGetBasicInfo(dec.get(), &info)) {
+        g_printerr(LOAD_PROC " Error: JxlDecoderGetBasicInfo failed\n");
+        return false;
+      }
+
+      xsize = info.xsize;
+      ysize = info.ysize;
+      if (info.have_animation) {
+        animation = info.animation;
+        tps_denom = animation.tps_denominator;
+        tps_numer = animation.tps_numerator;
+      }
+
+      JxlResizableParallelRunnerSetThreads(
+          runner.get(), JxlResizableParallelRunnerSuggestThreads(xsize, ysize));
+    } else if (status == JXL_DEC_COLOR_ENCODING) {
+      // check for ICC profile
+      size_t icc_size = 0;
+      JxlColorEncoding color_encoding;
+      if (JXL_DEC_SUCCESS !=
+          JxlDecoderGetColorAsEncodedProfile(dec.get(), _PROFILE_ORIGIN_,
+                                             &color_encoding)) {
+        // Attempt to load ICC profile when no internal color encoding
+        if (JXL_DEC_SUCCESS != JxlDecoderGetICCProfileSize(
+                                   dec.get(), _PROFILE_ORIGIN_, &icc_size)) {
+          g_printerr(LOAD_PROC
+                     " Warning: JxlDecoderGetICCProfileSize failed\n");
+        }
+
+        if (icc_size > 0) {
+          icc_profile.resize(icc_size);
+          if (JXL_DEC_SUCCESS != JxlDecoderGetColorAsICCProfile(
+                                     dec.get(), _PROFILE_ORIGIN_,
+                                     icc_profile.data(), icc_profile.size())) {
+            g_printerr(LOAD_PROC
+                       " Warning: JxlDecoderGetColorAsICCProfile failed\n");
+          }
+
+          profile_icc = gimp_color_profile_new_from_icc_profile(
+              icc_profile.data(), icc_profile.size(), nullptr);
+
+          if (profile_icc) {
+            is_linear = gimp_color_profile_is_linear(profile_icc);
+            g_printerr(LOAD_PROC " Info: Color profile is_linear = %d\n",
+                       is_linear);
+          } else {
+            g_printerr(LOAD_PROC " Warning: Failed to read ICC profile.\n");
+          }
+        } else {
+          g_printerr(LOAD_PROC " Warning: Empty ICC data.\n");
+        }
+      }
+
+      // Internal color profile detection...
+      if (JXL_DEC_SUCCESS ==
+          JxlDecoderGetColorAsEncodedProfile(dec.get(), _PROFILE_TARGET_,
+                                             &color_encoding)) {
+        g_printerr(LOAD_PROC " Info: Internal color encoding detected.\n");
+
+        // figure out linearity of internal profile
+        switch (color_encoding.transfer_function) {
+          case JXL_TRANSFER_FUNCTION_LINEAR:
+            is_linear = true;
+            break;
+
+          case JXL_TRANSFER_FUNCTION_709:
+          case JXL_TRANSFER_FUNCTION_PQ:
+          case JXL_TRANSFER_FUNCTION_HLG:
+          case JXL_TRANSFER_FUNCTION_GAMMA:
+          case JXL_TRANSFER_FUNCTION_DCI:
+          case JXL_TRANSFER_FUNCTION_SRGB:
+            is_linear = false;
+            break;
+
+          case JXL_TRANSFER_FUNCTION_UNKNOWN:
+          default:
+            if (profile_icc) {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown transfer function. "
+                         "ICC profile is present.\n");
+            } else {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown transfer function. "
+                         "No ICC profile present.\n");
+            }
+            break;
+        }
+
+        switch (color_encoding.color_space) {
+          case JXL_COLOR_SPACE_RGB:
+            if (color_encoding.white_point == JXL_WHITE_POINT_D65 &&
+                color_encoding.primaries == JXL_PRIMARIES_SRGB) {
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_rgb_srgb_linear();
+              } else {
+                profile_int = gimp_color_profile_new_rgb_srgb();
+              }
+            } else if (!is_linear &&
+                       color_encoding.white_point == JXL_WHITE_POINT_D65 &&
+                       (color_encoding.primaries_green_xy[0] == 0.2100 ||
+                        color_encoding.primaries_green_xy[1] == 0.7100)) {
+              // Probably Adobe RGB
+              profile_int = gimp_color_profile_new_rgb_adobe();
+            } else if (profile_icc) {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown RGB colorspace. "
+                         "Using ICC profile.\n");
+            } else {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown RGB colorspace. "
+                         "Treating as sRGB.\n");
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_rgb_srgb_linear();
+              } else {
+                profile_int = gimp_color_profile_new_rgb_srgb();
+              }
+            }
+            break;
+
+          case JXL_COLOR_SPACE_GRAY:
+            is_gray = true;
+            if (!profile_icc ||
+                color_encoding.white_point == JXL_WHITE_POINT_D65) {
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_d65_gray_linear();
+              } else {
+                profile_int = gimp_color_profile_new_d65_gray_srgb_trc();
+              }
+            }
+            break;
+          case JXL_COLOR_SPACE_XYB:
+          case JXL_COLOR_SPACE_UNKNOWN:
+          default:
+            if (profile_icc) {
+              g_printerr(LOAD_PROC
+                         " Info: Unknown colorspace. Using ICC profile.\n");
+            } else {
+              // This is only a warning: g_error() would abort the plug-in
+              // before the sRGB fallback below could run.
+              g_printerr(
+                  LOAD_PROC
+                  " Warning: Unknown colorspace. Treating as sRGB profile.\n");
+
+              if (is_linear) {
+                profile_int = gimp_color_profile_new_rgb_srgb_linear();
+              } else {
+                profile_int = gimp_color_profile_new_rgb_srgb();
+              }
+            }
+            break;
+        }
+      }
+
+      // set pixel format
+      if (info.num_color_channels > 1) {
+        if (info.alpha_bits == 0) {
+          image_type = GIMP_RGB;
+          layer_type = GIMP_RGB_IMAGE;
+          format.num_channels = info.num_color_channels;
+        } else {
+          image_type = GIMP_RGB;
+          layer_type = GIMP_RGBA_IMAGE;
+          format.num_channels = info.num_color_channels + 1;
+        }
+      } else if (info.num_color_channels == 1) {
+        if (info.alpha_bits == 0) {
+          image_type = GIMP_GRAY;
+          layer_type = GIMP_GRAY_IMAGE;
+          format.num_channels = info.num_color_channels;
+        } else {
+          image_type = GIMP_GRAY;
+          layer_type = GIMP_GRAYA_IMAGE;
+          format.num_channels = info.num_color_channels + 1;
+        }
+      }
+
+      // Set image bit depth and linearity
+      if (info.bits_per_sample <= 8) {
+        if (is_linear) {
+          precision = GIMP_PRECISION_U8_LINEAR;
+        } else {
+          precision = GIMP_PRECISION_U8_GAMMA;
+        }
+      } else if (info.bits_per_sample <= 16) {
+        if (info.exponent_bits_per_sample > 0) {
+          if (is_linear) {
+            precision = GIMP_PRECISION_HALF_LINEAR;
+          } else {
+            precision = GIMP_PRECISION_HALF_GAMMA;
+          }
+        } else if (is_linear) {
+          precision = GIMP_PRECISION_U16_LINEAR;
+        } else {
+          precision = GIMP_PRECISION_U16_GAMMA;
+        }
+      } else {
+        if (info.exponent_bits_per_sample > 0) {
+          if (is_linear) {
+            precision = GIMP_PRECISION_FLOAT_LINEAR;
+          } else {
+            precision = GIMP_PRECISION_FLOAT_GAMMA;
+          }
+        } else if (is_linear) {
+          precision = GIMP_PRECISION_U32_LINEAR;
+        } else {
+          precision = GIMP_PRECISION_U32_GAMMA;
+        }
+      }
+
+      // create new image
+      if (is_linear) {
+        *image_id = gimp_image_new_with_precision(xsize, ysize, image_type,
+                                                  GIMP_PRECISION_FLOAT_LINEAR);
+      } else {
+        *image_id = gimp_image_new_with_precision(xsize, ysize, image_type,
+                                                  GIMP_PRECISION_FLOAT_GAMMA);
+      }
+
+      if (profile_int) {
+        gimp_image_set_color_profile(*image_id, profile_int);
+      } else if (!profile_icc) {
+        g_printerr(LOAD_PROC " Warning: No color profile.\n");
+      }
+    } else if (status == JXL_DEC_NEED_IMAGE_OUT_BUFFER) {
+      // get image from decoder in FLOAT
+      format.data_type = JXL_TYPE_FLOAT;
+      if (!SetJpegXlOutBuffer(&dec, &format, &buffer_size, &pixels_buffer_1))
+        return false;
+    } else if (status == JXL_DEC_FULL_IMAGE) {
+      // create and insert layer
+      gchar *layer_name;
+      if (layer_idx == 0 && !info.have_animation) {
+        layer_name = g_strdup_printf("Background");
+      } else {
+        // Suffix that records how the frame is blended onto earlier ones.
+        const char *blend;
+        if (blend_mode == JXL_BLEND_REPLACE) {
+          blend = " (replace)";
+        } else if (blend_mode == JXL_BLEND_BLEND) {
+          blend = " (combine)";
+        } else {
+          blend = "";
+        }
+        // Unnamed frames get a generated "Frame N" label.
+        gchar *generated_name = nullptr;
+        const char *shown_name = frame_name.c_str();
+        if (frame_name_len == 0) {
+          generated_name = g_strdup_printf(
+              "Frame %lu", static_cast<unsigned long>(layer_idx + 1));
+          shown_name = generated_name;
+        }
+        double fduration = frame_duration * 1000.f * tps_denom / tps_numer;
+        layer_name = g_strdup_printf("%s (%.15gms)%s", shown_name, fduration,
+                                     blend);
+        g_free(generated_name);
+      }
+      layer = gimp_layer_new(*image_id, layer_name, xsize, ysize, layer_type,
+                             /*opacity=*/100,
+                             gimp_image_get_default_new_layer_mode(*image_id));
+
+      gimp_image_insert_layer(*image_id, layer, /*parent_id=*/-1,
+                              /*position=*/0);
+
+      pixels_buffer_2 = g_malloc(buffer_size);
+      GeglBuffer *buffer = gimp_drawable_get_buffer(layer);
+      const Babl *destination_format = gegl_buffer_set_format(buffer, nullptr);
+
+      std::string babl_format_str = "";
+      if (is_gray) {
+        babl_format_str += "Y'";
+      } else {
+        babl_format_str += "R'G'B'";
+      }
+      if (info.alpha_bits > 0) {
+        babl_format_str += "A";
+      }
+      babl_format_str += " float";
+
+      const Babl *source_format = babl_format(babl_format_str.c_str());
+
+      babl_process(babl_fish(source_format, destination_format),
+                   pixels_buffer_1, pixels_buffer_2, xsize * ysize);
+
+      gegl_buffer_set(buffer, GEGL_RECTANGLE(0, 0, xsize, ysize), 0, nullptr,
+                      pixels_buffer_2, GEGL_AUTO_ROWSTRIDE);
+      gimp_item_transform_translate(layer, crop_x0, crop_y0);
+
+      g_clear_object(&buffer);
+      g_free(pixels_buffer_1);
+      pixels_buffer_1 = nullptr;
+      g_free(pixels_buffer_2);
+      pixels_buffer_2 = nullptr;
+      if (stop_processing) status = JXL_DEC_SUCCESS;
+      g_free(layer_name);
+      layer_idx++;
+    } else if (status == JXL_DEC_FRAME) {
+      JxlFrameHeader frame_header;
+      if (JxlDecoderGetFrameHeader(dec.get(), &frame_header) !=
+          JXL_DEC_SUCCESS) {
+        g_printerr(LOAD_PROC " Error: JxlDecoderGetFrameHeader failed\n");
+        return false;
+      }
+      xsize = frame_header.layer_info.xsize;
+      ysize = frame_header.layer_info.ysize;
+      crop_x0 = frame_header.layer_info.crop_x0;
+      crop_y0 = frame_header.layer_info.crop_y0;
+      frame_duration = frame_header.duration;
+      blend_mode = frame_header.layer_info.blend_info.blendmode;
+      if (blend_mode != JXL_BLEND_BLEND && blend_mode != JXL_BLEND_REPLACE) {
+        g_printerr(
+            LOAD_PROC
+            " Warning: JxlDecoderGetFrameHeader: Unhandled blend mode: %d\n",
+            blend_mode);
+      }
+      if ((frame_name_len = frame_header.name_length) > 0) {
+        // The decoder writes the name plus a terminating NUL, so the buffer
+        // must hold name_length + 1 bytes.
+        frame_name.assign(frame_name_len + 1, '\0');
+        if (JXL_DEC_SUCCESS != JxlDecoderGetFrameName(dec.get(),
+                                                      &frame_name[0],
+                                                      frame_name.size())) {
+          g_printerr(LOAD_PROC " Error: JxlDecoderGetFrameName failed\n");
+          return false;
+        }
+      }
+    } else if (status == JXL_DEC_SUCCESS) {
+      // All decoding successfully finished.
+      // It's not required to call JxlDecoderReleaseInput(dec.get())
+      // since the decoder will be destroyed.
+      break;
+    } else if (status == JXL_DEC_NEED_MORE_INPUT ||
+               status == JXL_DEC_FRAME_PROGRESSION) {
+      // The whole file was handed over up front, so "need more input" means
+      // it is truncated: flush what was decoded and finish with that.
+      stop_processing = status != JXL_DEC_FRAME_PROGRESSION;
+      if (JxlDecoderFlushImage(dec.get()) == JXL_DEC_SUCCESS) {
+        status = JXL_DEC_FULL_IMAGE;
+        continue;
+      }
+      g_printerr(LOAD_PROC " Error: Already provided all input\n");
+      return false;
+    } else if (status == JXL_DEC_ERROR) {
+      g_printerr(LOAD_PROC " Error: Decoder error\n");
+      return false;
+    } else {
+      g_printerr(LOAD_PROC " Error: Unknown decoder status\n");
+      return false;
+    }
+  }  // end grand decode loop
+
+  gimp_load_progress.update();
+
+  if (profile_icc) {
+    gimp_image_set_color_profile(*image_id, profile_icc);
+  }
+
+  gimp_load_progress.update();
+
+  // TODO(xiota): Add option to keep image as float
+  if (info.bits_per_sample < 32) {
+    gimp_image_convert_precision(*image_id, precision);
+  }
+
+  gimp_image_set_filename(*image_id, filename);
+
+  gimp_load_progress.finished();
+  return true;
+}
+
+}  // namespace jxl
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.h b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.h
new file mode 100644
index 0000000000..ef5b92fef6
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-load.h
@@ -0,0 +1,17 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifndef PLUGINS_GIMP_FILE_JXL_LOAD_H_
+#define PLUGINS_GIMP_FILE_JXL_LOAD_H_
+
+#include "plugins/gimp/common.h"
+
+namespace jxl {
+
+// Loads the JPEG XL file `filename` into a new GIMP image whose id is written
+// to `*image_id`. Returns true on success.
+bool LoadJpegXlImage(const gchar* filename, gint32* image_id);
+
+}  // namespace jxl
+
+#endif  // PLUGINS_GIMP_FILE_JXL_LOAD_H_
diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.cc b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.cc
new file mode 100644
index 0000000000..f6702283e9
--- /dev/null
+++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.cc
@@ -0,0 +1,893 @@
+// Copyright (c) the JPEG XL Project Authors. All rights reserved.
+//
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "plugins/gimp/file-jxl-save.h"
+
+#include
+#include
+
+#include
+#include
+
+#include "gobject/gsignal.h"
+
+#define PLUG_IN_BINARY "file-jxl"
+#define SAVE_PROC "file-jxl-save"
+
+#define SCALE_WIDTH 200
+
+namespace jxl {
+
+namespace {
+
+#ifndef g_clear_signal_handler
+// g_clear_signal_handler was added in glib 2.62
+// Fallback for older glib: disconnects `*handler` from `instance` if it is
+// set and resets it to 0, so calling it again is a no-op.
+void g_clear_signal_handler(gulong* handler, gpointer instance) {
+  if (handler != nullptr && *handler != 0) {
+    g_signal_handler_disconnect(instance, *handler);
+    *handler = 0;
+  }
+}
+#endif  // g_clear_signal_handler
+
+// Settings for one save operation: compression parameters, properties of the
+// image being saved, and the babl/JXL pixel formats used to export it.
+// NOTE(review): the member functions are defined outside this view; the
+// descriptions below are inferred from the declarations only — confirm
+// against their definitions.
+class JpegXlSaveOpts {
+ public:
+  // Compression strength expressed two ways; presumably kept in sync by
+  // SetDistance/SetQuality and UpdateDistance/UpdateQuality — TODO confirm.
+  float distance;
+  float quality;
+
+  // Properties of the output.
+  bool lossless = false;
+  bool is_linear = false;
+  bool has_alpha = false;
+  bool is_gray = false;
+  bool icc_attached = false;
+
+  // Options exposed in the save dialog.
+  bool advanced_mode = false;
+  bool use_container = true;
+  bool save_exif = false;
+  int encoding_effort = 7;
+  int faster_decoding = 0;
+
+  // babl format name and the two parts it is made of (model and type).
+  std::string babl_format_str = "RGB u16";
+  std::string babl_type_str = "u16";
+  std::string babl_model_str = "RGB";
+
+  JxlPixelFormat pixel_format;
+  JxlBasicInfo basic_info;
+
+  // functions
+  JpegXlSaveOpts();
+
+  bool SetDistance(float dist);
+  bool SetQuality(float qual);
+  bool SetDimensions(int x, int y);
+  bool SetNumChannels(int channels);
+
+  bool UpdateDistance();
+  bool UpdateQuality();
+
+  bool SetModel(bool is_linear_);
+
+  bool UpdateBablFormat();
+  bool SetBablModel(std::string model);
+  bool SetBablType(std::string type);
+
+  bool SetPrecision(int gimp_precision);
+
+ private:
+};  // class JpegXlSaveOpts
+
+// Options shared by the save dialog callbacks and the save code.
+JpegXlSaveOpts jxl_save_opts;
+
+// Save dialog. Holds the widgets whose values mirror `jxl_save_opts` and the
+// ids of the signal handlers connected to them, so that a callback can
+// disconnect the handlers while it updates the other widgets.
+class JpegXlSaveGui {
+ public:
+  bool SaveDialog();
+
+ private:
+  GtkWidget* toggle_lossless = nullptr;
+  GtkAdjustment* entry_distance = nullptr;
+  GtkAdjustment* entry_quality = nullptr;
+  GtkAdjustment* entry_effort = nullptr;
+  GtkAdjustment* entry_faster = nullptr;
+  GtkWidget* frame_advanced = nullptr;
+  GtkWidget* toggle_no_xyb = nullptr;
+  GtkWidget* toggle_raw = nullptr;
+  // Signal handler ids; 0 means "not connected".
+  gulong handle_toggle_lossless = 0;
+  gulong handle_entry_quality = 0;
+  gulong handle_entry_distance = 0;
+
+  // GTK signal callbacks. Those that need the dialog instance receive it as
+  // `this_pointer`.
+  static bool GuiOnChangeQuality(GtkAdjustment* adj_qual, void* this_pointer);
+
+  static bool GuiOnChangeDistance(GtkAdjustment* adj_dist, void* this_pointer);
+
+  static bool GuiOnChangeEffort(GtkAdjustment* adj_effort);
+  static bool GuiOnChangeLossless(GtkWidget* toggle, void* this_pointer);
+  static bool GuiOnChangeCodestream(GtkWidget* toggle);
+  static bool GuiOnChangeNoXYB(GtkWidget* toggle);
+
+  static bool GuiOnChangeAdvancedMode(GtkWidget* toggle, void* this_pointer);
+};  // class JpegXlSaveGui
+
+JpegXlSaveGui jxl_save_gui;
+
+bool JpegXlSaveGui::GuiOnChangeQuality(GtkAdjustment* adj_qual,
+                                       void* this_pointer) {
+  JpegXlSaveGui* self = static_cast(this_pointer);
+
+  g_clear_signal_handler(&self->handle_entry_distance, self->entry_distance);
+  g_clear_signal_handler(&self->handle_entry_quality, self->entry_quality);
+  g_clear_signal_handler(&self->handle_toggle_lossless, self->toggle_lossless);
+
+  GtkAdjustment* adj_dist = self->entry_distance;
+  jxl_save_opts.SetQuality(gtk_adjustment_get_value(adj_qual));
+  gtk_adjustment_set_value(adj_dist, jxl_save_opts.distance);
+
+  self->handle_toggle_lossless = g_signal_connect(
+      self->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless),
self); + self->handle_entry_distance = + g_signal_connect(self->entry_distance, "value-changed", + G_CALLBACK(GuiOnChangeDistance), self); + self->handle_entry_quality = + g_signal_connect(self->entry_quality, "value-changed", + G_CALLBACK(GuiOnChangeQuality), self); + return true; +} + +bool JpegXlSaveGui::GuiOnChangeDistance(GtkAdjustment* adj_dist, + void* this_pointer) { + JpegXlSaveGui* self = static_cast(this_pointer); + GtkAdjustment* adj_qual = self->entry_quality; + + g_clear_signal_handler(&self->handle_entry_distance, self->entry_distance); + g_clear_signal_handler(&self->handle_entry_quality, self->entry_quality); + g_clear_signal_handler(&self->handle_toggle_lossless, self->toggle_lossless); + + jxl_save_opts.SetDistance(gtk_adjustment_get_value(adj_dist)); + gtk_adjustment_set_value(adj_qual, jxl_save_opts.quality); + + if (!(jxl_save_opts.distance < 0.001)) { + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_lossless), + false); + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), false); + } + + self->handle_toggle_lossless = g_signal_connect( + self->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), self); + self->handle_entry_distance = + g_signal_connect(self->entry_distance, "value-changed", + G_CALLBACK(GuiOnChangeDistance), self); + self->handle_entry_quality = + g_signal_connect(self->entry_quality, "value-changed", + G_CALLBACK(GuiOnChangeQuality), self); + return true; +} + +bool JpegXlSaveGui::GuiOnChangeEffort(GtkAdjustment* adj_effort) { + float new_effort = 10 - gtk_adjustment_get_value(adj_effort); + jxl_save_opts.encoding_effort = new_effort; + return true; +} + +bool JpegXlSaveGui::GuiOnChangeLossless(GtkWidget* toggle, void* this_pointer) { + JpegXlSaveGui* self = static_cast(this_pointer); + GtkAdjustment* adj_distance = self->entry_distance; + GtkAdjustment* adj_quality = self->entry_quality; + GtkAdjustment* adj_effort = self->entry_effort; + + jxl_save_opts.lossless = + 
gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle)); + + g_clear_signal_handler(&self->handle_entry_distance, self->entry_distance); + g_clear_signal_handler(&self->handle_entry_quality, self->entry_quality); + g_clear_signal_handler(&self->handle_toggle_lossless, self->toggle_lossless); + + if (jxl_save_opts.lossless) { + gtk_adjustment_set_value(adj_quality, 100.0); + gtk_adjustment_set_value(adj_distance, 0.0); + jxl_save_opts.distance = 0; + jxl_save_opts.UpdateQuality(); + gtk_adjustment_set_value(adj_effort, 7); + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), true); + } else { + gtk_adjustment_set_value(adj_quality, 90.0); + gtk_adjustment_set_value(adj_distance, 1.0); + jxl_save_opts.distance = 1.0; + jxl_save_opts.UpdateQuality(); + gtk_adjustment_set_value(adj_effort, 3); + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), false); + } + self->handle_toggle_lossless = g_signal_connect( + self->toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), self); + self->handle_entry_distance = + g_signal_connect(self->entry_distance, "value-changed", + G_CALLBACK(GuiOnChangeDistance), self); + self->handle_entry_quality = + g_signal_connect(self->entry_quality, "value-changed", + G_CALLBACK(GuiOnChangeQuality), self); + return true; +} + +bool JpegXlSaveGui::GuiOnChangeCodestream(GtkWidget* toggle) { + jxl_save_opts.use_container = + !gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle)); + return true; +} + +bool JpegXlSaveGui::GuiOnChangeNoXYB(GtkWidget* toggle) { + jxl_save_opts.basic_info.uses_original_profile = + gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle)); + return true; +} + +bool JpegXlSaveGui::GuiOnChangeAdvancedMode(GtkWidget* toggle, + void* this_pointer) { + JpegXlSaveGui* self = static_cast(this_pointer); + jxl_save_opts.advanced_mode = + gtk_toggle_button_get_active(GTK_TOGGLE_BUTTON(toggle)); + + gtk_widget_set_sensitive(self->frame_advanced, jxl_save_opts.advanced_mode); + + if 
(!jxl_save_opts.advanced_mode) { + jxl_save_opts.basic_info.uses_original_profile = false; + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_no_xyb), false); + + jxl_save_opts.use_container = true; + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(self->toggle_raw), false); + + jxl_save_opts.faster_decoding = 0; + gtk_adjustment_set_value(GTK_ADJUSTMENT(self->entry_faster), 0); + } + return true; +} + +bool JpegXlSaveGui::SaveDialog() { + gboolean run; + GtkWidget* dialog; + GtkWidget* content_area; + GtkWidget* main_vbox; + GtkWidget* frame; + GtkWidget* toggle; + GtkWidget* table; + GtkWidget* vbox; + GtkWidget* separator; + + // initialize export dialog + gimp_ui_init(PLUG_IN_BINARY, true); + dialog = gimp_export_dialog_new("JPEG XL", PLUG_IN_BINARY, SAVE_PROC); + + gtk_window_set_resizable(GTK_WINDOW(dialog), false); + content_area = gimp_export_dialog_get_content_area(dialog); + + main_vbox = gtk_vbox_new(false, 6); + gtk_container_set_border_width(GTK_CONTAINER(main_vbox), 6); + gtk_box_pack_start(GTK_BOX(content_area), main_vbox, true, true, 0); + gtk_widget_show(main_vbox); + + // Standard Settings Frame + frame = gtk_frame_new(nullptr); + gtk_frame_set_shadow_type(GTK_FRAME(frame), GTK_SHADOW_ETCHED_IN); + gtk_box_pack_start(GTK_BOX(main_vbox), frame, false, false, 0); + gtk_widget_show(frame); + + vbox = gtk_vbox_new(false, 6); + gtk_container_set_border_width(GTK_CONTAINER(vbox), 6); + gtk_container_add(GTK_CONTAINER(frame), vbox); + gtk_widget_show(vbox); + + // Layout Table + table = gtk_table_new(20, 3, false); + gtk_table_set_col_spacings(GTK_TABLE(table), 6); + gtk_box_pack_start(GTK_BOX(vbox), table, false, false, 0); + gtk_widget_show(table); + + // Distance Slider + static gchar distance_help[] = + "Butteraugli distance target. 
Suggested values:" + "\n\td\u00A0=\u00A00.3\tExcellent" + "\n\td\u00A0=\u00A01\tVery Good" + "\n\td\u00A0=\u00A02\tGood" + "\n\td\u00A0=\u00A03\tFair" + "\n\td\u00A0=\u00A06\tPoor"; + + entry_distance = (GtkAdjustment*)gimp_scale_entry_new( + GTK_TABLE(table), 0, 0, "Distance", SCALE_WIDTH, 0, + jxl_save_opts.distance, 0.0, 15.0, 0.001, 1.0, 3, true, 0.0, 0.0, + distance_help, SAVE_PROC); + gimp_scale_entry_set_logarithmic((GtkObject*)entry_distance, true); + + // Quality Slider + static gchar quality_help[] = + "JPEG-style Quality is remapped to distance. " + "Values roughly match libjpeg quality settings."; + entry_quality = (GtkAdjustment*)gimp_scale_entry_new( + GTK_TABLE(table), 0, 1, "Quality", SCALE_WIDTH, 0, jxl_save_opts.quality, + 8.26, 100.0, 1.0, 10.0, 2, true, 0.0, 0.0, quality_help, SAVE_PROC); + + // Distance and Quality Signals + handle_entry_distance = g_signal_connect( + entry_distance, "value-changed", G_CALLBACK(GuiOnChangeDistance), this); + handle_entry_quality = g_signal_connect(entry_quality, "value-changed", + G_CALLBACK(GuiOnChangeQuality), this); + + // ---------- + separator = gtk_vseparator_new(); + gtk_table_attach(GTK_TABLE(table), separator, 0, 2, 2, 3, GTK_EXPAND, + GTK_EXPAND, 9, 9); + gtk_widget_show(separator); + + // Encoding Effort / Speed + static gchar effort_help[] = + "Adjust encoding speed. Higher values are faster because " + "the encoder uses less effort to hit distance targets. " + "As\u00A0a\u00A0result, image quality may be decreased. 
" + "Default\u00A0=\u00A03."; + entry_effort = (GtkAdjustment*)gimp_scale_entry_new( + GTK_TABLE(table), 0, 3, "Speed", SCALE_WIDTH, 0, + 10 - jxl_save_opts.encoding_effort, 1, 9, 1, 2, 0, true, 0.0, 0.0, + effort_help, SAVE_PROC); + + // effort signal + g_signal_connect(entry_effort, "value-changed", G_CALLBACK(GuiOnChangeEffort), + nullptr); + + // ---------- + separator = gtk_vseparator_new(); + gtk_table_attach(GTK_TABLE(table), separator, 0, 2, 4, 5, GTK_EXPAND, + GTK_EXPAND, 9, 9); + gtk_widget_show(separator); + + // Lossless Mode Convenience Checkbox + static gchar lossless_help[] = + "Compress using modular lossless mode. " + "Speed\u00A0is adjusted to improve performance."; + toggle_lossless = gtk_check_button_new_with_label("Lossless Mode"); + gimp_help_set_help_data(toggle_lossless, lossless_help, nullptr); + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_lossless), + jxl_save_opts.lossless); + gtk_table_attach_defaults(GTK_TABLE(table), toggle_lossless, 0, 2, 5, 6); + gtk_widget_show(toggle_lossless); + + // lossless signal + handle_toggle_lossless = g_signal_connect( + toggle_lossless, "toggled", G_CALLBACK(GuiOnChangeLossless), this); + + // ---------- + separator = gtk_vseparator_new(); + gtk_box_pack_start(GTK_BOX(main_vbox), separator, false, false, 1); + gtk_widget_show(separator); + + // Advanced Settings Frame + frame_advanced = gtk_frame_new("Advanced Settings"); + gimp_help_set_help_data(frame_advanced, + "Some advanced settings may produce malformed files.", + nullptr); + gtk_frame_set_shadow_type(GTK_FRAME(frame_advanced), GTK_SHADOW_ETCHED_IN); + gtk_box_pack_start(GTK_BOX(main_vbox), frame_advanced, true, true, 0); + gtk_widget_show(frame_advanced); + + gtk_widget_set_sensitive(frame_advanced, false); + + vbox = gtk_vbox_new(false, 6); + gtk_container_set_border_width(GTK_CONTAINER(vbox), 6); + gtk_container_add(GTK_CONTAINER(frame_advanced), vbox); + gtk_widget_show(vbox); + + // uses_original_profile + static gchar 
uses_original_profile_help[] = + "Prevents conversion to the XYB colorspace. " + "File sizes are approximately doubled."; + toggle_no_xyb = gtk_check_button_new_with_label("Do not use XYB colorspace"); + gimp_help_set_help_data(toggle_no_xyb, uses_original_profile_help, nullptr); + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_no_xyb), + jxl_save_opts.basic_info.uses_original_profile); + gtk_box_pack_start(GTK_BOX(vbox), toggle_no_xyb, false, false, 0); + gtk_widget_show(toggle_no_xyb); + + g_signal_connect(toggle_no_xyb, "toggled", G_CALLBACK(GuiOnChangeNoXYB), + nullptr); + + // save raw codestream + static gchar codestream_help[] = + "Save the raw codestream, without a container. " + "The container is required for metadata and some other features."; + toggle_raw = gtk_check_button_new_with_label("Save Raw Codestream"); + gimp_help_set_help_data(toggle_raw, codestream_help, nullptr); + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle_raw), + !jxl_save_opts.use_container); + gtk_box_pack_start(GTK_BOX(vbox), toggle_raw, false, false, 0); + gtk_widget_show(toggle_raw); + + g_signal_connect(toggle_raw, "toggled", G_CALLBACK(GuiOnChangeCodestream), + nullptr); + + // ---------- + separator = gtk_vseparator_new(); + gtk_box_pack_start(GTK_BOX(vbox), separator, false, false, 1); + gtk_widget_show(separator); + + // Faster Decoding / Decoding Speed + static gchar faster_help[] = + "Improve decoding speed at the expense of quality. 
" + "Default\u00A0=\u00A00."; + table = gtk_table_new(1, 3, false); + gtk_table_set_col_spacings(GTK_TABLE(table), 6); + gtk_container_add(GTK_CONTAINER(vbox), table); + gtk_widget_show(table); + + entry_faster = (GtkAdjustment*)gimp_scale_entry_new( + GTK_TABLE(table), 0, 0, "Faster Decoding", SCALE_WIDTH, 0, + jxl_save_opts.faster_decoding, 0, 4, 1, 1, 0, true, 0.0, 0.0, faster_help, + SAVE_PROC); + + // Faster Decoding Signals + g_signal_connect(entry_faster, "value-changed", + G_CALLBACK(gimp_int_adjustment_update), + &jxl_save_opts.faster_decoding); + + // Enable Advanced Settings + frame = gtk_frame_new(nullptr); + gtk_frame_set_shadow_type(GTK_FRAME(frame), GTK_SHADOW_NONE); + gtk_box_pack_start(GTK_BOX(main_vbox), frame, true, true, 0); + gtk_widget_show(frame); + + vbox = gtk_vbox_new(false, 6); + gtk_container_set_border_width(GTK_CONTAINER(vbox), 6); + gtk_container_add(GTK_CONTAINER(frame), vbox); + gtk_widget_show(vbox); + + static gchar advanced_help[] = + "Some advanced settings may produce malformed files."; + toggle = gtk_check_button_new_with_label("Enable Advanced Settings"); + gimp_help_set_help_data(toggle, advanced_help, nullptr); + gtk_toggle_button_set_active(GTK_TOGGLE_BUTTON(toggle), + jxl_save_opts.advanced_mode); + gtk_box_pack_start(GTK_BOX(vbox), toggle, false, false, 0); + gtk_widget_show(toggle); + + g_signal_connect(toggle, "toggled", G_CALLBACK(GuiOnChangeAdvancedMode), + this); + + // show dialog + gtk_widget_show(dialog); + + GtkAllocation allocation; + gtk_widget_get_allocation(dialog, &allocation); + + int height = allocation.height; + gtk_widget_set_size_request(dialog, height * 1.5, height); + + run = (gimp_dialog_run(GIMP_DIALOG(dialog)) == GTK_RESPONSE_OK); + gtk_widget_destroy(dialog); + + return run; +} // JpegXlSaveGui::SaveDialog + +JpegXlSaveOpts::JpegXlSaveOpts() { + SetDistance(1.0); + + pixel_format.num_channels = 4; + pixel_format.data_type = JXL_TYPE_FLOAT; + pixel_format.endianness = JXL_NATIVE_ENDIAN; + 
pixel_format.align = 0; + + JxlEncoderInitBasicInfo(&basic_info); + return; +} // JpegXlSaveOpts constructor + +bool JpegXlSaveOpts::SetModel(bool is_linear_) { + int channels; + std::string model; + + if (is_gray) { + channels = 1; + if (is_linear_) { + model = "Y"; + } else { + model = "Y'"; + } + } else { + channels = 3; + if (is_linear_) { + model = "RGB"; + } else { + model = "R'G'B'"; + } + } + if (has_alpha) { + SetBablModel(model + "A"); + SetNumChannels(channels + 1); + } else { + SetBablModel(model); + SetNumChannels(channels); + } + return true; +} // JpegXlSaveOpts::SetModel + +bool JpegXlSaveOpts::SetDistance(float dist) { + distance = dist; + return UpdateQuality(); +} + +bool JpegXlSaveOpts::SetQuality(float qual) { + quality = qual; + return UpdateDistance(); +} + +bool JpegXlSaveOpts::UpdateQuality() { + float qual; + + if (distance < 0.1) { + qual = 100; + } else if (distance > 6.4) { + qual = -5.0 / 53.0 * sqrt(6360.0 * distance - 39975.0) + 1725.0 / 53.0; + lossless = false; + } else { + qual = 100 - (distance - 0.1) / 0.09; + lossless = false; + } + + if (qual < 0) { + quality = 0.0; + } else if (qual >= 100) { + quality = 100.0; + } else { + quality = qual; + } + + return true; +} + +bool JpegXlSaveOpts::UpdateDistance() { + float dist; + if (quality >= 30) { + dist = 0.1 + (100 - quality) * 0.09; + } else { + dist = 53.0 / 3000.0 * quality * quality - 23.0 / 20.0 * quality + 25.0; + } + + if (dist > 25) { + distance = 25; + } else { + distance = dist; + } + return true; +} + +bool JpegXlSaveOpts::SetDimensions(int x, int y) { + basic_info.xsize = x; + basic_info.ysize = y; + return true; +} + +bool JpegXlSaveOpts::SetNumChannels(int channels) { + switch (channels) { + case 1: + pixel_format.num_channels = 1; + basic_info.num_color_channels = 1; + basic_info.num_extra_channels = 0; + basic_info.alpha_bits = 0; + basic_info.alpha_exponent_bits = 0; + break; + case 2: + pixel_format.num_channels = 2; + basic_info.num_color_channels = 1; + 
basic_info.num_extra_channels = 1; + basic_info.alpha_bits = int(std::fmin(16, basic_info.bits_per_sample)); + basic_info.alpha_exponent_bits = 0; + break; + case 3: + pixel_format.num_channels = 3; + basic_info.num_color_channels = 3; + basic_info.num_extra_channels = 0; + basic_info.alpha_bits = 0; + basic_info.alpha_exponent_bits = 0; + break; + case 4: + pixel_format.num_channels = 4; + basic_info.num_color_channels = 3; + basic_info.num_extra_channels = 1; + basic_info.alpha_bits = int(std::fmin(16, basic_info.bits_per_sample)); + basic_info.alpha_exponent_bits = 0; + break; + default: + SetNumChannels(3); + } // switch + return true; +} // JpegXlSaveOpts::SetNumChannels + +bool JpegXlSaveOpts::UpdateBablFormat() { + babl_format_str = babl_model_str + " " + babl_type_str; + return true; +} + +bool JpegXlSaveOpts::SetBablModel(std::string model) { + babl_model_str = std::move(model); + return UpdateBablFormat(); +} + +bool JpegXlSaveOpts::SetBablType(std::string type) { + babl_type_str = std::move(type); + return UpdateBablFormat(); +} + +bool JpegXlSaveOpts::SetPrecision(int gimp_precision) { + switch (gimp_precision) { + case GIMP_PRECISION_HALF_GAMMA: + case GIMP_PRECISION_HALF_LINEAR: + basic_info.bits_per_sample = 16; + basic_info.exponent_bits_per_sample = 5; + break; + + // UINT32 not supported by encoder; using FLOAT instead + case GIMP_PRECISION_U32_GAMMA: + case GIMP_PRECISION_U32_LINEAR: + case GIMP_PRECISION_FLOAT_GAMMA: + case GIMP_PRECISION_FLOAT_LINEAR: + basic_info.bits_per_sample = 32; + basic_info.exponent_bits_per_sample = 8; + break; + + case GIMP_PRECISION_U16_GAMMA: + case GIMP_PRECISION_U16_LINEAR: + basic_info.bits_per_sample = 16; + basic_info.exponent_bits_per_sample = 0; + break; + + default: + case GIMP_PRECISION_U8_LINEAR: + case GIMP_PRECISION_U8_GAMMA: + basic_info.bits_per_sample = 8; + basic_info.exponent_bits_per_sample = 0; + break; + } + return true; +} // JpegXlSaveOpts::SetPrecision + +} // namespace + +bool 
SaveJpegXlImage(const gint32 image_id, const gint32 drawable_id, + const gint32 orig_image_id, const gchar* const filename) { + if (!jxl_save_gui.SaveDialog()) { + return true; + } + + gint32 nlayers; + gint32* layers; + gint32 duplicate = gimp_image_duplicate(image_id); + + JpegXlGimpProgress gimp_save_progress( + ("Saving JPEG XL file:" + std::string(filename)).c_str()); + gimp_save_progress.update(); + + // try to get ICC color profile... + std::vector icc; + + GimpColorProfile* profile = gimp_image_get_effective_color_profile(image_id); + jxl_save_opts.is_gray = gimp_color_profile_is_gray(profile); + jxl_save_opts.is_linear = gimp_color_profile_is_linear(profile); + + profile = gimp_image_get_color_profile(image_id); + if (profile) { + g_printerr(SAVE_PROC " Info: Extracting ICC Profile...\n"); + gsize icc_size; + const guint8* const icc_bytes = + gimp_color_profile_get_icc_profile(profile, &icc_size); + + icc.assign(icc_bytes, icc_bytes + icc_size); + } else { + g_printerr(SAVE_PROC " Info: No ICC profile. Exporting image anyway.\n"); + } + + gimp_save_progress.update(); + + jxl_save_opts.SetDimensions(gimp_image_width(image_id), + gimp_image_height(image_id)); + + jxl_save_opts.SetPrecision(gimp_image_get_precision(image_id)); + layers = gimp_image_get_layers(duplicate, &nlayers); + + for (int i = 0; i < nlayers; i++) { + if (gimp_drawable_has_alpha(layers[i])) { + jxl_save_opts.has_alpha = true; + break; + } + } + + gimp_save_progress.update(); + + // layers need to match image size, for now + for (int i = 0; i < nlayers; i++) { + gimp_layer_resize_to_image_size(layers[i]); + } + + // treat layers as animation frames, for now + if (nlayers > 1) { + jxl_save_opts.basic_info.have_animation = true; + jxl_save_opts.basic_info.animation.tps_numerator = 100; + } + + gimp_save_progress.update(); + + // multi-threaded parallel runner. 
+ auto runner = JxlResizableParallelRunnerMake(nullptr); + + JxlResizableParallelRunnerSetThreads( + runner.get(), + JxlResizableParallelRunnerSuggestThreads(jxl_save_opts.basic_info.xsize, + jxl_save_opts.basic_info.ysize)); + + auto enc = JxlEncoderMake(/*memory_manager=*/nullptr); + JxlEncoderUseContainer(enc.get(), jxl_save_opts.use_container); + + if (JXL_ENC_SUCCESS != JxlEncoderSetParallelRunner(enc.get(), + JxlResizableParallelRunner, + runner.get())) { + g_printerr(SAVE_PROC " Error: JxlEncoderSetParallelRunner failed\n"); + return false; + } + + // this sets some basic_info properties + jxl_save_opts.SetModel(jxl_save_opts.is_linear); + + if (JXL_ENC_SUCCESS != + JxlEncoderSetBasicInfo(enc.get(), &jxl_save_opts.basic_info)) { + g_printerr(SAVE_PROC " Error: JxlEncoderSetBasicInfo failed\n"); + return false; + } + + // try to use ICC profile + if (!icc.empty() && !jxl_save_opts.is_gray) { + if (JXL_ENC_SUCCESS == + JxlEncoderSetICCProfile(enc.get(), icc.data(), icc.size())) { + jxl_save_opts.icc_attached = true; + } else { + g_printerr(SAVE_PROC " Warning: JxlEncoderSetICCProfile failed.\n"); + jxl_save_opts.basic_info.uses_original_profile = false; + jxl_save_opts.lossless = false; + } + } else { + g_printerr(SAVE_PROC " Warning: Using internal profile.\n"); + jxl_save_opts.basic_info.uses_original_profile = false; + jxl_save_opts.lossless = false; + } + + // set up internal color profile + JxlColorEncoding color_encoding = {}; + + if (jxl_save_opts.is_linear) { + JxlColorEncodingSetToLinearSRGB(&color_encoding, jxl_save_opts.is_gray); + } else { + JxlColorEncodingSetToSRGB(&color_encoding, jxl_save_opts.is_gray); + } + + if (JXL_ENC_SUCCESS != + JxlEncoderSetColorEncoding(enc.get(), &color_encoding)) { + g_printerr(SAVE_PROC " Warning: JxlEncoderSetColorEncoding failed\n"); + } + + // set encoder options + JxlEncoderFrameSettings* frame_settings; + frame_settings = JxlEncoderFrameSettingsCreate(enc.get(), nullptr); + + 
JxlEncoderFrameSettingsSetOption(frame_settings, JXL_ENC_FRAME_SETTING_EFFORT, + jxl_save_opts.encoding_effort); + JxlEncoderFrameSettingsSetOption(frame_settings, + JXL_ENC_FRAME_SETTING_DECODING_SPEED, + jxl_save_opts.faster_decoding); + + // lossless mode + if (jxl_save_opts.lossless || jxl_save_opts.distance < 0.01) { + if (jxl_save_opts.basic_info.exponent_bits_per_sample > 0) { + // lossless mode doesn't work well with floating point + jxl_save_opts.distance = 0.01; + jxl_save_opts.lossless = false; + JxlEncoderSetFrameLossless(frame_settings, false); + JxlEncoderSetFrameDistance(frame_settings, 0.01); + } else { + JxlEncoderSetFrameDistance(frame_settings, 0); + JxlEncoderSetFrameLossless(frame_settings, true); + } + } else { + jxl_save_opts.lossless = false; + JxlEncoderSetFrameLossless(frame_settings, false); + JxlEncoderSetFrameDistance(frame_settings, jxl_save_opts.distance); + } + + // convert precision and colorspace + if (jxl_save_opts.is_linear && + jxl_save_opts.basic_info.bits_per_sample < 32) { + gimp_image_convert_precision(duplicate, GIMP_PRECISION_FLOAT_LINEAR); + } else { + gimp_image_convert_precision(duplicate, GIMP_PRECISION_FLOAT_GAMMA); + } + + // process layers and compress into JXL + size_t buffer_size = + jxl_save_opts.basic_info.xsize * jxl_save_opts.basic_info.ysize * + jxl_save_opts.pixel_format.num_channels * 4; // bytes per sample + + for (int i = nlayers - 1; i >= 0; i--) { + gimp_save_progress.update(); + + // copy image into buffer... 
+ gpointer pixels_buffer_1; + gpointer pixels_buffer_2; + pixels_buffer_1 = g_malloc(buffer_size); + pixels_buffer_2 = g_malloc(buffer_size); + + gimp_layer_resize_to_image_size(layers[i]); + + GeglBuffer* buffer = gimp_drawable_get_buffer(layers[i]); + + // using gegl_buffer_set_format to get the format because + // gegl_buffer_get_format doesn't always get the original format + const Babl* native_format = gegl_buffer_set_format(buffer, nullptr); + + gegl_buffer_get(buffer, + GEGL_RECTANGLE(0, 0, jxl_save_opts.basic_info.xsize, + jxl_save_opts.basic_info.ysize), + 1.0, native_format, pixels_buffer_1, GEGL_AUTO_ROWSTRIDE, + GEGL_ABYSS_NONE); + g_clear_object(&buffer); + + // use babl to fix gamma mismatch issues + jxl_save_opts.SetModel(jxl_save_opts.is_linear); + jxl_save_opts.pixel_format.data_type = JXL_TYPE_FLOAT; + jxl_save_opts.SetBablType("float"); + const Babl* destination_format = + babl_format(jxl_save_opts.babl_format_str.c_str()); + + babl_process( + babl_fish(native_format, destination_format), pixels_buffer_1, + pixels_buffer_2, + jxl_save_opts.basic_info.xsize * jxl_save_opts.basic_info.ysize); + + gimp_save_progress.update(); + + // send layer to encoder + if (JXL_ENC_SUCCESS != + JxlEncoderAddImageFrame(frame_settings, &jxl_save_opts.pixel_format, + pixels_buffer_2, buffer_size)) { + g_printerr(SAVE_PROC " Error: JxlEncoderAddImageFrame failed\n"); + return false; + } + } + + JxlEncoderCloseInput(enc.get()); + + // get data from encoder + std::vector compressed; + compressed.resize(262144); + uint8_t* next_out = compressed.data(); + size_t avail_out = compressed.size(); + + JxlEncoderStatus process_result = JXL_ENC_NEED_MORE_OUTPUT; + while (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + gimp_save_progress.update(); + + process_result = JxlEncoderProcessOutput(enc.get(), &next_out, &avail_out); + if (process_result == JXL_ENC_NEED_MORE_OUTPUT) { + size_t offset = next_out - compressed.data(); + compressed.resize(compressed.size() + 262144); + 
next_out = compressed.data() + offset; + avail_out = compressed.size() - offset; + } + } + compressed.resize(next_out - compressed.data()); + + if (JXL_ENC_SUCCESS != process_result) { + g_printerr(SAVE_PROC " Error: JxlEncoderProcessOutput failed\n"); + return false; + } + + // write file + std::ofstream outstream(filename, std::ios::out | std::ios::binary); + copy(compressed.begin(), compressed.end(), + std::ostream_iterator(outstream)); + + gimp_save_progress.finished(); + return true; +} // SaveJpegXlImage() + +} // namespace jxl diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.h b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.h new file mode 100644 index 0000000000..c9d0e8091f --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl-save.h @@ -0,0 +1,18 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +#ifndef PLUGINS_GIMP_FILE_JXL_SAVE_H_ +#define PLUGINS_GIMP_FILE_JXL_SAVE_H_ + +#include "plugins/gimp/common.h" + +namespace jxl { + +bool SaveJpegXlImage(gint32 image_id, gint32 drawable_id, gint32 orig_image_id, + const gchar* filename); + +} // namespace jxl + +#endif // PLUGINS_GIMP_FILE_JXL_SAVE_H_ diff --git a/third-party/libjxl/libjxl/plugins/gimp/file-jxl.cc b/third-party/libjxl/libjxl/plugins/gimp/file-jxl.cc new file mode 100644 index 0000000000..743495a2e0 --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/gimp/file-jxl.cc @@ -0,0 +1,157 @@ +// Copyright (c) the JPEG XL Project Authors. All rights reserved. +// +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +#include + +#include + +#include "plugins/gimp/common.h" +#include "plugins/gimp/file-jxl-load.h" +#include "plugins/gimp/file-jxl-save.h" + +namespace jxl { +namespace { + +constexpr char kLoadProc[] = "file-jxl-load"; +constexpr char kSaveProc[] = "file-jxl-save"; + +void Query() { + { + static char run_mode_name[] = "run-mode"; + static char run_mode_description[] = "Run mode"; + static char filename_name[] = "filename"; + static char filename_description[] = "The name of the file to load"; + static char raw_filename_name[] = "raw-filename"; + static char raw_filename_description[] = + "The name of the file, as entered by the user"; + static const GimpParamDef load_args[] = { + {GIMP_PDB_INT32, run_mode_name, run_mode_description}, + {GIMP_PDB_STRING, filename_name, filename_description}, + {GIMP_PDB_STRING, raw_filename_name, raw_filename_description}, + }; + static char image_name[] = "image"; + static char image_description[] = "Loaded image"; + static const GimpParamDef load_return_vals[] = { + {GIMP_PDB_IMAGE, image_name, image_description}, + }; + + gimp_install_procedure( + /*name=*/kLoadProc, /*blurb=*/"Loads JPEG XL image files", + /*help=*/"Loads JPEG XL image files", /*author=*/"JPEG XL Project", + /*copyright=*/"JPEG XL Project", /*date=*/"2019", + /*menu_label=*/"JPEG XL image", /*image_types=*/nullptr, + /*type=*/GIMP_PLUGIN, /*n_params=*/G_N_ELEMENTS(load_args), + /*n_return_vals=*/G_N_ELEMENTS(load_return_vals), /*params=*/load_args, + /*return_vals=*/load_return_vals); + gimp_register_file_handler_mime(kLoadProc, "image/jxl"); + gimp_register_magic_load_handler( + kLoadProc, "jxl", "", + "0,string,\xFF\x0A," + "0,string,\\000\\000\\000\x0CJXL\\040\\015\\012\x87\\012"); + } + + { + static char run_mode_name[] = "run-mode"; + static char run_mode_description[] = "Run mode"; + static char image_name[] = "image"; + static char image_description[] = "Input image"; + static char drawable_name[] = "drawable"; + static char drawable_description[] = 
"Drawable to save"; + static char filename_name[] = "filename"; + static char filename_description[] = "The name of the file to save"; + static char raw_filename_name[] = "raw-filename"; + static char raw_filename_description[] = "The name of the file to save"; + static const GimpParamDef save_args[] = { + {GIMP_PDB_INT32, run_mode_name, run_mode_description}, + {GIMP_PDB_IMAGE, image_name, image_description}, + {GIMP_PDB_DRAWABLE, drawable_name, drawable_description}, + {GIMP_PDB_STRING, filename_name, filename_description}, + {GIMP_PDB_STRING, raw_filename_name, raw_filename_description}, + }; + + gimp_install_procedure( + /*name=*/kSaveProc, /*blurb=*/"Saves JPEG XL image files", + /*help=*/"Saves JPEG XL image files", /*author=*/"JPEG XL Project", + /*copyright=*/"JPEG XL Project", /*date=*/"2019", + /*menu_label=*/"JPEG XL image", /*image_types=*/"RGB*, GRAY*", + /*type=*/GIMP_PLUGIN, /*n_params=*/G_N_ELEMENTS(save_args), + /*n_return_vals=*/0, /*params=*/save_args, + /*return_vals=*/nullptr); + gimp_register_file_handler_mime(kSaveProc, "image/jxl"); + gimp_register_save_handler(kSaveProc, "jxl", ""); + } +} + +void Run(const gchar* const name, const gint nparams, + const GimpParam* const params, gint* const nreturn_vals, + GimpParam** const return_vals) { + gegl_init(nullptr, nullptr); + + static GimpParam values[2]; + + *nreturn_vals = 1; + *return_vals = values; + + values[0].type = GIMP_PDB_STATUS; + values[0].data.d_status = GIMP_PDB_EXECUTION_ERROR; + + if (strcmp(name, kLoadProc) == 0) { + if (nparams != 3) { + values[0].data.d_status = GIMP_PDB_CALLING_ERROR; + return; + } + + const gchar* const filename = params[1].data.d_string; + gint32 image_id; + if (!LoadJpegXlImage(filename, &image_id)) { + values[0].data.d_status = GIMP_PDB_EXECUTION_ERROR; + return; + } + + *nreturn_vals = 2; + values[0].data.d_status = GIMP_PDB_SUCCESS; + values[1].type = GIMP_PDB_IMAGE; + values[1].data.d_image = image_id; + } else if (strcmp(name, kSaveProc) == 0) { + if 
(nparams != 5) { + values[0].data.d_status = GIMP_PDB_CALLING_ERROR; + return; + } + + gint32 image_id = params[1].data.d_image; + gint32 drawable_id = params[2].data.d_drawable; + const gchar* const filename = params[3].data.d_string; + const gint32 orig_image_id = image_id; + const GimpExportReturn export_result = gimp_export_image( + &image_id, &drawable_id, "JPEG XL", + static_cast(GIMP_EXPORT_CAN_HANDLE_RGB | + GIMP_EXPORT_CAN_HANDLE_GRAY | + GIMP_EXPORT_CAN_HANDLE_ALPHA)); + switch (export_result) { + case GIMP_EXPORT_CANCEL: + values[0].data.d_status = GIMP_PDB_CANCEL; + return; + case GIMP_EXPORT_IGNORE: + break; + case GIMP_EXPORT_EXPORT: + break; + } + if (!SaveJpegXlImage(image_id, drawable_id, orig_image_id, filename)) { + return; + } + if (image_id != orig_image_id) { + gimp_image_delete(image_id); + } + values[0].data.d_status = GIMP_PDB_SUCCESS; + } +} + +} // namespace +} // namespace jxl + +static const GimpPlugInInfo PLUG_IN_INFO = {nullptr, nullptr, &jxl::Query, + &jxl::Run}; + +MAIN() diff --git a/third-party/libjxl/libjxl/plugins/mime/CMakeLists.txt b/third-party/libjxl/libjxl/plugins/mime/CMakeLists.txt new file mode 100644 index 0000000000..6f2a0f919c --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/mime/CMakeLists.txt @@ -0,0 +1,6 @@ +# Copyright (c) the JPEG XL Project Authors. All rights reserved. +# +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +install(FILES image-jxl.xml DESTINATION share/mime/packages/) diff --git a/third-party/libjxl/libjxl/plugins/mime/README.md b/third-party/libjxl/libjxl/plugins/mime/README.md new file mode 100644 index 0000000000..4d398c7b90 --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/mime/README.md @@ -0,0 +1,37 @@ +## :warning: Not needed anymore + +As `image/jxl` is now supported by [shared-mime-info 2.2](https://gitlab.freedesktop.org/xdg/shared-mime-info/-/releases/2.2), it should no longer be necessary to install this plugin. 
+ +You can test if your system correctly understands the MIME type of JPEG XL images by obtaining a JPEG XL image, e.g. with +```bash +wget https://raw.githubusercontent.com/libjxl/conformance/master/testcases/bicycles/input.jxl +``` +and with that sample JPEG XL file `input.jxl` (or any other valid JPEG XL file), run any of the following commands: +```bash +xdg-mime query filetype input.jxl +file --mime-type input.jxl +mimetype input.jxl +``` +If the output contains `image/jxl` you are all set! + + +## JPEG XL MIME type + +If not already installed by the [Installing section of BUILDING.md](../../BUILDING.md#installing), then it can be done manually: + +### Install +```bash +sudo xdg-mime install --novendor image-jxl.xml +``` + +Then run: +``` +update-mime --local +``` + + +### Uninstall +```bash +sudo xdg-mime uninstall image-jxl.xml +``` + diff --git a/third-party/libjxl/libjxl/plugins/mime/image-jxl.xml b/third-party/libjxl/libjxl/plugins/mime/image-jxl.xml new file mode 100644 index 0000000000..cab9018c7d --- /dev/null +++ b/third-party/libjxl/libjxl/plugins/mime/image-jxl.xml @@ -0,0 +1,13 @@ + + + + JPEG XL image + image JPEG XL + JPEG XL afbeelding + + + + + + + diff --git a/third-party/libjxl/libjxl/third_party/CMakeLists.txt b/third-party/libjxl/libjxl/third_party/CMakeLists.txt new file mode 100644 index 0000000000..d22441f668 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/CMakeLists.txt @@ -0,0 +1,175 @@ +# Copyright (c) the JPEG XL Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +if((SANITIZER STREQUAL "asan") OR (SANITIZER STREQUAL "msan")) + set(BUILD_TESTING OFF) +endif() + +# Highway +set(HWY_SYSTEM_GTEST ON CACHE INTERNAL "") +set(HWY_FORCE_STATIC_LIBS ON CACHE INTERNAL "") +set(HWY_ENABLE_CONTRIB OFF CACHE INTERNAL "") +set(HWY_ENABLE_EXAMPLES OFF CACHE INTERNAL "") +set(HWY_ENABLE_TESTS OFF CACHE INTERNAL "") +if((SANITIZER STREQUAL "asan") OR (SANITIZER STREQUAL "msan")) + set(HWY_ENABLE_INSTALL OFF CACHE INTERNAL "") +endif() +if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/highway/CMakeLists.txt" AND + NOT JPEGXL_FORCE_SYSTEM_HWY) + add_subdirectory(highway) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/highway/LICENSE" + ${PROJECT_BINARY_DIR}/LICENSE.highway COPYONLY) +else() + find_package(HWY 1.0.6) + if (NOT HWY_FOUND) + message(FATAL_ERROR + "Highway library (hwy) not found. Install libhwy-dev or download it " + "to third_party/highway from https://github.com/google/highway . " + "Highway is required to build JPEG XL. You can run " + "${PROJECT_SOURCE_DIR}/deps.sh to download this dependency.") + endif() + if(JPEGXL_DEP_LICENSE_DIR) + configure_file("${JPEGXL_DEP_LICENSE_DIR}/libhwy-dev/copyright" + ${PROJECT_BINARY_DIR}/LICENSE.highway COPYONLY) + endif() # JPEGXL_DEP_LICENSE_DIR +endif() + +# brotli +if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/brotli/c/include/brotli/decode.h" OR + JPEGXL_FORCE_SYSTEM_BROTLI) + find_package(Brotli) + if (NOT Brotli_FOUND) + message(FATAL_ERROR + "Brotli not found, install brotli-dev or download brotli source code to" + " third_party/brotli from https://github.com/google/brotli. You can use" + " ${PROJECT_SOURCE_DIR}/deps.sh to download this dependency.") + endif () + if(JPEGXL_DEP_LICENSE_DIR) + configure_file("${JPEGXL_DEP_LICENSE_DIR}/libbrotli-dev/copyright" + ${PROJECT_BINARY_DIR}/LICENSE.brotli COPYONLY) + endif() # JPEGXL_DEP_LICENSE_DIR +else() + # Compile brotli from sources. 
+ set(BROTLI_DISABLE_TESTS ON CACHE STRING "Disable Brotli tests") + # Override default "no-install" policy. + if((NOT SANITIZER STREQUAL "asan") AND (NOT SANITIZER STREQUAL "msan")) + set(BROTLI_BUNDLED_MODE OFF CACHE INTERNAL "") + endif() + add_subdirectory(brotli) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/brotli/LICENSE" + ${PROJECT_BINARY_DIR}/LICENSE.brotli COPYONLY) + if(APPLE) + if(NOT DEFINED CMAKE_MACOSX_RPATH) + # Use @rpath in install_name when CMAKE_MACOSX_RPATH is not set. + set_property(TARGET brotlienc PROPERTY MACOSX_RPATH TRUE) + set_property(TARGET brotlidec PROPERTY MACOSX_RPATH TRUE) + set_property(TARGET brotlicommon PROPERTY MACOSX_RPATH TRUE) + endif() + if((NOT DEFINED CMAKE_MACOSX_RPATH) OR CMAKE_MACOSX_RPATH) + # Set library search path when @rpath is used. + if(NOT DEFINED CMAKE_INSTALL_RPATH) + set_property(TARGET brotlienc PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + set_property(TARGET brotlidec PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + set_property(TARGET brotlicommon PROPERTY INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + endif() + else() + # Set conventional install_name when @rpath is not used. 
+ if(NOT DEFINED CMAKE_INSTALL_NAME_DIR) + set_property(TARGET brotlienc PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") + set_property(TARGET brotlidec PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") + set_property(TARGET brotlicommon PROPERTY INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") + endif() + endif() + endif() # APPLE +endif() + +# *cms +if (JPEGXL_ENABLE_SKCMS) + if( NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/skcms/skcms.h" ) + message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the " + "build dependencies.") + endif() + include(skcms.cmake) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/skcms/LICENSE" + ${PROJECT_BINARY_DIR}/LICENSE.skcms COPYONLY) +endif () +if (JPEGXL_ENABLE_VIEWERS OR NOT JPEGXL_ENABLE_SKCMS OR JPEGXL_ENABLE_PLUGINS) + if( NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/lcms/.git" OR JPEGXL_FORCE_SYSTEM_LCMS2 ) + find_package(LCMS2 2.13) + if ( NOT LCMS2_FOUND ) + message(FATAL_ERROR "Please install lcms2 or run git submodule update --init") + endif () + else() + include(lcms2.cmake) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lcms/COPYING" + ${PROJECT_BINARY_DIR}/LICENSE.lcms COPYONLY) + endif() +endif() + +# libpng +if (JPEGXL_BUNDLE_LIBPNG AND JPEGXL_EMSCRIPTEN) + if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/CMakeLists.txt") + message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the " + "build dependencies.") + endif() + file(COPY "${CMAKE_CURRENT_SOURCE_DIR}/libpng/scripts/pnglibconf.h.prebuilt" DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/libpng") + file(RENAME "${CMAKE_CURRENT_SOURCE_DIR}/libpng/pnglibconf.h.prebuilt" "${CMAKE_CURRENT_SOURCE_DIR}/libpng/pnglibconf.h") + set(ZLIB_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/zlib/") + set(ZLIB_LIBRARY "") + set(PNG_FOUND YES PARENT_SCOPE) + set(PNG_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/" PARENT_SCOPE) + set(PNG_LIBRARIES "" PARENT_SCOPE) +elseif (JPEGXL_BUNDLE_LIBPNG) + if (NOT EXISTS 
"${CMAKE_CURRENT_SOURCE_DIR}/libpng/CMakeLists.txt") + message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the " + "build dependencies.") + endif() + add_subdirectory(zlib) + set(PNG_STATIC ON CACHE BOOL "") + set(PNG_EXECUTABLES OFF CACHE BOOL "") + set(PNG_BUILD_ZLIB ON CACHE BOOL "") + set(PNG_TESTS OFF CACHE BOOL "") + set(SKIP_INSTALL_ALL ON CACHE BOOL "") + set(ZLIB_INCLUDE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/zlib/") + set(ZLIB_LIBRARY zlibstatic) + add_subdirectory(libpng EXCLUDE_FROM_ALL) + set(PNG_FOUND YES PARENT_SCOPE) + set(PNG_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/libpng/" PARENT_SCOPE) + set(PNG_LIBRARIES png_static PARENT_SCOPE) + set_property(TARGET png_static PROPERTY POSITION_INDEPENDENT_CODE ON) + set_property(TARGET zlibstatic PROPERTY POSITION_INDEPENDENT_CODE ON) + if(JPEGXL_DEP_LICENSE_DIR) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libpng/LICENSE" + ${PROJECT_BINARY_DIR}/LICENSE.libpng COPYONLY) + endif() +else() + find_package(PNG) + if(PNG_FOUND AND JPEGXL_DEP_LICENSE_DIR) + configure_file("${JPEGXL_DEP_LICENSE_DIR}/zlib1g-dev/copyright" + ${PROJECT_BINARY_DIR}/LICENSE.zlib COPYONLY) + configure_file("${JPEGXL_DEP_LICENSE_DIR}/libpng-dev/copyright" + ${PROJECT_BINARY_DIR}/LICENSE.libpng COPYONLY) + endif() # JPEGXL_DEP_LICENSE_DIR +endif() + +# sjpeg +if (JPEGXL_ENABLE_SJPEG) + if (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/sjpeg/CMakeLists.txt") + message(FATAL_ERROR "Please run ${PROJECT_SOURCE_DIR}/deps.sh to fetch the " + "build dependencies.") + endif() + include(sjpeg.cmake) + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/sjpeg/COPYING" + ${PROJECT_BINARY_DIR}/LICENSE.sjpeg COPYONLY) +endif () diff --git a/third-party/libjxl/libjxl/third_party/brotli/BUILD b/third-party/libjxl/libjxl/third_party/brotli/BUILD new file mode 100644 index 0000000000..07a6793054 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/BUILD @@ -0,0 +1,147 @@ +# Description: +# Brotli is a generic-purpose lossless compression 
algorithm. + +load(":compiler_config_setting.bzl", "create_msvc_config") + +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # MIT + +exports_files(["LICENSE"]) + +config_setting( + name = "darwin", + values = {"cpu": "darwin"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "darwin_x86_64", + values = {"cpu": "darwin_x86_64"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "windows", + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "windows_msvc", + values = {"cpu": "x64_windows_msvc"}, + visibility = ["//visibility:public"], +) + +config_setting( + name = "windows_msys", + values = {"cpu": "x64_windows_msys"}, + visibility = ["//visibility:public"], +) + +create_msvc_config() + +STRICT_C_OPTIONS = select({ + ":msvc": [], + "//conditions:default": [ + "--pedantic-errors", + "-Wall", + "-Wconversion", + "-Werror", + "-Wextra", + "-Wlong-long", + "-Wmissing-declarations", + "-Wmissing-prototypes", + "-Wno-strict-aliasing", + "-Wshadow", + "-Wsign-compare", + ], +}) + +filegroup( + name = "public_headers", + srcs = glob(["c/include/brotli/*.h"]), +) + +filegroup( + name = "common_headers", + srcs = glob(["c/common/*.h"]), +) + +filegroup( + name = "common_sources", + srcs = glob(["c/common/*.c"]), +) + +filegroup( + name = "dec_headers", + srcs = glob(["c/dec/*.h"]), +) + +filegroup( + name = "dec_sources", + srcs = glob(["c/dec/*.c"]), +) + +filegroup( + name = "enc_headers", + srcs = glob(["c/enc/*.h"]), +) + +filegroup( + name = "enc_sources", + srcs = glob(["c/enc/*.c"]), +) + +cc_library( + name = "brotli_inc", + hdrs = [":public_headers"], + copts = STRICT_C_OPTIONS, + strip_include_prefix = "c/include", +) + +cc_library( + name = "brotlicommon", + srcs = [":common_sources"], + hdrs = [":common_headers"], + copts = STRICT_C_OPTIONS, + deps = [":brotli_inc"], +) + +cc_library( + name = "brotlidec", + srcs = [":dec_sources"], 
+ hdrs = [":dec_headers"], + copts = STRICT_C_OPTIONS, + deps = [":brotlicommon"], +) + +cc_library( + name = "brotlienc", + srcs = [":enc_sources"], + hdrs = [":enc_headers"], + copts = STRICT_C_OPTIONS, + linkopts = select({ + ":msvc": [], + "//conditions:default": ["-lm"], + }), + deps = [":brotlicommon"], +) + +cc_binary( + name = "brotli", + srcs = ["c/tools/brotli.c"], + copts = STRICT_C_OPTIONS, + linkstatic = 1, + deps = [ + ":brotlidec", + ":brotlienc", + ], +) + +filegroup( + name = "dictionary", + srcs = ["c/common/dictionary.bin"], +) diff --git a/third-party/libjxl/libjxl/third_party/brotli/CMakeLists.txt b/third-party/libjxl/libjxl/third_party/brotli/CMakeLists.txt new file mode 100644 index 0000000000..9e4cf40830 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/CMakeLists.txt @@ -0,0 +1,401 @@ +# Available CMake versions: +# - Ubuntu 18.04 LTS (deprecated on GitHub Actions) : 3.10.4 +# - Solaris 11.4 SRU 15 : 3.15 +cmake_minimum_required(VERSION 3.10.4) + +# Since this project's version is loaded from other files, this policy +# will help suppress the warning generated by cmake. +# This policy is set because we can't provide "VERSION" in "project" command. +# Use `cmake --help-policy CMP0048` for more information. +cmake_policy(SET CMP0048 NEW) +project(brotli C) + +option(BUILD_SHARED_LIBS "Build shared libraries" ON) + +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "Setting build type to Release as none was specified.") + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build." 
FORCE) +else() + message(STATUS "Build type is '${CMAKE_BUILD_TYPE}'") +endif() + +include(CheckCSourceCompiles) +check_c_source_compiles( + "#if defined(__EMSCRIPTEN__) + int main() {return 0;} + #endif" + BROTLI_EMSCRIPTEN +) +if (BROTLI_EMSCRIPTEN) + message("-- Compiler is EMSCRIPTEN") +else() + message("-- Compiler is not EMSCRIPTEN") +endif() + +# If Brotli is being bundled in another project, we don't want to +# install anything. However, we want to let people override this, so +# we'll use the BROTLI_BUNDLED_MODE variable to let them do that; just +# set it to OFF in your project before you add_subdirectory(brotli). +get_directory_property(BROTLI_PARENT_DIRECTORY PARENT_DIRECTORY) +if(NOT DEFINED BROTLI_BUNDLED_MODE) + # Bundled mode hasn't been set one way or the other, set the default + # depending on whether or not we are the top-level project. + if(BROTLI_PARENT_DIRECTORY) + set(BROTLI_BUNDLED_MODE ON) + else() + set(BROTLI_BUNDLED_MODE OFF) + endif() +endif() +mark_as_advanced(BROTLI_BUNDLED_MODE) + +include(GNUInstallDirs) + +# Parse version information from common/version.h. Normally we would +# define these values here and write them out to configuration file(s) +# (i.e., config.h), but in this case we parse them from +# common/version.h to be less intrusive. 
+function(hex_to_dec HEXADECIMAL DECIMAL) + string(TOUPPER "${HEXADECIMAL}" _tail) + set(_decimal 0) + string(LENGTH "${_tail}" _tail_length) + while (_tail_length GREATER 0) + math(EXPR _decimal "${_decimal} * 16") + string(SUBSTRING "${_tail}" 0 1 _digit) + string(SUBSTRING "${_tail}" 1 -1 _tail) + if (_digit STREQUAL "A") + math(EXPR _decimal "${_decimal} + 10") + elseif (_digit STREQUAL "B") + math(EXPR _decimal "${_decimal} + 11") + elseif (_digit STREQUAL "C") + math(EXPR _decimal "${_decimal} + 12") + elseif (_digit STREQUAL "D") + math(EXPR _decimal "${_decimal} + 13") + elseif (_digit STREQUAL "E") + math(EXPR _decimal "${_decimal} + 14") + elseif (_digit STREQUAL "F") + math(EXPR _decimal "${_decimal} + 15") + else() + math(EXPR _decimal "${_decimal} + ${_digit}") + endif() + string(LENGTH "${_tail}" _tail_length) + endwhile() + set(${DECIMAL} ${_decimal} PARENT_SCOPE) +endfunction(hex_to_dec) + +# Version information +file(STRINGS "c/common/version.h" _brotli_version_line REGEX "^#define BROTLI_VERSION (0x[0-9a-fA-F]+)$") +string(REGEX REPLACE "^#define BROTLI_VERSION 0x([0-9a-fA-F]+)$" "\\1" _brotli_version_hex "${_brotli_version_line}") +hex_to_dec("${_brotli_version_hex}" _brotli_version) +math(EXPR BROTLI_VERSION_MAJOR "${_brotli_version} >> 24") +math(EXPR BROTLI_VERSION_MINOR "(${_brotli_version} >> 12) & 4095") +math(EXPR BROTLI_VERSION_PATCH "${_brotli_version} & 4095") +set(BROTLI_VERSION "${BROTLI_VERSION_MAJOR}.${BROTLI_VERSION_MINOR}.${BROTLI_VERSION_PATCH}") +mark_as_advanced(BROTLI_VERSION BROTLI_VERSION_MAJOR BROTLI_VERSION_MINOR BROTLI_VERSION_PATCH) + +# ABI Version information +file(STRINGS "c/common/version.h" _brotli_abi_info_line REGEX "^#define BROTLI_ABI_VERSION (0x[0-9a-fA-F]+)$") +string(REGEX REPLACE "^#define BROTLI_ABI_VERSION 0x([0-9a-fA-F]+)$" "\\1" _brotli_abi_info_hex "${_brotli_abi_info_line}") +hex_to_dec("${_brotli_abi_info_hex}" _brotli_abi_info) +math(EXPR BROTLI_ABI_CURRENT "${_brotli_abi_info} >> 24") +math(EXPR 
BROTLI_ABI_REVISION "(${_brotli_abi_info} >> 12) & 4095") +math(EXPR BROTLI_ABI_AGE "${_brotli_abi_info} & 4095") +math(EXPR BROTLI_ABI_COMPATIBILITY "${BROTLI_ABI_CURRENT} - ${BROTLI_ABI_AGE}") +mark_as_advanced(BROTLI_ABI_CURRENT BROTLI_ABI_REVISION BROTLI_ABI_AGE BROTLI_ABI_COMPATIBILITY) + +if (ENABLE_SANITIZER) + set(CMAKE_C_FLAGS " ${CMAKE_C_FLAGS} -fsanitize=${ENABLE_SANITIZER}") + set(CMAKE_CXX_FLAGS " ${CMAKE_CXX_FLAGS} -fsanitize=${ENABLE_SANITIZER}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=${ENABLE_SANITIZER}") +endif () + +include(CheckFunctionExists) +set(LIBM_LIBRARY) +CHECK_FUNCTION_EXISTS(log2 LOG2_RES) +if(NOT LOG2_RES) + set(orig_req_libs "${CMAKE_REQUIRED_LIBRARIES}") + set(CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES};m") + CHECK_FUNCTION_EXISTS(log2 LOG2_LIBM_RES) + if(LOG2_LIBM_RES) + set(LIBM_LIBRARY "m") + add_definitions(-DBROTLI_HAVE_LOG2=1) + else() + add_definitions(-DBROTLI_HAVE_LOG2=0) + endif() + + set(CMAKE_REQUIRED_LIBRARIES "${orig_req_libs}") + unset(LOG2_LIBM_RES) + unset(orig_req_libs) +else() + add_definitions(-DBROTLI_HAVE_LOG2=1) +endif() +unset(LOG2_RES) + +set(BROTLI_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/c/include") +mark_as_advanced(BROTLI_INCLUDE_DIRS) + +set(BROTLI_LIBRARIES_CORE brotlienc brotlidec brotlicommon) +set(BROTLI_LIBRARIES ${BROTLI_LIBRARIES_CORE} ${LIBM_LIBRARY}) +mark_as_advanced(BROTLI_LIBRARIES) + +if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") + add_definitions(-DOS_LINUX) +elseif(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + add_definitions(-DOS_FREEBSD) +elseif(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + add_definitions(-DOS_MACOSX) + set(CMAKE_MACOS_RPATH TRUE) + set(CMAKE_INSTALL_NAME_DIR "${CMAKE_INSTALL_PREFIX}/lib") +endif() + +function(transform_sources_list INPUT_FILE OUTPUT_FILE) + file(READ ${INPUT_FILE} TEXT) + string(REGEX REPLACE "\\\\\n" "~continuation~" TEXT ${TEXT}) + string(REGEX REPLACE "([a-zA-Z_][a-zA-Z0-9_]*)[\t ]*=[\t ]*([^\n]*)" "SET(\\1 \\2)" TEXT 
${TEXT}) + string(REPLACE "~continuation~" "\n" TEXT ${TEXT}) + file(WRITE ${OUTPUT_FILE} ${TEXT}) +endfunction() + +transform_sources_list("scripts/sources.lst" "${CMAKE_CURRENT_BINARY_DIR}/sources.lst.cmake") +include("${CMAKE_CURRENT_BINARY_DIR}/sources.lst.cmake") + +if(BROTLI_EMSCRIPTEN) + set(BUILD_SHARED_LIBS OFF) +endif() + +add_library(brotlicommon ${BROTLI_COMMON_C}) +add_library(brotlidec ${BROTLI_DEC_C}) +add_library(brotlienc ${BROTLI_ENC_C}) + +# Older CMake versions does not understand INCLUDE_DIRECTORIES property. +include_directories(${BROTLI_INCLUDE_DIRS}) + +if(BUILD_SHARED_LIBS) + foreach(lib ${BROTLI_LIBRARIES_CORE}) + target_compile_definitions(${lib} PUBLIC "BROTLI_SHARED_COMPILATION" ) + string(TOUPPER "${lib}" LIB) + set_target_properties (${lib} PROPERTIES DEFINE_SYMBOL "${LIB}_SHARED_COMPILATION") + endforeach() +endif() + +foreach(lib ${BROTLI_LIBRARIES_CORE}) + target_link_libraries(${lib} ${LIBM_LIBRARY}) + set_property(TARGET ${lib} APPEND PROPERTY INCLUDE_DIRECTORIES ${BROTLI_INCLUDE_DIRS}) + set_target_properties(${lib} PROPERTIES + VERSION "${BROTLI_ABI_COMPATIBILITY}.${BROTLI_ABI_AGE}.${BROTLI_ABI_REVISION}" + SOVERSION "${BROTLI_ABI_COMPATIBILITY}") + if(NOT BROTLI_EMSCRIPTEN) + set_target_properties(${lib} PROPERTIES POSITION_INDEPENDENT_CODE TRUE) + endif() + set_property(TARGET ${lib} APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "$") +endforeach() + +if(NOT BROTLI_EMSCRIPTEN) +target_link_libraries(brotlidec brotlicommon) +target_link_libraries(brotlienc brotlicommon) +endif() + +# For projects stuck on older versions of CMake, this will set the +# BROTLI_INCLUDE_DIRS and BROTLI_LIBRARIES variables so they still +# have a relatively easy way to use Brotli: +# +# include_directories(${BROTLI_INCLUDE_DIRS}) +# target_link_libraries(foo ${BROTLI_LIBRARIES}) +if(BROTLI_PARENT_DIRECTORY) + set(BROTLI_INCLUDE_DIRS "${BROTLI_INCLUDE_DIRS}" PARENT_SCOPE) + set(BROTLI_LIBRARIES "${BROTLI_LIBRARIES}" PARENT_SCOPE) +endif() + +# Build 
the brotli executable +add_executable(brotli ${BROTLI_CLI_C}) +target_link_libraries(brotli ${BROTLI_LIBRARIES}) + +# Installation +if(NOT BROTLI_BUNDLED_MODE) + install( + TARGETS brotli + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + ) + + install( + TARGETS ${BROTLI_LIBRARIES_CORE} + ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" + ) + + install( + DIRECTORY ${BROTLI_INCLUDE_DIRS}/brotli + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" + ) +endif() # BROTLI_BUNDLED_MODE + +# Tests + +# If we're targeting Windows but not running on Windows, we need Wine +# to run the tests... +if(NOT BROTLI_DISABLE_TESTS) + if(WIN32 AND NOT CMAKE_HOST_WIN32) + find_program(BROTLI_WRAPPER NAMES wine) + + if(NOT BROTLI_WRAPPER) + message(STATUS "wine not found, disabling tests") + set(BROTLI_DISABLE_TESTS TRUE) + endif() + endif() +endif() + +# If our compiler is a cross-compiler that we know about (arm/aarch64), +# then we need to use qemu to execute the tests. 
+if(NOT BROTLI_DISABLE_TESTS) + if ("${CMAKE_C_COMPILER}" MATCHES "^.*/arm-linux-gnueabihf-.*$") + message(STATUS "Detected arm-linux-gnueabihf cross-compilation") + set(BROTLI_WRAPPER "qemu-arm") + set(BROTLI_WRAPPER_LD_PREFIX "/usr/arm-linux-gnueabihf") + endif() + + if ("${CMAKE_C_COMPILER}" MATCHES "^.*/arm-linux-gnueabi-.*$") + message(STATUS "Detected arm-linux-gnueabi cross-compilation") + set(BROTLI_WRAPPER "qemu-arm") + set(BROTLI_WRAPPER_LD_PREFIX "/usr/arm-linux-gnueabi") + endif() + + if ("${CMAKE_C_COMPILER}" MATCHES "^.*/aarch64-linux-gnu-.*$") + message(STATUS "Detected aarch64-linux-gnu cross-compilation") + set(BROTLI_WRAPPER "qemu-aarch64") + set(BROTLI_WRAPPER_LD_PREFIX "/usr/aarch64-linux-gnu") + endif() +endif() + +if(NOT BROTLI_DISABLE_TESTS) + include(CTest) + enable_testing() + + set(ROUNDTRIP_INPUTS + tests/testdata/alice29.txt + tests/testdata/asyoulik.txt + tests/testdata/lcet10.txt + tests/testdata/plrabn12.txt + c/enc/encode.c + c/common/dictionary.h + c/dec/decode.c) + + foreach(INPUT ${ROUNDTRIP_INPUTS}) + get_filename_component(OUTPUT_NAME "${INPUT}" NAME) + + set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}") + set(INPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${INPUT}") + + if (EXISTS "${INPUT_FILE}") + foreach(quality 1 6 9 11) + add_test(NAME "${BROTLI_TEST_PREFIX}roundtrip/${INPUT}/${quality}" + COMMAND "${CMAKE_COMMAND}" + -DBROTLI_WRAPPER=${BROTLI_WRAPPER} + -DBROTLI_WRAPPER_LD_PREFIX=${BROTLI_WRAPPER_LD_PREFIX} + -DBROTLI_CLI=$ + -DQUALITY=${quality} + -DINPUT=${INPUT_FILE} + -DOUTPUT=${OUTPUT_FILE}.${quality} + -P ${CMAKE_CURRENT_SOURCE_DIR}/tests/run-roundtrip-test.cmake) + endforeach() + else() + message(WARNING "Test file ${INPUT} does not exist.") + endif() + endforeach() + + file(GLOB_RECURSE + COMPATIBILITY_INPUTS + RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} + tests/testdata/*.compressed*) + + foreach(INPUT ${COMPATIBILITY_INPUTS}) + add_test(NAME "${BROTLI_TEST_PREFIX}compatibility/${INPUT}" + COMMAND 
"${CMAKE_COMMAND}" + -DBROTLI_WRAPPER=${BROTLI_WRAPPER} + -DBROTLI_WRAPPER_LD_PREFIX=${BROTLI_WRAPPER_LD_PREFIX} + -DBROTLI_CLI=$ + -DINPUT=${CMAKE_CURRENT_SOURCE_DIR}/${INPUT} + -P ${CMAKE_CURRENT_SOURCE_DIR}/tests/run-compatibility-test.cmake) + endforeach() +endif() + +# Generate a pkg-config files + +function(generate_pkg_config_path outvar path) + string(LENGTH "${path}" path_length) + + set(path_args ${ARGV}) + list(REMOVE_AT path_args 0 1) + list(LENGTH path_args path_args_remaining) + + set("${outvar}" "${path}") + + while(path_args_remaining GREATER 1) + list(GET path_args 0 name) + list(GET path_args 1 value) + + get_filename_component(value_full "${value}" ABSOLUTE) + string(LENGTH "${value}" value_length) + + if(path_length EQUAL value_length AND path STREQUAL value) + set("${outvar}" "\${${name}}") + break() + elseif(path_length GREATER value_length) + # We might be in a subdirectory of the value, but we have to be + # careful about a prefix matching but not being a subdirectory + # (for example, /usr/lib64 is not a subdirectory of /usr/lib). + # We'll do this by making sure the next character is a directory + # separator. 
+ string(SUBSTRING "${path}" ${value_length} 1 sep) + if(sep STREQUAL "/") + string(SUBSTRING "${path}" 0 ${value_length} s) + if(s STREQUAL value) + string(SUBSTRING "${path}" "${value_length}" -1 suffix) + set("${outvar}" "\${${name}}${suffix}") + break() + endif() + endif() + endif() + + list(REMOVE_AT path_args 0 1) + list(LENGTH path_args path_args_remaining) + endwhile() + + set("${outvar}" "${${outvar}}" PARENT_SCOPE) +endfunction(generate_pkg_config_path) + +function(transform_pc_file INPUT_FILE OUTPUT_FILE VERSION) + file(READ ${INPUT_FILE} TEXT) + + set(PREFIX "${CMAKE_INSTALL_PREFIX}") + string(REGEX REPLACE "@prefix@" "${PREFIX}" TEXT ${TEXT}) + string(REGEX REPLACE "@exec_prefix@" "${PREFIX}" TEXT ${TEXT}) + + generate_pkg_config_path(LIBDIR "${CMAKE_INSTALL_FULL_LIBDIR}" prefix "${PREFIX}") + string(REGEX REPLACE "@libdir@" "${LIBDIR}" TEXT ${TEXT}) + + generate_pkg_config_path(INCLUDEDIR "${CMAKE_INSTALL_FULL_INCLUDEDIR}" prefix "${PREFIX}") + string(REGEX REPLACE "@includedir@" "${INCLUDEDIR}" TEXT ${TEXT}) + + string(REGEX REPLACE "@PACKAGE_VERSION@" "${VERSION}" TEXT ${TEXT}) + + file(WRITE ${OUTPUT_FILE} ${TEXT}) +endfunction() + +transform_pc_file("scripts/libbrotlicommon.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libbrotlicommon.pc" "${BROTLI_VERSION}") + +transform_pc_file("scripts/libbrotlidec.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libbrotlidec.pc" "${BROTLI_VERSION}") + +transform_pc_file("scripts/libbrotlienc.pc.in" "${CMAKE_CURRENT_BINARY_DIR}/libbrotlienc.pc" "${BROTLI_VERSION}") + +if(NOT BROTLI_BUNDLED_MODE) + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libbrotlicommon.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libbrotlidec.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") + install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libbrotlienc.pc" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig") +endif() # BROTLI_BUNDLED_MODE + +if (ENABLE_COVERAGE STREQUAL "yes") + 
SETUP_TARGET_FOR_COVERAGE(coverage test coverage) +endif () diff --git a/third-party/libjxl/libjxl/third_party/brotli/CONTRIBUTING.md b/third-party/libjxl/libjxl/third_party/brotli/CONTRIBUTING.md new file mode 100644 index 0000000000..a00e37d17f --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/CONTRIBUTING.md @@ -0,0 +1,27 @@ +Want to contribute? Great! First, read this page (including the small print at +the end). + +### Before you contribute +Before we can use your code, you must sign the +[Google Individual Contributor License Agreement] +(https://cla.developers.google.com/about/google-individual) +(CLA), which you can do online. The CLA is necessary mainly because you own the +copyright to your changes, even after your contribution becomes part of our +codebase, so we need your permission to use and distribute your code. We also +need to be sure of various other things—for instance that you'll tell us if you +know that your code infringes on other people's patents. You don't have to sign +the CLA until after you've submitted your code for review and a member has +approved it, but you must do it before we can put your code into our codebase. +Before you start working on a larger contribution, you should get in touch with +us first through the issue tracker with your idea so that we can help out and +possibly guide you. Coordinating up front makes it much easier to avoid +frustration later on. + +### Code reviews +All submissions, including submissions by project members, require review. We +use Github pull requests for this purpose. + +### The small print +Contributions made by corporations are covered by a different agreement than +the one above, the [Software Grant and Corporate Contributor License Agreement] +(https://cla.developers.google.com/about/google-corporate). 
diff --git a/third-party/libjxl/libjxl/third_party/brotli/LICENSE b/third-party/libjxl/libjxl/third_party/brotli/LICENSE new file mode 100644 index 0000000000..33b7cdd2db --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/third-party/libjxl/libjxl/third_party/brotli/MANIFEST.in b/third-party/libjxl/libjxl/third_party/brotli/MANIFEST.in new file mode 100644 index 0000000000..ff8d600656 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/MANIFEST.in @@ -0,0 +1,19 @@ +include CONTRIBUTING.md +include c/common/*.c +include c/common/*.h +include c/dec/*.c +include c/dec/*.h +include c/enc/*.c +include c/enc/*.h +include c/include/brotli/*.h +include LICENSE +include MANIFEST.in +include python/_brotli.cc +include python/bro.py +include python/brotli.py +include python/README.md +include python/tests/* +include README.md +include setup.py +include tests/testdata/* +include c/tools/brotli.c diff --git a/third-party/libjxl/libjxl/third_party/brotli/Makefile b/third-party/libjxl/libjxl/third_party/brotli/Makefile new file mode 100644 index 0000000000..4890940907 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/Makefile @@ -0,0 +1,55 @@ +OS := $(shell uname) +LIBSOURCES = $(wildcard c/common/*.c) $(wildcard c/dec/*.c) \ + $(wildcard c/enc/*.c) +SOURCES = $(LIBSOURCES) c/tools/brotli.c +BINDIR = bin +OBJDIR = $(BINDIR)/obj +LIBOBJECTS = $(addprefix $(OBJDIR)/, $(LIBSOURCES:.c=.o)) +OBJECTS = $(addprefix $(OBJDIR)/, $(SOURCES:.c=.o)) +LIB_A = libbrotli.a +EXECUTABLE = brotli +DIRS = $(OBJDIR)/c/common $(OBJDIR)/c/dec $(OBJDIR)/c/enc \ + $(OBJDIR)/c/tools $(BINDIR)/tmp +CFLAGS += -O2 +ifeq ($(os), Darwin) + CPPFLAGS += -DOS_MACOSX +endif + +ifneq ($(strip $(CROSS_COMPILE)), ) + CC=$(CROSS_COMPILE)-gcc + ARCH=$(firstword $(subst -, ,$(CROSS_COMPILE))) + BROTLI_WRAPPER="qemu-$(ARCH) -L /usr/$(CROSS_COMPILE)" +endif + +# The arm-linux-gnueabi compiler defaults to Armv5. Since we only support Armv7 +# and beyond, we need to select Armv7 explicitly with march. 
+ifeq ($(ARCH), arm) + CFLAGS += -march=armv7-a -mfloat-abi=hard -mfpu=neon +endif + +all: test + @: + +.PHONY: all clean test + +$(DIRS): + mkdir -p $@ + +$(EXECUTABLE): $(OBJECTS) + $(CC) $(LDFLAGS) $(OBJECTS) -lm -o $(BINDIR)/$(EXECUTABLE) + +lib: $(LIBOBJECTS) + rm -f $(LIB_A) + ar -crs $(LIB_A) $(LIBOBJECTS) + +test: $(EXECUTABLE) + tests/compatibility_test.sh $(BROTLI_WRAPPER) + tests/roundtrip_test.sh $(BROTLI_WRAPPER) + +clean: + rm -rf $(BINDIR) $(LIB_A) + +.SECONDEXPANSION: +$(OBJECTS): $$(patsubst %.o,%.c,$$(patsubst $$(OBJDIR)/%,%,$$@)) | $(DIRS) + $(CC) $(CFLAGS) $(CPPFLAGS) -Ic/include \ + -c $(patsubst %.o,%.c,$(patsubst $(OBJDIR)/%,%,$@)) -o $@ diff --git a/third-party/libjxl/libjxl/third_party/brotli/Makefile.am b/third-party/libjxl/libjxl/third_party/brotli/Makefile.am new file mode 100644 index 0000000000..ace7a8506e --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/Makefile.am @@ -0,0 +1,38 @@ +AUTOMAKE_OPTIONS = foreign nostdinc subdir-objects + +ACLOCAL_AMFLAGS = -I m4 + +# Actual ABI version is substituted by bootstrap +LIBBROTLI_VERSION_INFO = -version-info 0:0:0 + +bin_PROGRAMS = brotli +lib_LTLIBRARIES = libbrotlicommon.la libbrotlidec.la libbrotlienc.la + +include scripts/sources.lst + +brotliincludedir = $(includedir)/brotli +brotliinclude_HEADERS = $(BROTLI_INCLUDE) + +AM_CFLAGS = -I$(top_srcdir)/c/include + +brotli_SOURCES = $(BROTLI_CLI_C) +brotli_LDADD = libbrotlidec.la libbrotlienc.la libbrotlicommon.la -lm +#brotli_LDFLAGS = -static + +libbrotlicommon_la_SOURCES = $(BROTLI_COMMON_C) $(BROTLI_COMMON_H) +libbrotlicommon_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS) +libbrotlidec_la_SOURCES = $(BROTLI_DEC_C) $(BROTLI_DEC_H) +libbrotlidec_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS) +libbrotlidec_la_LIBADD = libbrotlicommon.la -lm +libbrotlienc_la_SOURCES = $(BROTLI_ENC_C) $(BROTLI_ENC_H) +libbrotlienc_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS) 
+libbrotlienc_la_LIBADD = libbrotlicommon.la -lm + +pkgconfigdir = $(libdir)/pkgconfig +pkgconfig_DATA = \ + scripts/libbrotlicommon.pc \ + scripts/libbrotlidec.pc \ + scripts/libbrotlienc.pc +pkgincludedir= $(brotliincludedir) + +dist_doc_DATA = README diff --git a/third-party/libjxl/libjxl/third_party/brotli/README b/third-party/libjxl/libjxl/third_party/brotli/README new file mode 100644 index 0000000000..dea7291306 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/README @@ -0,0 +1,15 @@ +BROTLI DATA COMPRESSION LIBRARY + +Brotli is a generic-purpose lossless compression algorithm that compresses data +using a combination of a modern variant of the LZ77 algorithm, Huffman coding +and 2nd order context modeling, with a compression ratio comparable to the best +currently available general-purpose compression methods. It is similar in speed +with deflate but offers more dense compression. + +The specification of the Brotli Compressed Data Format is defined in RFC 7932 +https://tools.ietf.org/html/rfc7932 + +Brotli is open-sourced under the MIT License, see the LICENSE file. + +Brotli mailing list: +https://groups.google.com/forum/#!forum/brotli diff --git a/third-party/libjxl/libjxl/third_party/brotli/README.md b/third-party/libjxl/libjxl/third_party/brotli/README.md new file mode 100644 index 0000000000..0f905e3f2c --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/README.md @@ -0,0 +1,110 @@ +

Brotli

+ +# SECURITY NOTE + +Please consider updating brotli to version 1.0.9 (latest). + +Version 1.0.9 contains a fix for an "integer overflow" problem. This happens when "one-shot" decoding API is used (or input chunk for streaming API is not limited), input size (chunk size) is larger than 2GiB, and input contains uncompressed blocks. After the overflow happens, `memcpy` is invoked with a gigantic `num` value, which will likely cause a crash. + +### Introduction + +Brotli is a generic-purpose lossless compression algorithm that compresses data +using a combination of a modern variant of the LZ77 algorithm, Huffman coding +and 2nd order context modeling, with a compression ratio comparable to the best +currently available general-purpose compression methods. It is similar in speed +with deflate but offers more dense compression. + +The specification of the Brotli Compressed Data Format is defined in [RFC 7932](https://tools.ietf.org/html/rfc7932). + +Brotli is open-sourced under the MIT License, see the LICENSE file. + +> **Please note:** brotli is a "stream" format; it does not contain +> meta-information, like checksums or uncompressed data length. It is possible +> to modify "raw" ranges of the compressed stream and the decoder will not +> notice that. 
+ +Brotli mailing list: +https://groups.google.com/forum/#!forum/brotli + +![GitHub Actions Build Status](https://github.com/google/brotli/actions/workflows/build_test.yml/badge.svg) +[![Fuzzing Status](https://oss-fuzz-build-logs.storage.googleapis.com/badges/brotli.svg)](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#brotli) + +### Build instructions + +#### Vcpkg + +You can download and install brotli using the [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager: + + git clone https://github.com/Microsoft/vcpkg.git + cd vcpkg + ./bootstrap-vcpkg.sh + ./vcpkg integrate install + ./vcpkg install brotli + +The brotli port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + +#### Autotools-style CMake + +[configure-cmake](https://github.com/nemequ/configure-cmake) is an +autotools-style configure script for CMake-based projects (not supported on Windows). + +The basic commands to build, test and install brotli are: + + $ mkdir out && cd out + $ ../configure-cmake + $ make + $ make test + $ make install + +By default, debug binaries are built. To generate "release" `Makefile` specify `--disable-debug` option to `configure-cmake`. + +#### Bazel + +See [Bazel](http://www.bazel.build/) + +#### CMake + +The basic commands to build and install brotli are: + + $ mkdir out && cd out + $ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./installed .. + $ cmake --build . --config Release --target install + +You can use other [CMake](https://cmake.org/) configuration. 
+ +#### Premake5 + +See [Premake5](https://premake.github.io/) + +#### Python + +To install the latest release of the Python module, run the following: + + $ pip install brotli + +To install the tip-of-the-tree version, run: + + $ pip install --upgrade git+https://github.com/google/brotli + +See the [Python readme](python/README.md) for more details on installing +from source, development, and testing. + +### Benchmarks +* [Squash Compression Benchmark](https://quixdb.github.io/squash-benchmark/) / [Unstable Squash Compression Benchmark](https://quixdb.github.io/squash-benchmark/unstable/) +* [Large Text Compression Benchmark](http://mattmahoney.net/dc/text.html) +* [Lzturbo Benchmark](https://sites.google.com/site/powturbo/home/benchmark) + +### Related projects +> **Disclaimer:** Brotli authors take no responsibility for the third party projects mentioned in this section. + +Independent [decoder](https://github.com/madler/brotli) implementation by Mark Adler, based entirely on format specification. + +JavaScript port of brotli [decoder](https://github.com/devongovett/brotli.js). Could be used directly via `npm install brotli` + +Hand ported [decoder / encoder](https://github.com/dominikhlbg/BrotliHaxe) in haxe by Dominik Homberger. 
Output source code: JavaScript, PHP, Python, Java and C# + +7Zip [plugin](https://github.com/mcmilk/7-Zip-Zstd) + +Dart [native bindings](https://github.com/thosakwe/brotli) + +Dart compression framework with [fast FFI-based Brotli implementation](https://pub.dev/documentation/es_compression/latest/brotli/brotli-library.html) with ready-to-use prebuilt binaries for Win/Linux/Mac diff --git a/third-party/libjxl/libjxl/third_party/brotli/SECURITY.md b/third-party/libjxl/libjxl/third_party/brotli/SECURITY.md new file mode 100644 index 0000000000..c2a44c6666 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/SECURITY.md @@ -0,0 +1,6 @@ +### Reporting + +To report a security issue, please use [https://g.co/vulnz](https://g.co/vulnz). +We use g.co/vulnz for our intake, and do coordination and disclosure here on +GitHub (including using GitHub Security Advisory). The Google Security Team will +respond within 5 working days of your report on g.co/vulnz. diff --git a/third-party/libjxl/libjxl/third_party/brotli/WORKSPACE b/third-party/libjxl/libjxl/third_party/brotli/WORKSPACE new file mode 100644 index 0000000000..75f376828f --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/WORKSPACE @@ -0,0 +1,21 @@ +workspace(name = "org_brotli") + +local_repository( + name = "ignore_org_brotli_go", + path = "go", +) + +local_repository( + name = "ignore_org_brotli_java", + path = "java", +) + +local_repository( + name = "ignore_org_brotli_js", + path = "js", +) + +local_repository( + name = "ignore_org_brotli_research", + path = "research", +) diff --git a/third-party/libjxl/libjxl/third_party/brotli/bootstrap b/third-party/libjxl/libjxl/third_party/brotli/bootstrap new file mode 100644 index 0000000000..1da6d60f78 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/bootstrap @@ -0,0 +1,35 @@ +#!/bin/sh -e + +REQUIRED='is required, but not installed.' 
+bc -v >/dev/null 2>&1 || { echo >&2 "'bc' $REQUIRED"; exit 1; } +[ "x`echo hello | sed s/hello/world/ 2>/dev/null`" = "xworld" ] || { echo >&2 "'sed' $REQUIRED"; exit 1; } +autoreconf --version >/dev/null 2>&1 || { echo >&2 "'autoconf' $REQUIRED"; exit 1; } + +# Determine which flag sed uses for extended regular expressions. +# -E is POSIX. -r is for GNU sed older than 4.2. +echo hello | sed -E s/hello/world/ >/dev/null 2>&1 && SED_ERE=-E || SED_ERE=-r + +# If libtool is not installed -> "error: Libtool library used but 'LIBTOOL' is undefined" + +if [ ! -e "./m4" ]; then +mkdir m4 2>/dev/null +fi + +BROTLI_ABI_HEX=`sed -n 's/#define BROTLI_ABI_VERSION 0x//p' c/common/version.h` +BROTLI_ABI_INT=`echo "ibase=16;$BROTLI_ABI_HEX" | bc` +BROTLI_ABI_CURRENT=`echo "scale=0;$BROTLI_ABI_INT / 16777216" | bc` +BROTLI_ABI_REVISION=`echo "scale=0;$BROTLI_ABI_INT / 4096 % 4096" | bc` +BROTLI_ABI_AGE=`echo "scale=0;$BROTLI_ABI_INT % 4096" | bc` +BROTLI_ABI_INFO="$BROTLI_ABI_CURRENT:$BROTLI_ABI_REVISION:$BROTLI_ABI_AGE" + +BROTLI_VERSION_HEX=`sed -n 's/#define BROTLI_VERSION 0x//p' c/common/version.h` +BROTLI_VERSION_INT=`echo "ibase=16;$BROTLI_VERSION_HEX" | bc` +BROTLI_VERSION_MAJOR=`echo "scale=0;$BROTLI_VERSION_INT / 16777216" | bc` +BROTLI_VERSION_MINOR=`echo "scale=0;$BROTLI_VERSION_INT / 4096 % 4096" | bc` +BROTLI_VERSION_PATCH=`echo "scale=0;$BROTLI_VERSION_INT % 4096" | bc` +BROTLI_VERSION="$BROTLI_VERSION_MAJOR.$BROTLI_VERSION_MINOR.$BROTLI_VERSION_PATCH" + +sed -i.bak "$SED_ERE" "s/[0-9]+:[0-9]+:[0-9]+/$BROTLI_ABI_INFO/" Makefile.am +sed -i.bak "$SED_ERE" "s/\[[0-9]+\.[0-9]+\.[0-9]+\]/[$BROTLI_VERSION]/" configure.ac + +autoreconf --install --force --symlink || exit $? 
diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.c new file mode 100644 index 0000000000..89866b1505 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.c @@ -0,0 +1,15 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +#include "constants.h" + +const BrotliPrefixCodeRange + _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = { + {1, 2}, {5, 2}, {9, 2}, {13, 2}, {17, 3}, {25, 3}, + {33, 3}, {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4}, + {113, 5}, {145, 5}, {177, 5}, {209, 5}, {241, 6}, {305, 6}, + {369, 7}, {497, 8}, {753, 9}, {1265, 10}, {2289, 11}, {4337, 12}, + {8433, 13}, {16625, 24}}; diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.h new file mode 100644 index 0000000000..31e5bd376e --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/constants.h @@ -0,0 +1,201 @@ +/* Copyright 2016 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/** + * @file + * Common constants used in decoder and encoder API. + */ + +#ifndef BROTLI_COMMON_CONSTANTS_H_ +#define BROTLI_COMMON_CONSTANTS_H_ + +#include +#include + +#include "platform.h" + +/* Specification: 7.3. Encoding of the context map */ +#define BROTLI_CONTEXT_MAP_MAX_RLE 16 + +/* Specification: 2. Compressed representation overview */ +#define BROTLI_MAX_NUMBER_OF_BLOCK_TYPES 256 + +/* Specification: 3.3. 
Alphabet sizes: insert-and-copy length */ +#define BROTLI_NUM_LITERAL_SYMBOLS 256 +#define BROTLI_NUM_COMMAND_SYMBOLS 704 +#define BROTLI_NUM_BLOCK_LEN_SYMBOLS 26 +#define BROTLI_MAX_CONTEXT_MAP_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + \ + BROTLI_CONTEXT_MAP_MAX_RLE) +#define BROTLI_MAX_BLOCK_TYPE_SYMBOLS (BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 2) + +/* Specification: 3.5. Complex prefix codes */ +#define BROTLI_REPEAT_PREVIOUS_CODE_LENGTH 16 +#define BROTLI_REPEAT_ZERO_CODE_LENGTH 17 +#define BROTLI_CODE_LENGTH_CODES (BROTLI_REPEAT_ZERO_CODE_LENGTH + 1) +/* "code length of 8 is repeated" */ +#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8 + +/* "Large Window Brotli" */ + +/** + * The theoretical maximum number of distance bits specified for large window + * brotli, for 64-bit encoders and decoders. Even when in practice 32-bit + * encoders and decoders only support up to 30 max distance bits, the value is + * set to 62 because it affects the large window brotli file format. + * Specifically, it affects the encoding of simple huffman tree for distances, + * see Specification RFC 7932 chapter 3.4. + */ +#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U +#define BROTLI_LARGE_MIN_WBITS 10 +/** + * The maximum supported large brotli window bits by the encoder and decoder. + * Large window brotli allows up to 62 bits, however the current encoder and + * decoder, designed for 32-bit integers, only support up to 30 bits maximum. + */ +#define BROTLI_LARGE_MAX_WBITS 30 + +/* Specification: 4. Encoding of distances */ +#define BROTLI_NUM_DISTANCE_SHORT_CODES 16 +/** + * Maximal number of "postfix" bits. + * + * Number of "postfix" bits is stored as 2 bits in meta-block header. 
+ */ +#define BROTLI_MAX_NPOSTFIX 3 +#define BROTLI_MAX_NDIRECT 120 +#define BROTLI_MAX_DISTANCE_BITS 24U +#define BROTLI_DISTANCE_ALPHABET_SIZE(NPOSTFIX, NDIRECT, MAXNBITS) ( \ + BROTLI_NUM_DISTANCE_SHORT_CODES + (NDIRECT) + \ + ((MAXNBITS) << ((NPOSTFIX) + 1))) +/* BROTLI_NUM_DISTANCE_SYMBOLS == 1128, i.e. 16 + 120 + (62 << (3 + 1)); arguments are passed in the macro's (NPOSTFIX, NDIRECT, MAXNBITS) order. */ +#define BROTLI_NUM_DISTANCE_SYMBOLS \ + BROTLI_DISTANCE_ALPHABET_SIZE( \ + BROTLI_MAX_NPOSTFIX, BROTLI_MAX_NDIRECT, BROTLI_LARGE_MAX_DISTANCE_BITS) + +/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932 + brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and + NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */ +#define BROTLI_MAX_DISTANCE 0x3FFFFFC + +/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit + allows safe distance calculation without overflows, given the distance + alphabet size is limited to corresponding size + (see kLargeWindowDistanceCodeLimits). */ +#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC + + +/* Specification: 4. Encoding of Literal Insertion Lengths and Copy Lengths */ +#define BROTLI_NUM_INS_COPY_CODES 24 + +/* 7.1. Context modes and context ID lookup for literals */ +/* "context IDs for literals are in the range of 0..63" */ +#define BROTLI_LITERAL_CONTEXT_BITS 6 + +/* 7.2. Context ID for distances */ +#define BROTLI_DISTANCE_CONTEXT_BITS 2 + +/* 9.1. Format of the Stream Header */ +/* Number of slack bytes for window size. Don't confuse + with BROTLI_NUM_DISTANCE_SHORT_CODES. */ +#define BROTLI_WINDOW_GAP 16 +#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP) + +typedef struct BrotliDistanceCodeLimit { + uint32_t max_alphabet_size; + uint32_t max_distance; +} BrotliDistanceCodeLimit; + +/* This function calculates maximal size of distance alphabet, such that the + distances greater than the given values can not be represented. + + These limits are designed to support fast and safe 32-bit decoders. 
+ "32-bit" means that signed integer values up to ((1 << 31) - 1) could be + safely expressed. + + Brotli distance alphabet symbols do not represent consecutive distance + ranges. Each distance alphabet symbol (excluding direct distances and short + codes), represent interleaved (for NPOSTFIX > 0) range of distances. + A "group" of consecutive (1 << NPOSTFIX) symbols represent non-interleaved + range. Two consecutive groups require the same amount of "extra bits". + + It is important that distance alphabet represents complete "groups". + To avoid complex logic on encoder side about interleaved ranges + it was decided to restrict both sides to complete distance code "groups". + */ +BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit( + uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) { /* Every return path below fills both result.max_alphabet_size and result.max_distance. */ + BrotliDistanceCodeLimit result; + /* Marking this function as unused, because not all files + including "constants.h" use it -> compiler warns about that. */ + BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit); + if (max_distance <= ndirect) { + /* This case never happens / exists only for the sake of completeness. */ + result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES; + result.max_distance = max_distance; + return result; + } else { + /* The first prohibited value. */ + uint32_t forbidden_distance = max_distance + 1; + /* Subtract "directly" encoded region. */ + uint32_t offset = forbidden_distance - ndirect - 1; + uint32_t ndistbits = 0; + uint32_t tmp; + uint32_t half; + uint32_t group; + /* Postfix for the last dcode in the group. */ + uint32_t postfix = (1u << npostfix) - 1; /* i.e. the low |npostfix| bits, all set */ + uint32_t extra; + uint32_t start; + /* Remove postfix and "head-start". */ + offset = (offset >> npostfix) + 4; + /* Calculate the number of distance bits. */ + tmp = offset / 2; + /* Poor-man's log2floor, to avoid extra dependencies. 
*/ + while (tmp != 0) {ndistbits++; tmp = tmp >> 1;} /* ndistbits is now the bit length of (offset / 2). */ + /* One bit is covered with subrange addressing ("half"). */ + ndistbits--; + /* Find subrange. */ + half = (offset >> ndistbits) & 1; + /* Calculate the "group" part of dcode. */ + group = ((ndistbits - 1) << 1) | half; + /* Calculated "group" covers the prohibited distance value. */ + if (group == 0) { + /* This case is added for correctness; does not occur for limit > 128. */ + result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES; + result.max_distance = ndirect; + return result; + } + /* Decrement "group", so it is the last permitted "group". */ + group--; + /* After group was decremented, ndistbits and half must be recalculated. */ + ndistbits = (group >> 1) + 1; + /* The last available distance in the subrange has all extra bits set. */ + extra = (1u << ndistbits) - 1; + /* Calculate region start. NB: ndistbits >= 1. */ + start = (1u << (ndistbits + 1)) - 4; + /* Move to subregion. */ + start += (group & 1) << ndistbits; + /* Calculate the alphabet size. */ + result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect + + BROTLI_NUM_DISTANCE_SHORT_CODES + 1; + /* Calculate the maximal distance representable by alphabet. */ + result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1; + return result; + } +} + +/* Represents the range of values belonging to a prefix code: + [offset, offset + 2^nbits) */ +typedef struct { + uint16_t offset; + uint8_t nbits; +} BrotliPrefixCodeRange; + +/* "Soft-private", it is exported, but not "advertised" as API. 
*/ +BROTLI_COMMON_API extern const BrotliPrefixCodeRange + _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS]; + +#endif /* BROTLI_COMMON_CONSTANTS_H_ */ diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/context.c b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.c new file mode 100644 index 0000000000..7f9c958699 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.c @@ -0,0 +1,156 @@ +#include "context.h" + +#include + +/* Common context lookup table for all context modes. */ +const uint8_t _kBrotliContextLookupTable[2048] = { + /* CONTEXT_LSB6, last byte. */ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + + /* CONTEXT_LSB6, second last byte, */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + /* CONTEXT_MSB6, last byte. */ + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, + 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, + 28, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, + 32, 32, 32, 32, 33, 33, 33, 33, 34, 34, 34, 34, 35, 35, 35, 35, + 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 39, 39, 39, 39, + 40, 40, 40, 40, 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, + 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, + 48, 48, 48, 48, 49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, + 52, 52, 52, 52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, + 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, + 60, 60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63, + + /* CONTEXT_MSB6, second last byte, */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + /* CONTEXT_UTF8, last byte. */ + /* ASCII range. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, + 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, + 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, + 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, + 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, + /* UTF8 continuation byte range. */ + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, + /* UTF8 lead byte range. */ + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, + + /* CONTEXT_UTF8 second last byte. */ + /* ASCII range. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, + /* UTF8 continuation byte range. 
*/ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* UTF8 lead byte range. */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + + /* CONTEXT_SIGNED, last byte, same as the above values shifted by 3 bits. */ + 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 56, + + /* CONTEXT_SIGNED, second last byte. 
*/ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, +}; diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/context.h b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.h new file mode 100644 index 0000000000..685a279dc0 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/context.h @@ -0,0 +1,113 @@ +/* Copyright 2013 Google Inc. All Rights Reserved. + + Distributed under MIT license. + See file LICENSE for detail or copy at https://opensource.org/licenses/MIT +*/ + +/* Lookup table to map the previous two bytes to a context id. + + There are four different context modeling modes defined here: + CONTEXT_LSB6: context id is the least significant 6 bits of the last byte, + CONTEXT_MSB6: context id is the most significant 6 bits of the last byte, + CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text, + CONTEXT_SIGNED: second-order context model tuned for signed integers. + + If |p1| and |p2| are the previous two bytes, and |mode| is current context + mode, we calculate the context as: + + context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256]. + + For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters + (i.e. 
< 128), this will be equivalent to + + context = 4 * context1(p1) + context2(p2), + + where context1 is based on the previous byte in the following way: + + 0 : non-ASCII control + 1 : \t, \n, \r + 2 : space + 3 : other punctuation + 4 : " ' + 5 : % + 6 : ( < [ { + 7 : ) > ] } + 8 : , ; : + 9 : . + 10 : = + 11 : number + 12 : upper-case vowel + 13 : upper-case consonant + 14 : lower-case vowel + 15 : lower-case consonant + + and context2 is based on the second last byte: + + 0 : control, space + 1 : punctuation + 2 : upper-case letter, number + 3 : lower-case letter + + If the last byte is ASCII, and the second last byte is not (in a valid UTF8 + stream it will be a continuation byte, value between 128 and 191), the + context is the same as if the second last byte was an ASCII control or space. + + If the last byte is a UTF8 lead byte (value >= 192), then the next byte will + be a continuation byte and the context id is 2 or 3 depending on the LSB of + the last byte and to a lesser extent on the second last byte if it is ASCII. + + If the last byte is a UTF8 continuation byte, the second last byte can be: + - continuation byte: the next byte is probably ASCII or lead byte (assuming + 4-byte UTF8 characters are rare) and the context id is 0 or 1. + - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1 + - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3 + + The possible value combinations of the previous two bytes, the range of + context ids and the type of the next byte is summarized in the table below: + + |--------\-----------------------------------------------------------------| + | \ Last byte | + | Second \---------------------------------------------------------------| + | last byte \ ASCII | cont. byte | lead byte | + | \ (0-127) | (128-191) | (192-) | + |=============|===================|=====================|==================| + | ASCII | next: ASCII/lead | not valid | next: cont. 
| + | (0-127) | context: 4 - 63 | | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. | + | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: ASCII/lead | not valid | + | (192-207) | | context: 0 - 1 | | + |-------------|-------------------|---------------------|------------------| + | lead byte | not valid | next: cont. | not valid | + | (208-) | | context: 2 - 3 | | + |-------------|-------------------|---------------------|------------------| +*/ + +#ifndef BROTLI_COMMON_CONTEXT_H_ +#define BROTLI_COMMON_CONTEXT_H_ + +#include +#include + +typedef enum ContextType { + CONTEXT_LSB6 = 0, + CONTEXT_MSB6 = 1, + CONTEXT_UTF8 = 2, + CONTEXT_SIGNED = 3 +} ContextType; + +/* "Soft-private", it is exported, but not "advertised" as API. */ +/* Common context lookup table for all context modes. 
*/ +BROTLI_COMMON_API extern const uint8_t _kBrotliContextLookupTable[2048]; + +typedef const uint8_t* ContextLut; + +/* typeof(MODE) == ContextType; returns ContextLut */ +#define BROTLI_CONTEXT_LUT(MODE) (&_kBrotliContextLookupTable[(MODE) << 9]) + +/* typeof(LUT) == ContextLut */ +#define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2]) + +#endif /* BROTLI_COMMON_CONTEXT_H_ */ diff --git a/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin new file mode 100644 index 0000000000..a585c0e292 --- /dev/null +++ b/third-party/libjxl/libjxl/third_party/brotli/c/common/dictionary.bin @@ -0,0 +1,432 @@ +timedownlifeleftbackcodedatashowonlysitecityopenjustlikefreeworktextyearoverbodyloveformbookplaylivelinehelphomesidemorewordlongthemviewfindpagedaysfullheadtermeachareafromtruemarkableuponhighdatelandnewsevennextcasebothpostusedmadehandherewhatnameLinkblogsizebaseheldmakemainuser') +holdendswithNewsreadweresigntakehavegameseencallpathwellplusmenufilmpartjointhislistgoodneedwayswestjobsmindalsologorichuseslastteamarmyfoodkingwilleastwardbestfirePageknowaway.pngmovethanloadgiveselfnotemuchfeedmanyrockicononcelookhidediedHomerulehostajaxinfoclublawslesshalfsomesuchzone100%onescareTimeracebluefourweekfacehopegavehardlostwhenparkkeptpassshiproomHTMLplanTypedonesavekeepflaglinksoldfivetookratetownjumpthusdarkcardfilefearstaykillthatfallautoever.comtalkshopvotedeepmoderestturnbornbandfellroseurl(skinrolecomeactsagesmeetgold.jpgitemvaryfeltthensenddropViewcopy1.0"stopelseliestourpack.gifpastcss?graymean>rideshotlatesaidroadvar feeljohnrickportfast'UA-deadpoorbilltypeU.S.woodmust2px;Inforankwidewantwalllead[0];paulwavesure$('#waitmassarmsgoesgainlangpaid!-- lockunitrootwalkfirmwifexml"songtest20pxkindrowstoolfontmailsafestarmapscorerainflowbabyspansays4px;6px;artsfootrealwikiheatsteptriporg/lakeweaktoldFormcastfansbankveryrunsjulytask1px;goalgrewslowedgeid="sets5px;.js?40pxif 
(soonseatnonetubezerosentreedfactintogiftharm18pxcamehillboldzoomvoideasyringfillpeakinitcost3px;jacktagsbitsrolleditknewnearironfreddiskwentsoilputs/js/holyT22:ISBNT20:adamsees

json', 'contT21: RSSloopasiamoon

soulLINEfortcartT14:

80px!--<9px;T04:mike:46ZniceinchYorkricezh:'));puremageparatonebond:37Z_of_']);000,zh:tankyardbowlbush:56ZJava30px +|} +%C3%:34ZjeffEXPIcashvisagolfsnowzh:quer.csssickmeatmin.binddellhirepicsrent:36ZHTTP-201fotowolfEND xbox:54ZBODYdick; +} +exit:35Zvarsbeat'});diet999;anne}}sonyguysfuckpipe|- +!002)ndow[1];[]; +Log salt + bangtrimbath){ +00px +});ko:feesad> s:// [];tollplug(){ +{ + .js'200pdualboat.JPG); +}quot); + +'); + +} 201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037201320122011201020092008200720062005200420032002200120001999199819971996199519941993199219911990198919881987198619851984198319821981198019791978197719761975197419731972197119701969196819671966196519641963196219611960195919581957195619551954195319521951195010001024139400009999comomásesteestaperotodohacecadaañobiendíaasívidacasootroforosolootracualdijosidograntipotemadebealgoquéestonadatrespococasabajotodasinoaguapuesunosantediceluisellamayozonaamorpisoobraclicellodioshoracasiзанаомрарутанепоотизнодотожеонихНаеебымыВысовывоНообПолиниРФНеМытыОнимдаЗаДаНуОбтеИзейнуммТыужفيأنمامعكلأورديافىهولملكاولهبسالإنهيأيقدهلثمبهلوليبلايبكشيامأمنتبيلنحبهممشوشfirstvideolightworldmediawhitecloseblackrightsmallbooksplacemusicfieldorderpointvalueleveltableboardhousegroupworksyearsstatetodaywaterstartstyledeathpowerphonenighterrorinputabouttermstitletoolseventlocaltimeslargewordsgamesshortspacefocusclearmodelblockguideradiosharewomenagainmoneyimagenamesyounglineslatercolorgreenfront&watchforcepricerulesbeginaftervisitissueareasbelowindextotalhourslabelprintpressbuiltlinksspeedstudytradefoundsenseundershownformsrangeaddedstillmovedtakenaboveflashfixedoftenotherviewschecklegalriveritemsquickshapehumanexistgoingmoviethirdbasicpeacestagewidthloginideaswrotepagesusersdrivestorebreaksouthvoicesitesmonthwherebuildwhichearthforumthreesportpartyClicklowerlivesclasslayerentrystoryusagesoundcourtyour 
birthpopuptypesapplyImagebeinguppernoteseveryshowsmeansextramatchtrackknownearlybegansuperpapernorthlearngivennamedendedTermspartsGroupbrandusingwomanfalsereadyaudiotakeswhile.com/livedcasesdailychildgreatjudgethoseunitsneverbroadcoastcoverapplefilescyclesceneplansclickwritequeenpieceemailframeolderphotolimitcachecivilscaleenterthemetheretouchboundroyalaskedwholesincestock namefaithheartemptyofferscopeownedmightalbumthinkbloodarraymajortrustcanonunioncountvalidstoneStyleLoginhappyoccurleft:freshquitefilmsgradeneedsurbanfightbasishoverauto;route.htmlmixedfinalYour slidetopicbrownalonedrawnsplitreachRightdatesmarchquotegoodsLinksdoubtasyncthumballowchiefyouthnovel10px;serveuntilhandsCheckSpacequeryjamesequaltwice0,000Startpanelsongsroundeightshiftworthpostsleadsweeksavoidthesemilesplanesmartalphaplantmarksratesplaysclaimsalestextsstarswrong

thing.org/multiheardPowerstandtokensolid(thisbringshipsstafftriedcallsfullyfactsagentThis //-->adminegyptEvent15px;Emailtrue"crossspentblogsbox">notedleavechinasizesguestrobotheavytrue,sevengrandcrimesignsawaredancephase> + + +name=diegopage swiss--> + +#fff;">Log.com"treatsheet) && 14px;sleepntentfiledja:id="cName"worseshots-box-delta +<bears:48Z spendbakershops= "";php">ction13px;brianhellosize=o=%2F joinmaybe, fjsimg" ")[0]MTopBType"newlyDanskczechtrailknowsfaq">zh-cn10); +-1");type=bluestrulydavis.js';> + +form jesus100% menu. + +walesrisksumentddingb-likteachgif" vegasdanskeestishqipsuomisobredesdeentretodospuedeañosestátienehastaotrospartedondenuevohacerformamismomejormundoaquídíassóloayudafechatodastantomenosdatosotrassitiomuchoahoralugarmayorestoshorastenerantesfotosestaspaísnuevasaludforosmedioquienmesespoderchileserávecesdecirjoséestarventagrupohechoellostengoamigocosasnivelgentemismaairesjuliotemashaciafavorjuniolibrepuntobuenoautorabrilbuenatextomarzosaberlistaluegocómoenerojuegoperúhaberestoynuncamujervalorfueralibrogustaigualvotoscasosguíapuedosomosavisousteddebennochebuscafaltaeurosseriedichocursoclavecasasleónplazolargoobrasvistaapoyojuntotratavistocrearcampohemoscincocargopisosordenhacenáreadiscopedrocercapuedapapelmenorútilclarojorgecalleponertardenadiemarcasigueellassiglocochemotosmadreclaserestoniñoquedapasarbancohijosviajepabloéstevienereinodejarfondocanalnorteletracausatomarmanoslunesautosvillavendopesartipostengamarcollevapadreunidovamoszonasambosbandamariaabusomuchasubirriojavivirgradochicaallíjovendichaestantalessalirsuelopesosfinesllamabuscoéstalleganegroplazahumorpagarjuntadobleislasbolsabañohablaluchaÁreadicenjugarnotasvalleallácargadolorabajoestégustomentemariofirmacostofichaplatahogarartesleyesaquelmuseobasespocosmitadcielochicomiedoganarsantoetapadebesplayaredessietecortecoreadudasdeseoviejodeseaaguas"domaincommonstatuseventsmastersystemactionbannerremovescrollupdateglobalmediumfilternumberchangeresultpublicscreenchoosenormaltravelissu
essourcetargetspringmodulemobileswitchphotosborderregionitselfsocialactivecolumnrecordfollowtitle>eitherlengthfamilyfriendlayoutauthorcreatereviewsummerserverplayedplayerexpandpolicyformatdoublepointsseriespersonlivingdesignmonthsforcesuniqueweightpeopleenergynaturesearchfigurehavingcustomoffsetletterwindowsubmitrendergroupsuploadhealthmethodvideosschoolfutureshadowdebatevaluesObjectothersrightsleaguechromesimplenoticesharedendingseasonreportonlinesquarebuttonimagesenablemovinglatestwinterFranceperiodstrongrepeatLondondetailformeddemandsecurepassedtoggleplacesdevicestaticcitiesstreamyellowattackstreetflighthiddeninfo">openedusefulvalleycausesleadersecretseconddamagesportsexceptratingsignedthingseffectfieldsstatesofficevisualeditorvolumeReportmuseummoviesparentaccessmostlymother" id="marketgroundchancesurveybeforesymbolmomentspeechmotioninsidematterCenterobjectexistsmiddleEuropegrowthlegacymannerenoughcareeransweroriginportalclientselectrandomclosedtopicscomingfatheroptionsimplyraisedescapechosenchurchdefinereasoncorneroutputmemoryiframepolicemodelsNumberduringoffersstyleskilledlistedcalledsilvermargindeletebetterbrowselimitsGlobalsinglewidgetcenterbudgetnowrapcreditclaimsenginesafetychoicespirit-stylespreadmakingneededrussiapleaseextentScriptbrokenallowschargedividefactormember-basedtheoryconfigaroundworkedhelpedChurchimpactshouldalwayslogo" bottomlist">){var prefixorangeHeader.push(couplegardenbridgelaunchReviewtakingvisionlittledatingButtonbeautythemesforgotSearchanchoralmostloadedChangereturnstringreloadMobileincomesupplySourceordersviewed courseAbout islandPhilipawardshandleimportOfficeregardskillsnationSportsdegreeweekly (e.g.behinddoctorloggedunitedbeyond-scaleacceptservedmarineFootercamera +_form"leavesstress" /> +.gif" onloadloaderOxfordsistersurvivlistenfemaleDesignsize="appealtext">levelsthankshigherforcedanimalanyoneAfricaagreedrecentPeople
wonderpricesturned|| {};main">inlinesundaywrap">failedcensusminutebeaconquotes150px|estateremoteemail"linkedright;signalformal1.htmlsignupprincefloat:.png" forum.AccesspaperssoundsextendHeightsliderUTF-8"& Before. WithstudioownersmanageprofitjQueryannualparamsboughtfamousgooglelongeri++) {israelsayingdecidehome">headerensurebranchpiecesblock;statedtop">boston.test(avatartested_countforumsschemaindex,filledsharesreaderalert(appearSubmitline">body"> +* TheThoughseeingjerseyNews +System DavidcancertablesprovedApril reallydriveritem">more">boardscolorscampusfirst || [];media.guitarfinishwidth:showedOther .php" assumelayerswilsonstoresreliefswedenCustomeasily your String + +Whiltaylorclear:resortfrenchthough") + "buyingbrandsMembername">oppingsector5px;">vspacepostermajor coffeemartinmaturehappenkansaslink">Images=falsewhile hspace0& + +In powerPolski-colorjordanBottomStart -count2.htmlnews">01.jpgOnline-rightmillerseniorISBN 00,000 guidesvalue)ectionrepair.xml" rights.html-blockregExp:hoverwithinvirginphones using + var >'); + + 
+bahasabrasilgalegomagyarpolskisrpskiردو中文简体繁體信息中国我们一个公司管理论坛可以服务时间个人产品自己企业查看工作联系没有网站所有评论中心文章用户首页作者技术问题相关下载搜索使用软件在线主题资料视频回复注册网络收藏内容推荐市场消息空间发布什么好友生活图片发展如果手机新闻最新方式北京提供关于更多这个系统知道游戏广告其他发表安全第一会员进行点击版权电子世界设计免费教育加入活动他们商品博客现在上海如何已经留言详细社区登录本站需要价格支持国际链接国家建设朋友阅读法律位置经济选择这样当前分类排行因为交易最后音乐不能通过行业科技可能设备合作大家社会研究专业全部项目这里还是开始情况电脑文件品牌帮助文化资源大学学习地址浏览投资工程要求怎么时候功能主要目前资讯城市方法电影招聘声明任何健康数据美国汽车介绍但是交流生产所以电话显示一些单位人员分析地图旅游工具学生系列网友帖子密码频道控制地区基本全国网上重要第二喜欢进入友情这些考试发现培训以上政府成为环境香港同时娱乐发送一定开发作品标准欢迎解决地方一下以及责任或者客户代表积分女人数码销售出现离线应用列表不同编辑统计查询不要有关机构很多播放组织政策直接能力来源時間看到热门关键专区非常英语百度希望美女比较知识规定建议部门意见精彩日本提高发言方面基金处理权限影片银行还有分享物品经营添加专家这种话题起来业务公告记录简介质量男人影响引用报告部分快速咨询时尚注意申请学校应该历史只是返回购买名称为了成功说明供应孩子专题程序一般會員只有其它保护而且今天窗口动态状态特别认为必须更新小说我們作为媒体包括那么一样国内是否根据电视学院具有过程由于人才出来不过正在明星故事关系标题商务输入一直基础教学了解建筑结果全球通知计划对于艺术相册发生真的建立等级类型经验实现制作来自标签以下原创无法其中個人一切指南关闭集团第三关注因此照片深圳商业广州日期高级最近综合表示专辑行为交通评价觉得精华家庭完成感觉安装得到邮件制度食品虽然转载报价记者方案行政人民用品东西提出酒店然后付款热点以前完全发帖设置领导工业医院看看经典原因平台各种增加材料新增之后职业效果今年论文我国告诉版主修改参与打印快乐机械观点存在精神获得利用继续你们这么模式语言能够雅虎操作风格一起科学体育短信条件治疗运动产业会议导航先生联盟可是問題结构作用调查資料自动负责农业访问实施接受讨论那个反馈加强女性范围服務休闲今日客服觀看参加的话一点保证图书有效测试移动才能决定股票不断需求不得办法之间采用营销投诉目标爱情摄影有些複製文学机会数字装修购物农村全面精品其实事情水平提示上市谢谢普通教师上传类别歌曲拥有创新配件只要时代資訊达到人生订阅老师展示心理贴子網站主題自然级别简单改革那些来说打开代码删除证券节目重点次數多少规划资金找到以后大全主页最佳回答天下保障现代检查投票小时沒有正常甚至代理目录公开复制金融幸福版本形成准备行情回到思想怎样协议认证最好产生按照服装广东动漫采购新手组图面板参考政治容易天地努力人们升级速度人物调整流行造成文字韩国贸易开展相關表现影视如此美容大小报道条款心情许多法规家居书店连接立即举报技巧奥运登入以来理论事件自由中华办公妈妈真正不错全文合同价值别人监督具体世纪团队创业承担增长有人保持商家维修台湾左右股份答案实际电信经理生命宣传任务正式特色下来协会只能当然重新內容指导运行日志賣家超过土地浙江支付推出站长杭州执行制造之一推广现场描述变化传统歌手保险课程医疗经过过去之前收入年度杂志美丽最高登陆未来加工免责教程版块身体重庆出售成本形式土豆出價东方邮箱南京求职取得职位相信页面分钟网页确定图例网址积极错误目的宝贝机关风险授权病毒宠物除了評論疾病及时求购站点儿童每天中央认识每个天津字体台灣维护本页个性官方常见相机战略应当律师方便校园股市房屋栏目员工导致突然道具本网结合档案劳动另外美元引起改变第四会计說明隐私宝宝规范消费共同忘记体系带来名字發表开放加盟受到二手大量成人数量共享区域女孩原则所在结束通信超级配置当时优秀性感房产遊戲出口提交就业保健程度参数事业整个山东情感特殊分類搜尋属于门户财务声音及其财经坚持干部成立利益考虑成都包装用戶比赛文明招商完整真是眼睛伙伴威望领域卫生优惠論壇公共良好充分符合附件特点不可英文资产根本明显密碼公众民族更加享受同学启动适合原来问答本文美食绿色稳定终于生物供求搜狐力量严重永远写真有限竞争对象费用不好绝对十分促进点评影音优势不少欣赏并且有点方向全新信用设施形象资格突破随着重大于是毕业智能化工完美商城统一出版打造產品概况用于保留因素中國存储贴图最愛长期口价理财基地安排武汉里面创建天空首先完善驱动下面不再诚信意义阳光英国漂亮军事玩家群众农民即可名稱家具动画想到注明小学性能考研硬件观看清楚搞笑首頁黄金适用江苏真实主管阶段註冊
翻译权利做好似乎通讯施工狀態也许环保培养概念大型机票理解匿名cuandoenviarmadridbuscariniciotiempoporquecuentaestadopuedenjuegoscontraestánnombretienenperfilmaneraamigosciudadcentroaunquepuedesdentroprimerpreciosegúnbuenosvolverpuntossemanahabíaagostonuevosunidoscarlosequiponiñosmuchosalgunacorreoimagenpartirarribamaríahombreempleoverdadcambiomuchasfueronpasadolíneaparecenuevascursosestabaquierolibroscuantoaccesomiguelvarioscuatrotienesgruposseráneuropamediosfrenteacercademásofertacochesmodeloitalialetrasalgúncompracualesexistecuerposiendoprensallegarviajesdineromurciapodrápuestodiariopuebloquieremanuelpropiocrisisciertoseguromuertefuentecerrargrandeefectopartesmedidapropiaofrecetierrae-mailvariasformasfuturoobjetoseguirriesgonormasmismosúnicocaminositiosrazóndebidopruebatoledoteníajesúsesperococinaorigentiendacientocádizhablarseríalatinafuerzaestiloguerraentraréxitolópezagendavídeoevitarpaginametrosjavierpadresfácilcabezaáreassalidaenvíojapónabusosbienestextosllevarpuedanfuertecomúnclaseshumanotenidobilbaounidadestáseditarcreadoдлячтокакилиэтовсеегопритакещеужеКакбезбылониВсеподЭтотомчемнетлетразонагдемнеДляПринаснихтемктогодвоттамСШАмаяЧтовасвамемуТакдванамэтиэтуВамтехпротутнаддняВоттринейВаснимсамтотрубОнимирнееОООлицэтаОнанемдоммойдвеоносудकेहैकीसेकाकोऔरपरनेएककिभीइसकरतोहोआपहीयहयातकथाjagranआजजोअबदोगईजागएहमइनवहयेथेथीघरजबदीकईजीवेनईनएहरउसमेकमवोलेसबमईदेओरआमबसभरबनचलमनआगसीलीعلىإلىهذاآخرعددالىهذهصورغيركانولابينعرضذلكهنايومقالعليانالكنحتىقبلوحةاخرفقطعبدركنإذاكمااحدإلافيهبعضكيفبحثومنوهوأناجدالهاسلمعندليسعبرصلىمنذبهاأنهمثلكنتالاحيثمصرشرححولوفياذالكلمرةانتالفأبوخاصأنتانهاليعضووقدابنخيربنتلكمشاءوهيابوقصصومارقمأحدنحنعدمرأياحةكتبدونيجبمنهتحتجهةسنةيتمكرةغزةنفسبيتللهلناتلكقلبلماعنهأولشيءنورأمافيكبكلذاترتببأنهمسانكبيعفقدحسنلهمشعرأهلشهرقطرطلبprofileservicedefaulthimselfdetailscontentsupportstartedmessagesuccessfashioncountryaccountcreatedstoriesresultsrunningprocesswritingobjectsvisiblewelcomearticleunknownnetworkcompanydynamicbrowserprivacyproblemServicerespectdisplayrequestreservewebsitehistoryfriendsoptionswo
rkingversionmillionchannelwindow.addressvisitedweathercorrectproductedirectforwardyou canremovedsubjectcontrolarchivecurrentreadinglibrarylimitedmanagerfurthersummarymachineminutesprivatecontextprogramsocietynumberswrittenenabledtriggersourcesloadingelementpartnerfinallyperfectmeaningsystemskeepingculture",journalprojectsurfaces"expiresreviewsbalanceEnglishContentthroughPlease opinioncontactaverageprimaryvillageSpanishgallerydeclinemeetingmissionpopularqualitymeasuregeneralspeciessessionsectionwriterscounterinitialreportsfiguresmembersholdingdisputeearlierexpressdigitalpictureAnothermarriedtrafficleadingchangedcentralvictoryimages/reasonsstudiesfeaturelistingmust beschoolsVersionusuallyepisodeplayinggrowingobviousoverlaypresentactions</ul> +wrapperalreadycertainrealitystorageanotherdesktopofferedpatternunusualDigitalcapitalWebsitefailureconnectreducedAndroiddecadesregular & animalsreleaseAutomatgettingmethodsnothingPopularcaptionletterscapturesciencelicensechangesEngland=1&History = new CentralupdatedSpecialNetworkrequirecommentwarningCollegetoolbarremainsbecauseelectedDeutschfinanceworkersquicklybetweenexactlysettingdiseaseSocietyweaponsexhibit<!--Controlclassescoveredoutlineattacksdevices(windowpurposetitle="Mobile killingshowingItaliandroppedheavilyeffects-1']); +confirmCurrentadvancesharingopeningdrawingbillionorderedGermanyrelated</form>includewhetherdefinedSciencecatalogArticlebuttonslargestuniformjourneysidebarChicagoholidayGeneralpassage,"animatefeelingarrivedpassingnaturalroughly. 
+ +The but notdensityBritainChineselack oftributeIreland" data-factorsreceivethat isLibraryhusbandin factaffairsCharlesradicalbroughtfindinglanding:lang="return leadersplannedpremiumpackageAmericaEdition]"Messageneed tovalue="complexlookingstationbelievesmaller-mobilerecordswant tokind ofFirefoxyou aresimilarstudiedmaximumheadingrapidlyclimatekingdomemergedamountsfoundedpioneerformuladynastyhow to SupportrevenueeconomyResultsbrothersoldierlargelycalling."AccountEdward segmentRobert effortsPacificlearnedup withheight:we haveAngelesnations_searchappliedacquiremassivegranted: falsetreatedbiggestbenefitdrivingStudiesminimumperhapsmorningsellingis usedreversevariant role="missingachievepromotestudentsomeoneextremerestorebottom:evolvedall thesitemapenglishway to AugustsymbolsCompanymattersmusicalagainstserving})(); +paymenttroubleconceptcompareparentsplayersregionsmonitor ''The winningexploreadaptedGalleryproduceabilityenhancecareers). The collectSearch ancientexistedfooter handlerprintedconsoleEasternexportswindowsChannelillegalneutralsuggest_headersigning.html">settledwesterncausing-webkitclaimedJusticechaptervictimsThomas mozillapromisepartieseditionoutside:false,hundredOlympic_buttonauthorsreachedchronicdemandssecondsprotectadoptedprepareneithergreatlygreateroverallimprovecommandspecialsearch.worshipfundingthoughthighestinsteadutilityquarterCulturetestingclearlyexposedBrowserliberal} catchProjectexamplehide();FloridaanswersallowedEmperordefenseseriousfreedomSeveral-buttonFurtherout of != nulltrainedDenmarkvoid(0)/all.jspreventRequestStephen + +When observe</h2> +Modern provide" alt="borders. 
+ +For + +Many artistspoweredperformfictiontype ofmedicalticketsopposedCouncilwitnessjusticeGeorge Belgium...</a>twitternotablywaitingwarfare Other rankingphrasesmentionsurvivescholar</p> + Countryignoredloss ofjust asGeorgiastrange<head><stopped1']); +islandsnotableborder:list ofcarried100,000</h3> + severalbecomesselect wedding00.htmlmonarchoff theteacherhighly biologylife ofor evenrise of»plusonehunting(thoughDouglasjoiningcirclesFor theAncientVietnamvehiclesuch ascrystalvalue =Windowsenjoyeda smallassumed<a id="foreign All rihow theDisplayretiredhoweverhidden;battlesseekingcabinetwas notlook atconductget theJanuaryhappensturninga:hoverOnline French lackingtypicalextractenemieseven ifgeneratdecidedare not/searchbeliefs-image:locatedstatic.login">convertviolententeredfirst">circuitFinlandchemistshe was10px;">as suchdivided</span>will beline ofa greatmystery/index.fallingdue to railwaycollegemonsterdescentit withnuclearJewish protestBritishflowerspredictreformsbutton who waslectureinstantsuicidegenericperiodsmarketsSocial fishingcombinegraphicwinners<br /><by the NaturalPrivacycookiesoutcomeresolveSwedishbrieflyPersianso muchCenturydepictscolumnshousingscriptsnext tobearingmappingrevisedjQuery(-width:title">tooltipSectiondesignsTurkishyounger.match(})(); + +burningoperatedegreessource=Richardcloselyplasticentries</tr> +color:#ul id="possessrollingphysicsfailingexecutecontestlink toDefault<br /> +: true,chartertourismclassicproceedexplain</h1> +online.?xml vehelpingdiamonduse theairlineend -->).attr(readershosting#ffffffrealizeVincentsignals src="/ProductdespitediversetellingPublic held inJoseph theatreaffects<style>a largedoesn'tlater, ElementfaviconcreatorHungaryAirportsee theso thatMichaelSystemsPrograms, and width=e"tradingleft"> +personsGolden Affairsgrammarformingdestroyidea ofcase ofoldest this is.src = cartoonregistrCommonsMuslimsWhat isin manymarkingrevealsIndeed,equally/show_aoutdoorescape(Austriageneticsystem,In the sittingHe alsoIslandsAcademy + 
<!--Daniel bindingblock">imposedutilizeAbraham(except{width:putting).html(|| []; +DATA[ *kitchenmountedactual dialectmainly _blank'installexpertsif(typeIt also© ">Termsborn inOptionseasterntalkingconcerngained ongoingjustifycriticsfactoryits ownassaultinvitedlastinghis ownhref="/" rel="developconcertdiagramdollarsclusterphp?id=alcohol);})();using a><span>vesselsrevivalAddressamateurandroidallegedillnesswalkingcentersqualifymatchesunifiedextinctDefensedied in + <!-- customslinkingLittle Book ofeveningmin.js?are thekontakttoday's.html" target=wearingAll Rig; +})();raising Also, crucialabout">declare--> +<scfirefoxas muchappliesindex, s, but type = + +<!--towardsRecordsPrivateForeignPremierchoicesVirtualreturnsCommentPoweredinline;povertychamberLiving volumesAnthonylogin" RelatedEconomyreachescuttinggravitylife inChapter-shadowNotable</td> + returnstadiumwidgetsvaryingtravelsheld bywho arework infacultyangularwho hadairporttown of + +Some 'click'chargeskeywordit willcity of(this);Andrew unique checkedor more300px; return;rsion="pluginswithin herselfStationFederalventurepublishsent totensionactresscome tofingersDuke ofpeople,exploitwhat isharmonya major":"httpin his menu"> +monthlyofficercouncilgainingeven inSummarydate ofloyaltyfitnessand wasemperorsupremeSecond hearingRussianlongestAlbertalateralset of small">.appenddo withfederalbank ofbeneathDespiteCapitalgrounds), and percentit fromclosingcontainInsteadfifteenas well.yahoo.respondfighterobscurereflectorganic= Math.editingonline paddinga wholeonerroryear ofend of barrierwhen itheader home ofresumedrenamedstrong>heatingretainscloudfrway of March 1knowingin partBetweenlessonsclosestvirtuallinks">crossedEND -->famous awardedLicenseHealth fairly wealthyminimalAfricancompetelabel">singingfarmersBrasil)discussreplaceGregoryfont copursuedappearsmake uproundedboth ofblockedsaw theofficescoloursif(docuwhen heenforcepush(fuAugust UTF-8">Fantasyin mostinjuredUsuallyfarmingclosureobject defenceuse of Medical<body> +evidentbe 
usedkeyCodesixteenIslamic#000000entire widely active (typeofone cancolor =speakerextendsPhysicsterrain<tbody>funeralviewingmiddle cricketprophetshifteddoctorsRussell targetcompactalgebrasocial-bulk ofman and</td> + he left).val()false);logicalbankinghome tonaming Arizonacredits); +}); +founderin turnCollinsbefore But thechargedTitle">CaptainspelledgoddessTag -->Adding:but wasRecent patientback in=false&Lincolnwe knowCounterJudaismscript altered']); + has theunclearEvent',both innot all + +<!-- placinghard to centersort ofclientsstreetsBernardassertstend tofantasydown inharbourFreedomjewelry/about..searchlegendsis mademodern only ononly toimage" linear painterand notrarely acronymdelivershorter00&as manywidth="/* <![Ctitle =of the lowest picked escapeduses ofpeoples PublicMatthewtacticsdamagedway forlaws ofeasy to windowstrong simple}catch(seventhinfoboxwent topaintedcitizenI don'tretreat. Some ww."); +bombingmailto:made in. Many carries||{};wiwork ofsynonymdefeatsfavoredopticalpageTraunless sendingleft"><comScorAll thejQuery.touristClassicfalse" Wilhelmsuburbsgenuinebishops.split(global followsbody ofnominalContactsecularleft tochiefly-hidden-banner</li> + +. When in bothdismissExplorealways via thespañolwelfareruling arrangecaptainhis sonrule ofhe tookitself,=0&(calledsamplesto makecom/pagMartin Kennedyacceptsfull ofhandledBesides//--></able totargetsessencehim to its by common.mineralto takeways tos.org/ladvisedpenaltysimple:if theyLettersa shortHerbertstrikes groups.lengthflightsoverlapslowly lesser social </p> + it intoranked rate oful> + attemptpair ofmake itKontaktAntoniohaving ratings activestreamstrapped").css(hostilelead tolittle groups,Picture--> + + rows=" objectinverse<footerCustomV><\/scrsolvingChamberslaverywoundedwhereas!= 'undfor allpartly -right:Arabianbacked centuryunit ofmobile-Europe,is homerisk ofdesiredClintoncost ofage of become none ofp"Middle ead')[0Criticsstudios>©group">assemblmaking pressedwidget.ps:" ? 
rebuiltby someFormer editorsdelayedCanonichad thepushingclass="but arepartialBabylonbottom carrierCommandits useAs withcoursesa thirddenotesalso inHouston20px;">accuseddouble goal ofFamous ).bind(priests Onlinein Julyst + "gconsultdecimalhelpfulrevivedis veryr'+'iptlosing femalesis alsostringsdays ofarrivalfuture <objectforcingString(" /> + here isencoded. The balloondone by/commonbgcolorlaw of Indianaavoidedbut the2px 3pxjquery.after apolicy.men andfooter-= true;for usescreen.Indian image =family,http://  driverseternalsame asnoticedviewers})(); + is moreseasonsformer the newis justconsent Searchwas thewhy theshippedbr><br>width: height=made ofcuisineis thata very Admiral fixed;normal MissionPress, ontariocharsettry to invaded="true"spacingis mosta more totallyfall of}); + immensetime inset outsatisfyto finddown tolot of Playersin Junequantumnot thetime todistantFinnishsrc = (single help ofGerman law andlabeledforestscookingspace">header-well asStanleybridges/globalCroatia About [0]; + it, andgroupedbeing a){throwhe madelighterethicalFFFFFF"bottom"like a employslive inas seenprintermost ofub-linkrejectsand useimage">succeedfeedingNuclearinformato helpWomen'sNeitherMexicanprotein<table by manyhealthylawsuitdevised.push({sellerssimply Through.cookie Image(older">us.js"> Since universlarger open to!-- endlies in']); + marketwho is ("DOMComanagedone fortypeof Kingdomprofitsproposeto showcenter;made itdressedwere inmixtureprecisearisingsrc = 'make a securedBaptistvoting + var March 2grew upClimate.removeskilledway the</head>face ofacting right">to workreduceshas haderectedshow();action=book ofan area== "htt<header +<html>conformfacing cookie.rely onhosted .customhe wentbut forspread Family a meansout theforums.footage">MobilClements" id="as highintense--><!--female is seenimpliedset thea stateand hisfastestbesidesbutton_bounded"><img Infoboxevents,a youngand areNative cheaperTimeoutand hasengineswon the(mostlyright: find a -bottomPrince area ofmore 
ofsearch_nature,legallyperiod,land ofor withinducedprovingmissilelocallyAgainstthe wayk"px;"> +pushed abandonnumeralCertainIn thismore inor somename isand, incrownedISBN 0-createsOctobermay notcenter late inDefenceenactedwish tobroadlycoolingonload=it. TherecoverMembersheight assumes<html> +people.in one =windowfooter_a good reklamaothers,to this_cookiepanel">London,definescrushedbaptismcoastalstatus title" move tolost inbetter impliesrivalryservers SystemPerhapses and contendflowinglasted rise inGenesisview ofrising seem tobut in backinghe willgiven agiving cities.flow of Later all butHighwayonly bysign ofhe doesdiffersbattery&lasinglesthreatsintegertake onrefusedcalled =US&See thenativesby thissystem.head of:hover,lesbiansurnameand allcommon/header__paramsHarvard/pixel.removalso longrole ofjointlyskyscraUnicodebr /> +AtlantanucleusCounty,purely count">easily build aonclicka givenpointerh"events else { +ditionsnow the, with man whoorg/Webone andcavalryHe diedseattle00,000 {windowhave toif(windand itssolely m"renewedDetroitamongsteither them inSenatorUs</a><King ofFrancis-produche usedart andhim andused byscoringat hometo haverelatesibilityfactionBuffalolink"><what hefree toCity ofcome insectorscountedone daynervoussquare };if(goin whatimg" alis onlysearch/tuesdaylooselySolomonsexual - <a hrmedium"DO NOT France,with a war andsecond take a > + + +market.highwaydone inctivity"last">obligedrise to"undefimade to Early praisedin its for hisathleteJupiterYahoo! termed so manyreally s. 
The a woman?value=direct right" bicycleacing="day andstatingRather,higher Office are nowtimes, when a pay foron this-link">;borderaround annual the Newput the.com" takin toa brief(in thegroups.; widthenzymessimple in late{returntherapya pointbanninginks"> +();" rea place\u003Caabout atr> + ccount gives a<SCRIPTRailwaythemes/toolboxById("xhumans,watchesin some if (wicoming formats Under but hashanded made bythan infear ofdenoted/iframeleft involtagein eacha"base ofIn manyundergoregimesaction </p> +<ustomVa;></importsor thatmostly &re size="</a></ha classpassiveHost = WhetherfertileVarious=[];(fucameras/></td>acts asIn some> + +<!organis <br />Beijingcatalàdeutscheuropeueuskaragaeilgesvenskaespañamensajeusuariotrabajoméxicopáginasiempresistemaoctubreduranteañadirempresamomentonuestroprimeratravésgraciasnuestraprocesoestadoscalidadpersonanúmeroacuerdomúsicamiembroofertasalgunospaísesejemploderechoademásprivadoagregarenlacesposiblehotelessevillaprimeroúltimoeventosarchivoculturamujeresentradaanuncioembargomercadograndesestudiomejoresfebrerodiseñoturismocódigoportadaespaciofamiliaantoniopermiteguardaralgunaspreciosalguiensentidovisitastítuloconocersegundoconsejofranciaminutossegundatenemosefectosmálagasesiónrevistagranadacompraringresogarcíaacciónecuadorquienesinclusodeberámateriahombresmuestrapodríamañanaúltimaestamosoficialtambienningúnsaludospodemosmejorarpositionbusinesshomepagesecuritylanguagestandardcampaignfeaturescategoryexternalchildrenreservedresearchexchangefavoritetemplatemilitaryindustryservicesmaterialproductsz-index:commentssoftwarecompletecalendarplatformarticlesrequiredmovementquestionbuildingpoliticspossiblereligionphysicalfeedbackregisterpicturesdisabledprotocolaudiencesettingsactivityelementslearninganythingabstractprogressoverviewmagazineeconomictrainingpressurevarious 
<strong>propertyshoppingtogetheradvancedbehaviordownloadfeaturedfootballselectedLanguagedistanceremembertrackingpasswordmodifiedstudentsdirectlyfightingnortherndatabasefestivalbreakinglocationinternetdropdownpracticeevidencefunctionmarriageresponseproblemsnegativeprogramsanalysisreleasedbanner">purchasepoliciesregionalcreativeargumentbookmarkreferrerchemicaldivisioncallbackseparateprojectsconflicthardwareinterestdeliverymountainobtained= false;for(var acceptedcapacitycomputeridentityaircraftemployedproposeddomesticincludesprovidedhospitalverticalcollapseapproachpartnerslogo"><adaughterauthor" culturalfamilies/images/assemblypowerfulteachingfinisheddistrictcriticalcgi-bin/purposesrequireselectionbecomingprovidesacademicexerciseactuallymedicineconstantaccidentMagazinedocumentstartingbottom">observed: "extendedpreviousSoftwarecustomerdecisionstrengthdetailedslightlyplanningtextareacurrencyeveryonestraighttransferpositiveproducedheritageshippingabsolutereceivedrelevantbutton" violenceanywherebenefitslaunchedrecentlyalliancefollowedmultiplebulletinincludedoccurredinternal$(this).republic><tr><tdcongressrecordedultimatesolution<ul id="discoverHome</a>websitesnetworksalthoughentirelymemorialmessagescontinueactive">somewhatvictoriaWestern title="LocationcontractvisitorsDownloadwithout right"> +measureswidth = variableinvolvedvirginianormallyhappenedaccountsstandingnationalRegisterpreparedcontrolsaccuratebirthdaystrategyofficialgraphicscriminalpossiblyconsumerPersonalspeakingvalidateachieved.jpg" />machines</h2> + keywordsfriendlybrotherscombinedoriginalcomposedexpectedadequatepakistanfollow" valuable</label>relativebringingincreasegovernorplugins/List of Header">" name=" ("graduate</head> +commercemalaysiadirectormaintain;height:schedulechangingback to catholicpatternscolor: #greatestsuppliesreliable</ul> + <select citizensclothingwatching<li id="specificcarryingsentence<center>contrastthinkingcatch(e)southernMichael merchantcarouselpadding:interior.split("lizationOctober 
){returnimproved--> + +coveragechairman.png" />subjectsRichard whateverprobablyrecoverybaseballjudgmentconnect..css" /> websitereporteddefault"/></a> +electricscotlandcreationquantity. ISBN 0did not instance-search-" lang="speakersComputercontainsarchivesministerreactiondiscountItalianocriteriastrongly: 'http:'script'coveringofferingappearedBritish identifyFacebooknumerousvehiclesconcernsAmericanhandlingdiv id="William provider_contentaccuracysection andersonflexibleCategorylawrence<script>layout="approved maximumheader"></table>Serviceshamiltoncurrent canadianchannels/themes//articleoptionalportugalvalue=""intervalwirelessentitledagenciesSearch" measuredthousandspending…new Date" size="pageNamemiddle" " /></a>hidden">sequencepersonaloverflowopinionsillinoislinks"> + <title>versionssaturdayterminalitempropengineersectionsdesignerproposal="false"Españolreleasessubmit" er"additionsymptomsorientedresourceright"><pleasurestationshistory.leaving border=contentscenter">. + +Some directedsuitablebulgaria.show();designedGeneral conceptsExampleswilliamsOriginal"><span>search">operatorrequestsa "allowingDocumentrevision. + +The yourselfContact michiganEnglish columbiapriorityprintingdrinkingfacilityreturnedContent officersRussian generate-8859-1"indicatefamiliar qualitymargin:0 contentviewportcontacts-title">portable.length eligibleinvolvesatlanticonload="default.suppliedpaymentsglossary + +After guidance</td><tdencodingmiddle">came to displaysscottishjonathanmajoritywidgets.clinicalthailandteachers<head> + affectedsupportspointer;toString</small>oklahomawill be investor0" alt="holidaysResourcelicensed (which . After considervisitingexplorerprimary search" android"quickly meetingsestimate;return ;color:# height=approval, " checked.min.js"magnetic></a></hforecast. 
While thursdaydvertiseéhasClassevaluateorderingexistingpatients Online coloradoOptions"campbell<!-- end</span><<br /> +_popups|sciences," quality Windows assignedheight: <b classle" value=" Companyexamples<iframe believespresentsmarshallpart of properly). + +The taxonomymuch of </span> +" data-srtuguêsscrollTo project<head> +attorneyemphasissponsorsfancyboxworld's wildlifechecked=sessionsprogrammpx;font- Projectjournalsbelievedvacationthompsonlightingand the special border=0checking</tbody><button Completeclearfix +<head> +article <sectionfindingsrole in popular Octoberwebsite exposureused to changesoperatedclickingenteringcommandsinformed numbers </div>creatingonSubmitmarylandcollegesanalyticlistingscontact.loggedInadvisorysiblingscontent"s")s. This packagescheckboxsuggestspregnanttomorrowspacing=icon.pngjapanesecodebasebutton">gamblingsuch as , while </span> missourisportingtop:1px .</span>tensionswidth="2lazyloadnovemberused in height="cript"> + </<tr><td height:2/productcountry include footer" <!-- title"></jquery.</form> 
+(简体)(繁體)hrvatskiitalianoromânătürkçeاردوtambiénnoticiasmensajespersonasderechosnacionalserviciocontactousuariosprogramagobiernoempresasanunciosvalenciacolombiadespuésdeportesproyectoproductopúbliconosotroshistoriapresentemillonesmediantepreguntaanteriorrecursosproblemasantiagonuestrosopiniónimprimirmientrasaméricavendedorsociedadrespectorealizarregistropalabrasinterésentoncesespecialmiembrosrealidadcórdobazaragozapáginassocialesbloqueargestiónalquilersistemascienciascompletoversióncompletaestudiospúblicaobjetivoalicantebuscadorcantidadentradasaccionesarchivossuperiormayoríaalemaniafunciónúltimoshaciendoaquellosediciónfernandoambientefacebooknuestrasclientesprocesosbastantepresentareportarcongresopublicarcomerciocontratojóvenesdistritotécnicaconjuntoenergíatrabajarasturiasrecienteutilizarboletínsalvadorcorrectatrabajosprimerosnegocioslibertaddetallespantallapróximoalmeríaanimalesquiénescorazónsecciónbuscandoopcionesexteriorconceptotodavíagaleríaescribirmedicinalicenciaconsultaaspectoscríticadólaresjusticiadeberánperíodonecesitamantenerpequeñorecibidatribunaltenerifecancióncanariasdescargadiversosmallorcarequieretécnicodeberíaviviendafinanzasadelantefuncionaconsejosdifícilciudadesantiguasavanzadatérminounidadessánchezcampañasoftonicrevistascontienesectoresmomentosfacultadcréditodiversassupuestofactoressegundospequeñaгодаеслиестьбылобытьэтомЕслитогоменявсехэтойдажебылигодуденьэтотбыласебяодинсебенадосайтфотонегосвоисвойигрытожевсемсвоюлишьэтихпокаднейдомамиралиботемухотядвухсетилюдиделомиретебясвоевидечегоэтимсчеттемыценысталведьтемеводытебевышенамитипатомуправлицаоднагодызнаюмогудругвсейидеткиноодноделаделесрокиюнявесьЕстьразанашиاللهالتيجميعخاصةالذيعليهجديدالآنالردتحكمصفحةكانتاللييكونشبكةفيهابناتحواءأكثرخلالالحبدليلدروساضغطتكونهناكساحةناديالطبعليكشكرايمكنمنهاشركةرئيسنشيطماذاالفنشبابتعبررحمةكافةيقولمركزكلمةأحمدقلبييعنيصورةطريقشاركجوالأخرىمعناابحثعروضبشكلمسجلبنانخالدكتابكليةبدونأيضايوجدفريقكتبتأفضلمطبخاكثرباركافضلاحلىنفسهأيامردودأنهاديناالانمعرضتعلمداخلممكن�����������
����������� +  + ������������������������������������������������resourcescountriesquestionsequipmentcommunityavailablehighlightDTD/xhtmlmarketingknowledgesomethingcontainerdirectionsubscribeadvertisecharacter" value="</select>Australia" class="situationauthorityfollowingprimarilyoperationchallengedevelopedanonymousfunction functionscompaniesstructureagreement" title="potentialeducationargumentssecondarycopyrightlanguagesexclusivecondition</form> +statementattentionBiography} else { +solutionswhen the Analyticstemplatesdangeroussatellitedocumentspublisherimportantprototypeinfluence»</effectivegenerallytransformbeautifultransportorganizedpublishedprominentuntil thethumbnailNational .focus();over the migrationannouncedfooter"> +exceptionless thanexpensiveformationframeworkterritoryndicationcurrentlyclassNamecriticismtraditionelsewhereAlexanderappointedmaterialsbroadcastmentionedaffiliate</option>treatmentdifferent/default.Presidentonclick="biographyotherwisepermanentFrançaisHollywoodexpansionstandards</style> +reductionDecember preferredCambridgeopponentsBusiness confusion> +<title>presentedexplaineddoes not worldwideinterfacepositionsnewspaper</table> +mountainslike the essentialfinancialselectionaction="/abandonedEducationparseInt(stabilityunable to +relationsNote thatefficientperformedtwo yearsSince thethereforewrapper">alternateincreasedBattle ofperceivedtrying tonecessaryportrayedelectionsElizabethdiscoveryinsurances.length;legendaryGeographycandidatecorporatesometimesservices.inheritedCommunityreligiouslocationsCommitteebuildingsthe worldno longerbeginningreferencecannot befrequencytypicallyinto the relative;recordingpresidentinitiallytechniquethe otherit can beexistenceunderlinethis timetelephoneitemscopepracticesadvantage);return For otherprovidingdemocracyboth the extensivesufferingsupportedcomputers functionpracticalsaid thatit may beEnglish
+suspectedmargin: 0spiritual + +microsoftgraduallydiscussedhe becameexecutivejquery.jshouseholdconfirmedpurchasedliterallydestroyedup to thevariationremainingit is notcenturiesJapanese among thecompletedalgorithminterestsrebellionundefinedencourageresizableinvolvingsensitiveuniversalprovision(althoughfeaturingconducted), which continued-header">February numerous overflow:componentfragmentsexcellentcolspan="technicalnear the Advanced source ofexpressedHong Kong Facebookmultiple mechanismelevationoffensive + sponsoreddocument.or "there arethose whomovementsprocessesdifficultsubmittedrecommendconvincedpromoting" width=".replace(classicalcoalitionhis firstdecisionsassistantindicatedevolution-wrapper"enough toalong thedelivered--> + + +
Archbishop class="nobeing usedapproachesprivilegesnoscript> +results inmay be theEaster eggmechanismsreasonablePopulationCollectionselected">noscript> /index.phparrival of-jssdk'));managed toincompletecasualtiescompletionChristiansSeptember arithmeticproceduresmight haveProductionit appearsPhilosophyfriendshipleading togiving thetoward theguaranteeddocumentedcolor:#000video gamecommissionreflectingchange theassociatedsans-serifonkeypress; padding:He was theunderlyingtypically , and the srcElementsuccessivesince the should be networkingaccountinguse of thelower thanshows that + complaintscontinuousquantitiesastronomerhe did notdue to itsapplied toan averageefforts tothe futureattempt toTherefore,capabilityRepublicanwas formedElectronickilometerschallengespublishingthe formerindigenousdirectionssubsidiaryconspiracydetails ofand in theaffordablesubstancesreason forconventionitemtype="absolutelysupposedlyremained aattractivetravellingseparatelyfocuses onelementaryapplicablefound thatstylesheetmanuscriptstands for no-repeat(sometimesCommercialin Americaundertakenquarter ofan examplepersonallyindex.php? +percentagebest-knowncreating a" dir="ltrLieutenant +
is said tostructuralreferendummost oftena separate-> +
implementedcan be seenthere was ademonstratecontainer">connectionsthe Britishwas written!important;px; margin-followed byability to complicatedduring the immigrationalso called

as follows:merged withthrough thecommercial pointed outopportunityview of therequirementdivision ofprogramminghe receivedsetInterval">maintainingChristopherMuch of thewritings of" height="2size of theversion of mixture of between theExamples ofeducationalcompetitive onsubmit="director ofdistinctive/DTD XHTML relating totendency toprovince ofwhich woulddespite thescientific legislature.innerHTML allegationsAgriculturewas used inapproach tointelligentyears later,sans-serifdeterminingPerformanceappearances, which is foundationsabbreviatedhigher thans from the individual composed ofsupposed toclaims thatattributionfont-size:1elements ofHistorical his brotherat the timeanniversarygoverned byrelated to ultimately innovationsit is stillcan only bedefinitionstoGMTStringA number ofimg class="Eventually,was changedoccurred inneighboringdistinguishwhen he wasintroducingterrestrialMany of theargues thatan Americanconquest ofwidespread were killedscreen and In order toexpected todescendantsare locatedlegislativegenerations backgroundmost peopleyears afterthere is nothe highestfrequently they do notargued thatshowed thatpredominanttheologicalby the timeconsideringshort-livedcan be usedvery littleone of the had alreadyinterpretedcommunicatefeatures ofgovernment,entered the" height="3Independentpopulationslarge-scale. 
Although used in thedestructionpossibilitystarting intwo or moreexpressionssubordinatelarger thanhistory and +Continentaleliminatingwill not bepractice ofin front ofsite of theensure thatto create amississippipotentiallyoutstandingbetter thanwhat is nowsituated inmeta name="TraditionalsuggestionsTranslationthe form ofatmosphericideologicalenterprisescalculatingeast of theremnants ofpluginspage/index.php?remained intransformedHe was alsowas alreadystatisticalin favor ofMinistry ofmovement offormulationis required +question ofwas electedto become abecause of some peopleinspired bysuccessful a time whenmore commonamongst thean officialwidth:100%;technology,was adoptedto keep thesettlementslive birthsindex.html"Connecticutassigned to&times;account foralign=rightthe companyalways beenreturned toinvolvementBecause thethis period" name="q" confined toa result ofvalue="" />is actuallyEnvironment + +Conversely,> +
this is notthe presentif they areand finallya matter of +
+ +faster thanmajority ofafter whichcomparativeto maintainimprove theawarded theer" class="frameborderrestorationin the sameanalysis oftheir firstDuring the continentalsequence offunction(){font-size: work on the +adopted theproperty ofdirected byeffectivelywas broughtchildren ofProgramminglonger thanmanuscriptswar againstby means ofand most ofsimilar to proprietaryoriginatingprestigiousgrammaticalexperience.to make theIt was alsois found incompetitorsin the U.S.replace thebrought thecalculationfall of thethe generalpracticallyin honor ofreleased inresidentialand some ofking of thereaction to1st Earl ofculture andprincipally + they can beback to thesome of hisexposure toare similarform of theaddFavoritecitizenshippart in thepeople within practiceto continue&minus;approved by the first allowed theand for thefunctioningplaying thesolution toheight="0" in his bookmore than afollows thecreated thepresence in nationalistthe idea ofa characterwere forced class="btndays of thefeatured inshowing theinterest inin place ofturn of thethe head ofLord of thepoliticallyhas its ownEducationalapproval ofsome of theeach other,behavior ofand becauseand anotherappeared onrecorded inblack"may includethe world'scan lead torefers to aborder="0" government winning theresulted in while the Washington,the subjectcity in the>

+ reflect theto completebecame moreradioactiverejected bywithout anyhis father,which couldcopy of theto indicatea politicalaccounts ofconstitutesworked witherof his lifeaccompaniedclientWidthprevent theLegislativedifferentlytogether inhas severalfor anothertext of thefounded thee with the is used forchanged theusually theplace wherewhereas the> The currentthe site ofsubstantialexperience,in the Westthey shouldslovenčinacomentariosuniversidadcondicionesactividadesexperienciatecnologíaproducciónpuntuaciónaplicacióncontraseñacategoríasregistrarseprofesionaltratamientoregístratesecretaríaprincipalesprotecciónimportantesimportanciaposibilidadinteresantecrecimientonecesidadessuscribirseasociacióndisponiblesevaluaciónestudiantesresponsableresoluciónguadalajararegistradosoportunidadcomercialesfotografíaautoridadesingenieríatelevisióncompetenciaoperacionesestablecidosimplementeactualmentenavegaciónconformidadline-height:font-family:" : "http://applicationslink" href="specifically// +/index.html"window.open( !important;application/independence//www.googleorganizationautocompleterequirementsconservative
most notably/>
notification'undefined')Furthermore,believe thatinnerHTML = prior to thedramaticallyreferring tonegotiationsheadquartersSouth AfricaunsuccessfulPennsylvaniaAs a result, +
English (US)appendChild(transmissions. However, intelligence" tabindex="float:right;Commonwealthranging fromin which theat least onereproductionencyclopedia;font-size:1jurisdictionat that time">compensationchampionshipmedia="all" violation ofreference toreturn true;Strict//EN" transactionsinterventionverificationInformation difficultiesChampionshipcapabilities} + +Christianityfor example,Professionalrestrictionssuggest thatwas released(such as theremoveClass(unemploymentthe Americanstructure of/index.html published inspan class=""> + +f (document.border: 1px {font-size:1treatment of0" height="1modificationIndependencedivided intogreater thanachievementsestablishingJavaScript" neverthelesssignificanceBroadcasting> container"> +such as the influence ofa particularsrc='http://navigation" half of the substantial  advantage ofdiscovery offundamental metropolitanthe opposite" xml:lang="deliberatelyalign=centerevolution ofpreservationimprovementsbeginning inJesus ChristPublicationsdisagreementtext-align:r, function()similaritiesbody>is currentlyalphabeticalis sometimestype="image/many of the flow:hidden;available indescribe theexistence ofall over thethe Internet